Package tdi :: Package tools :: Module rjsmin
[frames | no frames]

Source Code for Module tdi.tools.rjsmin

  1  #!/usr/bin/env python 
  2  # -*- coding: ascii -*- 
  3  r""" 
  4  ===================== 
  5   Javascript Minifier 
  6  ===================== 
  7   
  8  rJSmin is a javascript minifier written in python. 
  9   
 10  The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\. 
 11   
 12  :Copyright: 
 13   
 14   Copyright 2011 - 2014 
 15   Andr\xe9 Malo or his licensors, as applicable 
 16   
 17  :License: 
 18   
 19   Licensed under the Apache License, Version 2.0 (the "License"); 
 20   you may not use this file except in compliance with the License. 
 21   You may obtain a copy of the License at 
 22   
 23       http://www.apache.org/licenses/LICENSE-2.0 
 24   
 25   Unless required by applicable law or agreed to in writing, software 
 26   distributed under the License is distributed on an "AS IS" BASIS, 
 27   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 28   See the License for the specific language governing permissions and 
 29   limitations under the License. 
 30   
 31  The module is a re-implementation aiming for speed, so it can be used at 
 32  runtime (rather than during a preprocessing step). Usually it produces the 
 33  same results as the original ``jsmin.c``. It differs in the following ways: 
 34   
 35  - there is no error detection: unterminated string, regex and comment 
 36    literals are treated as regular javascript code and minified as such. 
 37  - Control characters inside string and regex literals are left untouched; they 
 38    are not converted to spaces (nor to \\n) 
 39  - Newline characters are not allowed inside string and regex literals, except 
 40    for line continuations in string literals (ECMA-5). 
 41  - "return /regex/" is recognized correctly. 
 42  - "+ +" and "- -" sequences are not collapsed to '++' or '--' 
 43  - Newlines before ! operators are removed more sensibly 
 44  - Comments starting with an exclamation mark (``!``) can be kept optionally 
 45  - rJSmin does not handle streams, but only complete strings. (However, the 
 46    module provides a "streamy" interface). 
 47   
 48  Since most parts of the logic are handled by the regex engine it's way 
 49  faster than the original python port of ``jsmin.c`` by Baruch Even. The speed 
 50  factor varies between about 6 and 55 depending on input and python version 
 51  (it gets faster the more compressed the input already is). Compared to the 
 52  speed-refactored python port by Dave St.Germain the performance gain is less 
 53  dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for 
 54  details. 
 55   
 56  rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more. 
 57   
 58  Both python 2 and python 3 are supported. 
 59   
 60  .. _jsmin.c by Douglas Crockford: 
 61     http://www.crockford.com/javascript/jsmin.c 
 62  """ 
# The module docstring is written in pure ASCII (see the coding line at the
# top of the file); non-ASCII characters appear as literal escape sequences
# such as ``\xe9``. Round-tripping through ``unicode_escape`` turns those
# escapes into the real characters. The step is skipped when ``__doc__`` is
# falsy (it may be ``None`` or empty).
if __doc__:
    # pylint: disable = W0622
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
# Same escape-decoding trick for the author name.
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.9'
# Only the ``jsmin`` callable is exported by ``from ... import *``.
__all__ = ['jsmin']
 71   
 72  import re as _re 
 73   
 74   
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    The C implementation is preferred when it can be loaded through
    ``tdi.c.load('rjsmin')``. If the ``tdi`` package cannot be imported or
    the loader returns ``None``, the pure python implementation built below
    is returned instead.

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, no attempt is made to load
        the C extension.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914
    if not python_only:
        # Guard the import: without the tdi package the pure python
        # minifier is still fully functional, so fall back to it instead
        # of failing at module import time.
        try:
            from tdi import c as _c
        except ImportError:
            _c = None
        if _c is not None:
            rjsmin = _c.load('rjsmin')
            if rjsmin is not None:
                return rjsmin.jsmin

    # --- basic building blocks -------------------------------------------
    # "Space" is every control char / blank except \n (\012) and \r (\015).
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    # String literals: \047 is the single quote. A backslash may escape any
    # char or a line break (line continuation).
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    # Regex literals, including character classes which may contain "/".
    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    # --- character class generators --------------------------------------
    def sequentize(string):
        """
        Notate consecutive characters as sequence

        (1-4 instead of 1234)
        """
        first, last, ranges = None, None, []
        for char in map(ord, string):
            if last is None:
                first = last = char
            elif last + 1 == char:
                last = char
            else:
                ranges.append((first, last))
                first = last = char
        if last is not None:
            ranges.append((first, last))
        return ''.join([
            '%s%s%s' % (
                chr(start),
                # two adjacent chars are written as "ab", not "a-b"
                '-' if stop > start + 1 else '',
                chr(stop) if stop != start else '',
            )
            for start, stop in ranges
        ])

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        # A literal "-" has to go last, so it is not read as a range.
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        escaped = (
            sequentize(result)
            .replace('\\', '\\\\')
            .replace('[', '\\[')
            .replace(']', '\\]')
        )
        # Control chars, the blank and the single quote are emitted as
        # octal escapes (for better portability).
        return _re.sub(
            r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)),
            escaped
        )

    def ascii_not_matching(pattern):
        """ Collect the chars 0..126 which are not matched by `pattern` """
        match = _re.compile(pattern).match
        return ''.join([
            chr(code) for code in range(127) if not match(chr(code))
        ])

    def id_literal_(what):
        """ Make id_literal like char class """
        return '[^%s]' % fix_charclass(ascii_not_matching(what))

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        return r'[%s]' % fix_charclass(ascii_not_matching(id_literal_(keep)))

    # --- the big substitution regex --------------------------------------
    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    parts = dict(
        dull=r'[^\047"/\000-\040]',
        strings=strings,
        bang_comment=bang_comment,
        regex=regex,
        space=space,
        newline=newline,
        # contexts in which a following "/" starts a regex literal
        preregex1=r'[(,=:\[!&|?{};\r\n]',
        preregex2=r'%sreturn' % not_id_literal,
        id_literal=id_literal_(r'[a-zA-Z0-9_$]'),
        id_literal_open=id_literal_(r'[a-zA-Z0-9_${\[(!+-]'),
        id_literal_close=id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]'),
    )

    # Both minifier variants share head and tail; the banged variant adds
    # one alternative (and therefore one capturing group) in between.
    head = (
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
    )
    bang = r'|(%(bang_comment)s%(dull)s*)'
    # NOTE(review): the preregex2 branch repeats "%(space)s" exactly once
    # per newline ("...%(space)s)*") while the preregex1 branch uses
    # "%(space)s*)*". This is kept as is, because the precomputed patterns
    # in jsmin_for_posers() contain the very same form - confirm whether
    # the difference is intentional before changing it.
    tail = (
        r'|(?<=%(preregex1)s)'
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
        r'%(space)s*(?:%(newline)s%(space)s)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)+'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    )

    space_sub_simple = _re.compile((head + tail) % parts).sub
    space_sub_banged = _re.compile(
        (head + bang + tail) % dict(parts, space=space_nobang)
    ).sub

    def space_subber_simple(match):
        """ Substitution callback (bang comments are dropped) """
        # pylint: disable = R0911
        groups = match.groups()
        # groups 0-3: dull text, strings and regex literals - kept verbatim
        if groups[0]:
            return groups[0]
        if groups[1]:
            return groups[1]
        if groups[2]:
            return groups[2]
        if groups[3]:
            return groups[3]
        # group 4: a newline which has to survive
        if groups[4]:
            return '\n'
        # groups 5-7: space needed to separate ids, "+ +" or "- -"
        if groups[5] or groups[6] or groups[7]:
            return ' '
        return ''

    def space_subber_banged(match):
        """ Substitution callback (bang comments are kept) """
        # pylint: disable = R0911
        groups = match.groups()
        # groups 0-4: like the simple variant, plus bang comments (group 2)
        if groups[0]:
            return groups[0]
        if groups[1]:
            return groups[1]
        if groups[2]:
            return groups[2]
        if groups[3]:
            return groups[3]
        if groups[4]:
            return groups[4]
        if groups[5]:
            return '\n'
        if groups[6] or groups[7] or groups[8]:
            return ' '
        return ''

    def jsmin(script, keep_bang_comments=False):  # pylint: disable = W0621
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
            http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped in newlines so the lookbehind / lookahead
        # alternatives also work at the very beginning and end.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        return space_sub_simple(
            space_subber_simple, '\n%s\n' % script
        ).strip()

    return jsmin

jsmin = _make_jsmin()


def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    The whole script is minified by a single regex substitution; no
    char-by-char parsing takes place.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    if keep_bang_comments:
        # Digest of the "banged" pattern: the first five groups are
        # emitted verbatim (group 2 holds the bang comments).
        verbatim = 5
        pattern = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )
    else:
        # Digest of the "simple" pattern: the first four groups are
        # emitted verbatim.
        verbatim = 4
        pattern = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

    def subber(match):
        """ Substitution callback """
        found = match.groups()
        # leading groups: text which is passed through unchanged
        for text in found[:verbatim]:
            if text:
                return text
        # next group: a newline which must be preserved
        if found[verbatim]:
            return '\n'
        # last three groups: whitespace collapsing to a single blank
        if found[verbatim + 1] or found[verbatim + 2] or found[verbatim + 3]:
            return ' '
        return ''

    wrapped = '\n%s\n' % script
    return _re.sub(pattern, subber, wrapped).strip()


if __name__ == '__main__':
    def main():
        """
        Minify stdin to stdout

        Recognized command line flags: ``-b`` keeps bang comments, ``-p``
        forces the python implementation; ``-bp`` / ``-pb`` do both.
        """
        import sys as _sys
        global jsmin  # pylint: disable = W0603

        flags = _sys.argv[1:]
        keep_bang_comments = any(
            flag in flags for flag in ('-b', '-bp', '-pb')
        )
        python_only = any(
            flag in flags for flag in ('-p', '-bp', '-pb')
        )
        if python_only:
            jsmin = _make_jsmin(python_only=True)

        minified = jsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        )
        _sys.stdout.write(minified)

    main()