1
2
3 r"""
4 =====================
5 Javascript Minifier
6 =====================
7
8 rJSmin is a javascript minifier written in python.
9
10 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.
11
12 :Copyright:
13
14 Copyright 2011 - 2014
15 Andr\xe9 Malo or his licensors, as applicable
16
17 :License:
18
19 Licensed under the Apache License, Version 2.0 (the "License");
20 you may not use this file except in compliance with the License.
21 You may obtain a copy of the License at
22
23 http://www.apache.org/licenses/LICENSE-2.0
24
25 Unless required by applicable law or agreed to in writing, software
26 distributed under the License is distributed on an "AS IS" BASIS,
27 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 See the License for the specific language governing permissions and
29 limitations under the License.
30
31 The module is a re-implementation aiming for speed, so it can be used at
32 runtime (rather than during a preprocessing step). Usually it produces the
33 same results as the original ``jsmin.c``. It differs in the following ways:
34
35 - there is no error detection: unterminated string, regex and comment
36 literals are treated as regular javascript code and minified as such.
37 - Control characters inside string and regex literals are left untouched; they
38 are not converted to spaces (nor to \\n)
39 - Newline characters are not allowed inside string and regex literals, except
40 for line continuations in string literals (ECMA-5).
41 - "return /regex/" is recognized correctly.
42 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
43 - Newlines before ! operators are removed more sensibly
44 - Comments starting with an exclamation mark (``!``) can be kept optionally
45 - rJSmin does not handle streams, but only complete strings. (However, the
46 module provides a "streamy" interface).
47
48 Since most parts of the logic are handled by the regex engine it's way
49 faster than the original python port of ``jsmin.c`` by Baruch Even. The speed
50 factor varies between about 6 and 55 depending on input and python version
51 (it gets faster the more compressed the input already is). Compared to the
52 speed-refactored python port by Dave St.Germain the performance gain is less
53 dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for
54 details.
55
56 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
57
58 Both python 2 and python 3 are supported.
59
60 .. _jsmin.c by Douglas Crockford:
61 http://www.crockford.com/javascript/jsmin.c
62 """
if __doc__:
    # The module docstring is written as a raw, ASCII-only literal; decoding
    # it with 'unicode_escape' turns escapes such as \xe9 into the real
    # characters. Guarded because __doc__ is None when docstrings are
    # stripped (python -OO).
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
# Same decoding trick as above, applied to the author name.
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.9'
# Only the ready-made minifier is exported by ``from ... import *``.
__all__ = ['jsmin']
71
72 import re as _re
73
74
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    Unless `python_only` is true, the C implementation is tried first and
    returned if it can be loaded. Otherwise two substitution regexes are
    compiled (one dropping all block comments, one keeping ``/*!...*/``
    comments) and a python function applying them is returned.

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded. (tdi.c._tdi_rjsmin)

    :Return: Minifier
    :Rtype: ``callable``
    """
    if not python_only:
        # The C implementation is optional. If the tdi package itself cannot
        # be imported (e.g. the module is used standalone), fall back to the
        # python implementation instead of failing.
        try:
            from tdi import c
        except ImportError:
            c = None
        if c is not None:
            rjsmin = c.load('rjsmin')
            if rjsmin is not None:
                return rjsmin.jsmin

    try:
        char_range = xrange  # python 2
    except NameError:
        char_range = range  # python 3

    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        # Move the first dash to the end, so it cannot be read as a range
        # operator inside the character class.
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            return ''.join(['%s%s%s' % (
                chr(first),
                '-' if last > first + 1 else '',
                chr(last) if last != first else ''
            ) for first, last in result])

        # Escape class metacharacters first, then write control characters,
        # the space and the single quote as octal escapes.
        return _re.sub(
            r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)),
            (sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]'))
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in char_range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in char_range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = not_id_literal + 'return'

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    dull = r'[^\047"/\000-\040]'

    # Building blocks referenced by the pattern templates below.
    parts = dict(
        dull=dull, strings=strings, bang_comment=bang_comment,
        preregex1=preregex1, preregex2=preregex2, regex=regex,
        space=space, newline=newline,
        id_literal=id_literal, id_literal_open=id_literal_open,
        id_literal_close=id_literal_close
    )

    def compile_sub(mapping, keep_bang):
        """ Compile the substitution regex and return its ``sub`` method """
        alternatives = [
            # dull text, copied verbatim
            r'(%(dull)s+)',
            # string literals, copied verbatim
            r'(%(strings)s%(dull)s*)',
        ]
        if keep_bang:
            # /*! ... */ comments, copied verbatim
            alternatives.append(r'(%(bang_comment)s%(dull)s*)')
        alternatives.extend([
            # regex literal after an operator-ish character
            r'(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)',

            # regex literal after "return". Each newline may be followed by
            # any amount of space, exactly as in the alternative above
            # (formerly exactly one space was required, so an indented regex
            # on the next line was not recognized as a literal).
            r'(?<=%(preregex2)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)',

            # newline between two tokens which must stay separated by one
            r'(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)',

            # space which must be kept as a single blank
            r'(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)',
            r'(?<=\+)(%(space)s)+(?=\+)',
            r'(?<=-)(%(space)s)+(?=-)',

            # everything else is dropped
            r'%(space)s+',
            r'(?:%(newline)s%(space)s*)+',
        ])
        return _re.compile('|'.join(alternatives) % mapping).sub

    space_sub_simple = compile_sub(parts, False)

    def space_subber_simple(match):
        """ Substitution callback """
        groups = match.groups()
        return (
            groups[0] or groups[1] or groups[2] or groups[3] or
            (groups[4] and '\n') or
            ((groups[5] or groups[6] or groups[7]) and ' ') or
            ''
        )

    # Bang comments must not be swallowed as "space" in this variant.
    space_sub_banged = compile_sub(dict(parts, space=space_nobang), True)

    def space_subber_banged(match):
        """ Substitution callback """
        groups = match.groups()
        return (
            groups[0] or groups[1] or groups[2] or groups[3] or groups[4] or
            (groups[5] and '\n') or
            ((groups[6] or groups[7] or groups[8]) and ' ') or
            ''
        )

    def jsmin(script, keep_bang_comments=False):
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
           http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped into newlines, so the lookbehind assertions
        # also work at the very beginning of the input.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        return space_sub_simple(
            space_subber_simple, '\n%s\n' % script
        ).strip()

    return jsmin

jsmin = _make_jsmin()
284
285
# NOTE(review): the ``def`` line of this function was missing from the
# reviewed text. The name follows upstream rJSmin and the signature mirrors
# the generated ``jsmin`` function -- confirm before merging.
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    # Both patterns are fully expanded alternations. In the alternative
    # following "return" each newline may be followed by any amount of space
    # (formerly exactly one space was required, so an indented regex literal
    # on the line after "return" was not recognized and got minified as code).
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            # groups 1-4: text kept verbatim; 5: a newline to keep;
            # 6-8: space to keep as one blank; no group: drop the match.
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                (groups[4] and '\n') or
                (groups[5] and ' ') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                ''
            )
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            # Same layout as above, shifted by one: group 3 is the bang
            # comment which is kept verbatim.
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                groups[4] or
                (groups[5] and '\n') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                (groups[8] and ' ') or
                ''
            )

    # Wrapped into newlines, so the lookbehind assertions also work at the
    # very beginning of the script.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
400
401
if __name__ == '__main__':
    def main():
        """
        Minify the script read from stdin and write the result to stdout

        Command line flags: ``-b`` keeps bang comments, ``-p`` forces the
        python implementation; ``-bp`` and ``-pb`` select both.
        """
        global jsmin
        import sys as _sys

        flags = _sys.argv[1:]
        both = '-bp' in flags or '-pb' in flags
        keep_bang_comments = '-b' in flags or both
        if '-p' in flags or both:
            # Replace the module level minifier by the pure python one.
            jsmin = _make_jsmin(python_only=True)

        source = _sys.stdin.read()
        _sys.stdout.write(
            jsmin(source, keep_bang_comments=keep_bang_comments)
        )

    main()
419