Add python package: jsbeautifier

This commit is contained in:
EvilOS
2020-04-28 20:58:37 +08:00
parent f1ac10a81e
commit 6d22c9a8b5
35 changed files with 14147 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
HaE/.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,440 @@
from __future__ import print_function
import sys
import os
import platform
import io
import getopt
import re
import string
import errno
import copy
import glob
from jsbeautifier.__version__ import __version__
from jsbeautifier.javascript.options import BeautifierOptions
from jsbeautifier.javascript.beautifier import Beautifier
#
# The MIT License (MIT)
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Originally written by Einar Lielmanis et al.,
# Conversion to python by Einar Lielmanis, einar@beautifier.io,
# Parsing improvement for brace-less and semicolon-less statements
# by Liam Newman <bitwiseman@beautifier.io>
# Python is not my native language, feel free to push things around.
#
# Use either from command line (script displays its usage when run
# without any parameters),
#
#
# or, alternatively, use it as a module:
#
# import jsbeautifier
# res = jsbeautifier.beautify('your javascript string')
# res = jsbeautifier.beautify_file('some_file.js')
#
# you may specify some options:
#
# opts = jsbeautifier.default_options()
# opts.indent_size = 2
# res = jsbeautifier.beautify('some javascript', opts)
#
#
# Here are the available options: (read source)
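#
# A minimal runnable sketch of the usage notes above (illustrative only; it
# relies solely on the public helpers defined below in this module):
def _usage_sketch():
    opts = default_options()
    opts.indent_size = 2
    opts.end_with_newline = True
    # returns the reformatted source as a string
    return beautify('if(x){console.log("hi")}', opts)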
class MissingInputStreamError(Exception):
pass
def default_options():
return BeautifierOptions()
def beautify(string, opts=default_options()):
b = Beautifier()
return b.beautify(string, opts)
def set_file_editorconfig_opts(filename, js_options):
from editorconfig import get_properties, EditorConfigError
try:
_ecoptions = get_properties(os.path.abspath(filename))
if _ecoptions.get("indent_style") == "tab":
js_options.indent_with_tabs = True
elif _ecoptions.get("indent_style") == "space":
js_options.indent_with_tabs = False
if _ecoptions.get("indent_size"):
js_options.indent_size = int(_ecoptions["indent_size"])
if _ecoptions.get("max_line_length"):
if _ecoptions.get("max_line_length") == "off":
js_options.wrap_line_length = 0
else:
js_options.wrap_line_length = int(
_ecoptions["max_line_length"])
if _ecoptions.get("insert_final_newline") == 'true':
js_options.end_with_newline = True
elif _ecoptions.get("insert_final_newline") == 'false':
js_options.end_with_newline = False
if _ecoptions.get("end_of_line"):
if _ecoptions["end_of_line"] == "cr":
js_options.eol = '\r'
elif _ecoptions["end_of_line"] == "lf":
js_options.eol = '\n'
elif _ecoptions["end_of_line"] == "crlf":
js_options.eol = '\r\n'
except EditorConfigError:
# do not error on bad editor config
print("Error loading EditorConfig. Ignoring.", file=sys.stderr)
def beautify_file(file_name, opts=default_options()):
input_string = ''
if file_name == '-': # stdin
if sys.stdin.isatty():
raise MissingInputStreamError()
stream = sys.stdin
if platform.platform().lower().startswith('windows'):
if sys.version_info.major >= 3:
# for python 3 on windows this prevents conversion
stream = io.TextIOWrapper(sys.stdin.buffer, newline='')
elif platform.architecture()[0] == '32bit':
# for python 2 x86 on windows this prevents conversion
import msvcrt
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
else:
raise Exception('Pipe to stdin not supported on Windows with Python 2.x 64-bit.')
input_string = stream.read()
# if you pipe an empty string, that is a failure
if input_string == '':
raise MissingInputStreamError()
else:
stream = io.open(file_name, 'rt', newline='', encoding='UTF-8')
input_string = stream.read()
return beautify(input_string, opts)
def usage(stream=sys.stdout):
print("jsbeautifier.py@" + __version__ + """
Javascript beautifier (https://beautifier.io/)
Usage: jsbeautifier.py [options] <infile>
<infile> can be "-", which means stdin.
Input options:
-i, --stdin Read input from stdin
-f, --file=FILE Add a file to beautify (may be given more than once)
Output options:
-s, --indent-size=NUMBER Indentation size. (default 4).
-c, --indent-char=CHAR Character to indent with. (default space).
-e, --eol=STRING Character(s) to use as line terminators.
(default first newline in file, otherwise "\\n")
-t, --indent-with-tabs Indent with tabs, overrides -s and -c
-d, --disable-preserve-newlines Do not preserve existing line breaks.
-P, --space-in-paren Add padding spaces within paren, ie. f( a, b )
-E, --space-in-empty-paren Add a single space inside empty paren, ie. f( )
-j, --jslint-happy More jslint-compatible output
-a, --space-after-anon-function Add a space before an anonymous function's parens, ie. function ()
--space-after-named-function Add a space before a named function's parens, i.e. function example ()
-b, --brace-style=collapse Brace style (collapse, expand, end-expand, none)(,preserve-inline)
-k, --keep-array-indentation Keep array indentation.
-r, --replace Write output in-place, replacing input
-o, --outfile=FILE Specify a file to output to (default stdout)
--keep-function-indentation Do not re-indent function bodies defined in var lines.
-x, --unescape-strings Decode printable chars encoded in \\xNN notation.
-X, --e4x Pass E4X xml literals through untouched
-C, --comma-first Put commas at the beginning of new line instead of end.
-O, --operator-position=STRING Set operator position (before-newline, after-newline, preserve-newline)
-w, --wrap-line-length Attempt to wrap line when it exceeds this length.
NOTE: Line continues until next wrap point is found.
-n, --end-with-newline End output with newline
--indent-empty-lines Keep indentation on empty lines
--templating List of templating languages (auto,none,django,erb,handlebars,php) ["auto"] auto = none in JavaScript, all in html
--editorconfig Enable setting configuration from EditorConfig
Rarely needed options:
--eval-code evaluate code if a JS interpreter is
installed. May be useful with some obfuscated
script but poses a potential security issue.
-l, --indent-level=NUMBER Initial indentation level. (default 0).
-h, --help, --usage Prints this help statement.
-v, --version Show the version
""", file=stream)
if stream == sys.stderr:
return 1
else:
return 0
def mkdir_p(path):
try:
if path:
os.makedirs(path)
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(path):
pass
else:
raise Exception()
def isFileDifferent(filepath, expected):
try:
return (
''.join(
io.open(
filepath,
'rt',
newline='').readlines()) != expected)
except BaseException:
return True
def main():
argv = sys.argv[1:]
try:
opts, args = getopt.getopt(argv, "f:s:c:e:o:rdEPjab:kil:xhtvXnCO:w:m:",
['file=', 'indent-size=', 'indent-char=', 'eol=', 'outfile=', 'replace', 'disable-preserve-newlines',
'space-in-paren', 'space-in-empty-paren', 'jslint-happy', 'space-after-anon-function',
'brace-style=', 'indent-level=', 'unescape-strings',
'help', 'usage', 'stdin', 'eval-code', 'indent-with-tabs', 'keep-function-indentation', 'version',
                                    'e4x', 'end-with-newline', 'comma-first', 'operator-position=', 'wrap-line-length=', 'max-preserve-newlines=', 'editorconfig', 'space-after-named-function',
                                    'keep-array-indentation', 'indent-empty-lines', 'templating='])
except getopt.GetoptError as ex:
print(ex, file=sys.stderr)
return usage(sys.stderr)
js_options = default_options()
filepath_params = []
filepath_params.extend(args)
outfile_param = 'stdout'
replace = False
for opt, arg in opts:
if opt in ('--file', '-f'):
filepath_params.append(arg)
elif opt in ('--keep-array-indentation', '-k'):
js_options.keep_array_indentation = True
        elif opt in ('--keep-function-indentation',):
js_options.keep_function_indentation = True
elif opt in ('--outfile', '-o'):
outfile_param = arg
elif opt in ('--replace', '-r'):
replace = True
elif opt in ('--indent-size', '-s'):
js_options.indent_size = int(arg)
elif opt in ('--indent-char', '-c'):
js_options.indent_char = arg
elif opt in ('--eol', '-e'):
js_options.eol = arg
elif opt in ('--indent-with-tabs', '-t'):
js_options.indent_with_tabs = True
elif opt in ('--disable-preserve-newlines', '-d'):
js_options.preserve_newlines = False
elif opt in ('--max-preserve-newlines', '-m'):
js_options.max_preserve_newlines = int(arg)
elif opt in ('--space-in-paren', '-P'):
js_options.space_in_paren = True
elif opt in ('--space-in-empty-paren', '-E'):
js_options.space_in_empty_paren = True
elif opt in ('--jslint-happy', '-j'):
js_options.jslint_happy = True
elif opt in ('--space-after-anon-function', '-a'):
js_options.space_after_anon_function = True
        elif opt in ('--space-after-named-function',):
js_options.space_after_named_function = True
        elif opt in ('--eval-code',):
js_options.eval_code = True
elif opt in ('--brace-style', '-b'):
js_options.brace_style = arg
elif opt in ('--unescape-strings', '-x'):
js_options.unescape_strings = True
elif opt in ('--e4x', '-X'):
js_options.e4x = True
elif opt in ('--end-with-newline', '-n'):
js_options.end_with_newline = True
elif opt in ('--comma-first', '-C'):
js_options.comma_first = True
elif opt in ('--operator-position', '-O'):
js_options.operator_position = arg
        elif opt in ('--wrap-line-length', '-w'):
js_options.wrap_line_length = int(arg)
        elif opt in ('--indent-empty-lines',):
js_options.indent_empty_lines = True
        elif opt in ('--templating',):
js_options.templating = arg.split(',')
elif opt in ('--stdin', '-i'):
# stdin is the default if no files are passed
filepath_params = []
        elif opt in ('--editorconfig',):
js_options.editorconfig = True
elif opt in ('--version', '-v'):
return print(__version__)
elif opt in ('--help', '--usage', '-h'):
return usage()
try:
filepaths = []
if not filepath_params or (
len(filepath_params) == 1 and filepath_params[0] == '-'):
# default to stdin
filepath_params = []
filepaths.append('-')
for filepath_param in filepath_params:
# ignore stdin setting if files are specified
if '-' == filepath_param:
continue
# Check if each literal filepath exists
if os.path.isfile(filepath_param):
filepaths.append(filepath_param)
elif '*' in filepath_param or '?' in filepath_param:
# handle globs
# empty result is okay
if sys.version_info.major == 2 or (
sys.version_info.major == 3 and
sys.version_info.minor <= 4):
if '**' in filepath_param:
raise Exception('Recursive globs not supported on Python <= 3.4.')
filepaths.extend(glob.glob(filepath_param))
else:
filepaths.extend(glob.glob(filepath_param, recursive=True))
else:
# not a glob and not a file
raise OSError(errno.ENOENT, os.strerror(errno.ENOENT),
filepath_param)
if len(filepaths) > 1:
replace = True
elif filepaths and filepaths[0] == '-':
replace = False
# remove duplicates
filepaths = set(filepaths)
for filepath in filepaths:
if not replace:
outfile = outfile_param
else:
outfile = filepath
# Editorconfig used only on files, not stdin
if getattr(js_options, 'editorconfig'):
editorconfig_filepath = filepath
if editorconfig_filepath == '-':
if outfile != 'stdout':
editorconfig_filepath = outfile
else:
fileType = 'js'
editorconfig_filepath = 'stdin.' + fileType
# debug("EditorConfig is enabled for ", editorconfig_filepath);
js_options = copy.copy(js_options)
set_file_editorconfig_opts(editorconfig_filepath, js_options)
pretty = beautify_file(filepath, js_options)
if outfile == 'stdout':
stream = sys.stdout
# python automatically converts newlines in text to "\r\n" when on windows
# switch to binary to prevent this
if platform.platform().lower().startswith('windows'):
if sys.version_info.major >= 3:
# for python 3 on windows this prevents conversion
stream = io.TextIOWrapper(sys.stdout.buffer, newline='')
elif platform.architecture()[0] == '32bit':
# for python 2 x86 on windows this prevents conversion
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
else:
raise Exception('Pipe to stdout not supported on Windows with Python 2.x 64-bit.')
stream.write(pretty)
else:
if isFileDifferent(outfile, pretty):
mkdir_p(os.path.dirname(outfile))
# python automatically converts newlines in text to "\r\n" when on windows
# set newline to empty to prevent this
with io.open(outfile, 'wt', newline='', encoding='UTF-8') as f:
print('beautified ' + outfile, file=sys.stdout)
try:
f.write(pretty)
except TypeError:
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py
# fail on a missing six dependency.
six = __import__("six")
f.write(six.u(pretty))
else:
print('beautified ' + outfile + ' - unchanged', file=sys.stdout)
except MissingInputStreamError:
print(
"Must pipe input or define at least one file.\n",
file=sys.stderr)
usage(sys.stderr)
return 1
except UnicodeError as ex:
print("Error while decoding input or encoding output:",
file=sys.stderr)
print(ex, file=sys.stderr)
return 1
except Exception as ex:
print(ex, file=sys.stderr)
return 1
# Success
return 0
if __name__ == "__main__":
main()

View File

@@ -0,0 +1 @@
__version__ = '1.11.0'

View File

@@ -0,0 +1 @@
# Empty file :)

View File

@@ -0,0 +1,53 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
class Directives:
def __init__(self, start_block_pattern, end_block_pattern):
self.__directives_block_pattern = re.compile(start_block_pattern + r' beautify( \w+[:]\w+)+ ' + end_block_pattern)
self.__directive_pattern = re.compile(r' (\w+)[:](\w+)')
self.__directives_end_ignore_pattern = re.compile(start_block_pattern + r'\sbeautify\signore:end\s' + end_block_pattern)
def get_directives(self, text):
if not self.__directives_block_pattern.match(text):
return None
directives = {}
directive_match = self.__directive_pattern.search(text)
while directive_match:
directives[directive_match.group(1)] = directive_match.group(2)
directive_match = self.__directive_pattern.search(
text, directive_match.end())
return directives
def readIgnored(self, input):
return input.readUntilAfter(self.__directives_end_ignore_pattern)
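# A small hedged check of the directive parser above; the '/\*' and '\*/'
# delimiters are an assumption here (block-comment style, as a JS tokenizer
# might pass them in), not something fixed by this file:
def _directives_sketch():
    d = Directives(r'/\*', r'\*/')
    return d.get_directives('/* beautify preserve:start */')  # {'preserve': 'start'}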

View File

@@ -0,0 +1,136 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
class InputScanner:
def __init__(self, input_string):
self.__six = __import__("six")
if input_string is None:
input_string = ''
self.__input = input_string
self.__input_length = len(self.__input)
self.__position = 0
def restart(self):
self.__position = 0
def back(self):
if self.__position > 0:
self.__position -= 1
def hasNext(self):
return self.__position < self.__input_length
def next(self):
val = None
if self.hasNext():
val = self.__input[self.__position]
self.__position += 1
return val
def peek(self, index=0):
val = None
index += self.__position
if index >= 0 and index < self.__input_length:
val = self.__input[index]
return val
def test(self, pattern, index=0):
index += self.__position
return index >= 0 and index < self.__input_length and bool(
pattern.match(self.__input, index))
def testChar(self, pattern, index=0):
# test one character regex match
val = self.peek(index)
return val is not None and bool(pattern.match(val))
def match(self, pattern):
pattern_match = None
if self.hasNext():
pattern_match = pattern.match(self.__input, self.__position)
if bool(pattern_match):
self.__position = pattern_match.end(0)
return pattern_match
def read(self, starting_pattern, until_pattern=None, until_after=False):
val = ''
pattern_match = None
if bool(starting_pattern):
pattern_match = self.match(starting_pattern)
if bool(pattern_match):
val = pattern_match.group(0)
if bool(until_pattern) and \
(bool(pattern_match) or not bool(starting_pattern)):
val += self.readUntil(until_pattern, until_after)
return val
def readUntil(self, pattern, include_match=False):
val = ''
pattern_match = None
match_index = self.__position
if self.hasNext():
pattern_match = pattern.search(self.__input, self.__position)
if bool(pattern_match):
if include_match:
match_index = pattern_match.end(0)
else:
match_index = pattern_match.start(0)
else:
match_index = self.__input_length
val = self.__input[self.__position:match_index]
self.__position = match_index
return val
def readUntilAfter(self, pattern):
return self.readUntil(pattern, True)
def get_regexp(self, pattern, match_from=False):
result = None
# strings are converted to regexp
if isinstance(pattern, self.__six.string_types) and pattern != '':
result = re.compile(pattern)
elif pattern is not None:
result = re.compile(pattern.pattern)
return result
# css beautifier legacy helpers
def peekUntilAfter(self, pattern):
start = self.__position
val = self.readUntilAfter(pattern)
self.__position = start
return val
def lookBack(self, testVal):
start = self.__position - 1
return start >= len(testVal) and \
self.__input[start - len(testVal):start].lower() == testVal
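# A hedged walk-through of the scanner API above, using only methods defined
# in this class (the input text is chosen purely for illustration):
def _inputscanner_sketch():
    scanner = InputScanner("var answer = 42;")
    first = scanner.next()                                           # 'v'
    word_rest = scanner.readUntil(re.compile(r"\s"))                 # 'ar'
    scanner.readUntilAfter(re.compile(r"="))                         # consumes ' answer ='
    tail = scanner.readUntil(re.compile(r";"), include_match=True)   # ' 42;'
    return first, word_rest, tail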

View File

@@ -0,0 +1,216 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import copy
import re
from collections import namedtuple
class Options:
def __init__(self, options=None, merge_child_field=None):
self.css = None
self.js = None
self.html = None
self.raw_options = _mergeOpts(options, merge_child_field)
# Support passing the source text back with no change
self.disabled = self._get_boolean('disabled')
self.eol = self._get_characters('eol', 'auto')
self.end_with_newline = self._get_boolean('end_with_newline')
self.indent_size = self._get_number('indent_size', 4)
self.indent_char = self._get_characters('indent_char', ' ')
self.indent_level = self._get_number('indent_level')
self.preserve_newlines = self._get_boolean('preserve_newlines', True)
self.max_preserve_newlines = self._get_number(
'max_preserve_newlines', 32786)
if not self.preserve_newlines:
self.max_preserve_newlines = 0
self.indent_with_tabs = self._get_boolean(
'indent_with_tabs', self.indent_char == '\t')
if self.indent_with_tabs:
self.indent_char = '\t'
# indent_size behavior changed after 1.8.6
# It used to be that indent_size would be
# set to 1 for indent_with_tabs. That is no longer needed and
# actually doesn't make sense - why not use spaces? Further,
# that might produce unexpected behavior - tabs being used
# for single-column alignment. So, when indent_with_tabs is true
# and indent_size is 1, reset indent_size to 4.
if self.indent_size == 1:
self.indent_size = 4
# Backwards compat with 1.3.x
self.wrap_line_length = self._get_number(
'wrap_line_length', self._get_number('max_char'))
self.indent_empty_lines = self._get_boolean('indent_empty_lines')
# valid templating languages ['django', 'erb', 'handlebars', 'php']
# For now, 'auto' = all off for javascript, all on for html (and inline javascript).
# other values ignored
self.templating = self._get_selection_list('templating',
['auto', 'none', 'django', 'erb', 'handlebars', 'php'], ['auto'])
def _get_array(self, name, default_value=[]):
option_value = getattr(self.raw_options, name, default_value)
result = []
if isinstance(option_value, list):
result = copy.copy(option_value)
elif isinstance(option_value, str):
result = re.compile(r"[^a-zA-Z0-9_/\-]+").split(option_value)
return result
def _get_boolean(self, name, default_value=False):
option_value = getattr(self.raw_options, name, default_value)
result = False
try:
result = bool(option_value)
except ValueError:
pass
return result
def _get_characters(self, name, default_value=''):
option_value = getattr(self.raw_options, name, default_value)
result = ''
if isinstance(option_value, str):
result = option_value.replace('\\r', '\r').replace(
'\\n', '\n').replace('\\t', '\t')
return result
def _get_number(self, name, default_value=0):
option_value = getattr(self.raw_options, name, default_value)
result = 0
try:
result = int(option_value)
except ValueError:
pass
return result
def _get_selection(self, name, selection_list, default_value=None):
result = self._get_selection_list(name, selection_list, default_value)
if len(result) != 1:
raise ValueError(
"Invalid Option Value: The option '" + name + "' can only be one of the following values:\n" +
str(selection_list) +
"\nYou passed in: '" +
str(getattr(self.raw_options, name, None)) +
"'")
return result[0]
def _get_selection_list(self, name, selection_list, default_value=None):
if not selection_list:
raise ValueError("Selection list cannot be empty.")
default_value = default_value or [selection_list[0]]
if not self._is_valid_selection(default_value, selection_list):
raise ValueError("Invalid Default Value!")
result = self._get_array(name, default_value)
if not self._is_valid_selection(result, selection_list):
raise ValueError(
"Invalid Option Value: The option '" + name + "' can contain only the following values:\n" +
str(selection_list) +
"\nYou passed in: '" +
str(getattr(self.raw_options, name, None)) +
"'")
return result
def _is_valid_selection(self, result, selection_list):
if len(result) == 0 or len(selection_list) == 0:
return False
for item in result:
if item not in selection_list:
return False
return True
# merges child options up with the parent options object
# Example: obj = {a: 1, b: {a: 2}}
# mergeOpts(obj, 'b')
#
# Returns: {a: 2}
def _mergeOpts(options, childFieldName):
if options is None:
options = {}
if isinstance(options, tuple):
options = dict(options)
options = _normalizeOpts(options)
finalOpts = copy.copy(options)
if isinstance(options, dict):
local = finalOpts.get(childFieldName, None)
if local:
del(finalOpts[childFieldName])
for key in local:
finalOpts[key] = local[key]
finalOpts = namedtuple("CustomOptions", finalOpts.keys())(
*finalOpts.values())
if isinstance(options, Options):
local = getattr(finalOpts, childFieldName, None)
if local:
delattr(finalOpts, childFieldName)
for key in local:
setattr(finalOpts, key, local[key])
return finalOpts
def _normalizeOpts(options):
convertedOpts = copy.copy(options)
if isinstance(convertedOpts, dict):
option_keys = list(convertedOpts.keys())
for key in option_keys:
if '-' in key:
del convertedOpts[key]
convertedOpts[key.replace('-', '_')] = options[key]
else:
option_keys = list(getattr(convertedOpts, '__dict__', {}))
for key in option_keys:
if '-' in key:
delattr(convertedOpts, key)
setattr(convertedOpts, key.replace(
'-', '_'), getattr(options, key, None))
return convertedOpts
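# A hedged example of the wrappers above: dashed keys are normalized to
# underscores by _normalizeOpts, and anything not supplied falls back to the
# defaults read in Options.__init__:
def _options_sketch():
    opts = Options({'indent-size': 2, 'end_with_newline': True})
    return opts.indent_size, opts.end_with_newline, opts.templating  # (2, True, ['auto'])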

View File

@@ -0,0 +1,348 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
import math
# Using object instead of string to allow for later expansion of info
# about each line
__all__ = ["Output"]
class OutputLine:
def __init__(self, parent):
self.__parent = parent
self.__character_count = 0
self.__indent_count = -1
self.__alignment_count = 0
self.__wrap_point_index = 0
self.__wrap_point_character_count = 0
self.__wrap_point_indent_count = -1
self.__wrap_point_alignment_count = 0
self.__items = []
def clone_empty(self):
line = OutputLine(self.__parent)
line.set_indent(self.__indent_count, self.__alignment_count)
return line
def item(self, index):
return self.__items[index]
def is_empty(self):
return len(self.__items) == 0
def set_indent(self, indent=0, alignment=0):
if self.is_empty():
self.__indent_count = indent
self.__alignment_count = alignment
self.__character_count = self.__parent.get_indent_size(
self.__indent_count, self.__alignment_count)
def _set_wrap_point(self):
if self.__parent.wrap_line_length:
self.__wrap_point_index = len(self.__items)
self.__wrap_point_character_count = self.__character_count
self.__wrap_point_indent_count = \
self.__parent.next_line.__indent_count
self.__wrap_point_alignment_count = \
self.__parent.next_line.__alignment_count
def _should_wrap(self):
return self.__wrap_point_index and \
self.__character_count > \
self.__parent.wrap_line_length and \
self.__wrap_point_character_count > \
self.__parent.next_line.__character_count
def _allow_wrap(self):
if self._should_wrap():
self.__parent.add_new_line()
next = self.__parent.current_line
next.set_indent(self.__wrap_point_indent_count,
self.__wrap_point_alignment_count)
next.__items = self.__items[self.__wrap_point_index:]
self.__items = self.__items[:self.__wrap_point_index]
next.__character_count += self.__character_count - \
self.__wrap_point_character_count
self.__character_count = self.__wrap_point_character_count
if next.__items[0] == " ":
next.__items.pop(0)
next.__character_count -= 1
return True
return False
def last(self):
if not self.is_empty():
return self.__items[-1]
return None
def push(self, item):
self.__items.append(item)
last_newline_index = item.rfind('\n')
if last_newline_index != -1:
self.__character_count = len(item) - last_newline_index
else:
self.__character_count += len(item)
def pop(self):
item = None
if not self.is_empty():
item = self.__items.pop()
self.__character_count -= len(item)
return item
def _remove_indent(self):
if self.__indent_count > 0:
self.__indent_count -= 1
self.__character_count -= self.__parent.indent_size
def _remove_wrap_indent(self):
if self.__wrap_point_indent_count > 0:
self.__wrap_point_indent_count -= 1
def trim(self):
while self.last() == ' ':
self.__items.pop()
self.__character_count -= 1
def toString(self):
result = ''
if self.is_empty():
if self.__parent.indent_empty_lines:
result = self.__parent.get_indent_string(self.__indent_count)
else:
result = self.__parent.get_indent_string(
self.__indent_count, self.__alignment_count)
result += ''.join(self.__items)
return result
class IndentStringCache:
def __init__(self, options, base_string):
self.__cache = ['']
self.__indent_size = options.indent_size
self.__indent_string = options.indent_char
if not options.indent_with_tabs:
self.__indent_string = options.indent_char * options.indent_size
# Set to null to continue support of auto detection of base indent
base_string = base_string or ''
if options.indent_level > 0:
base_string = options.indent_level * self.__indent_string
self.__base_string = base_string
self.__base_string_length = len(base_string)
def get_indent_size(self, indent, column=0):
result = self.__base_string_length
if indent < 0:
result = 0
result += indent * self.__indent_size
result += column
return result
def get_indent_string(self, indent_level, column=0):
result = self.__base_string
if indent_level < 0:
indent_level = 0
result = ''
column += indent_level * self.__indent_size
self.__ensure_cache(column)
result += self.__cache[column]
return result
def __ensure_cache(self, column):
while column >= len(self.__cache):
self.__add_column()
def __add_column(self):
column = len(self.__cache)
indent = 0
result = ''
if self.__indent_size and column >= self.__indent_size:
indent = int(math.floor(column / self.__indent_size))
column -= indent * self.__indent_size
result = indent * self.__indent_string
if column:
result += column * ' '
self.__cache.append(result)
class Output:
def __init__(self, options, baseIndentString=''):
self.__indent_cache = IndentStringCache(options, baseIndentString)
self.raw = False
self._end_with_newline = options.end_with_newline
self.indent_size = options.indent_size
self.wrap_line_length = options.wrap_line_length
self.indent_empty_lines = options.indent_empty_lines
self.__lines = []
self.previous_line = None
self.current_line = None
self.next_line = OutputLine(self)
self.space_before_token = False
self.non_breaking_space = False
self.previous_token_wrapped = False
# initialize
self.__add_outputline()
def __add_outputline(self):
self.previous_line = self.current_line
self.current_line = self.next_line.clone_empty()
self.__lines.append(self.current_line)
def get_line_number(self):
return len(self.__lines)
def get_indent_string(self, indent, column=0):
return self.__indent_cache.get_indent_string(indent, column)
def get_indent_size(self, indent, column=0):
return self.__indent_cache.get_indent_size(indent, column)
def is_empty(self):
return self.previous_line is None and self.current_line.is_empty()
def add_new_line(self, force_newline=False):
# never newline at the start of file
# otherwise, newline only if we didn't just add one or we're forced
if self.is_empty() or \
(not force_newline and self.just_added_newline()):
return False
# if raw output is enabled, don't print additional newlines,
# but still return True as though you had
if not self.raw:
self.__add_outputline()
return True
def get_code(self, eol):
self.trim(True)
        # handle some edge cases where the last token
        # has text that ends with newline(s)
last_item = self.current_line.pop()
if last_item:
if last_item[-1] == '\n':
last_item = re.sub(r'[\n]+$', '', last_item)
self.current_line.push(last_item)
if self._end_with_newline:
self.__add_outputline()
sweet_code = "\n".join(line.toString() for line in self.__lines)
if not eol == '\n':
sweet_code = sweet_code.replace('\n', eol)
return sweet_code
def set_wrap_point(self):
self.current_line._set_wrap_point()
def set_indent(self, indent=0, alignment=0):
# Next line stores alignment values
self.next_line.set_indent(indent, alignment)
# Never indent your first output indent at the start of the file
if len(self.__lines) > 1:
self.current_line.set_indent(indent, alignment)
return True
self.current_line.set_indent()
return False
def add_raw_token(self, token):
for _ in range(token.newlines):
self.__add_outputline()
self.current_line.set_indent(-1)
self.current_line.push(token.whitespace_before)
self.current_line.push(token.text)
self.space_before_token = False
self.non_breaking_space = False
self.previous_token_wrapped = False
def add_token(self, printable_token):
self.__add_space_before_token()
self.current_line.push(printable_token)
self.space_before_token = False
self.non_breaking_space = False
self.previous_token_wrapped = self.current_line._allow_wrap()
def __add_space_before_token(self):
if self.space_before_token and not self.just_added_newline():
if not self.non_breaking_space:
self.set_wrap_point()
self.current_line.push(' ')
self.space_before_token = False
def remove_indent(self, index):
while index < len(self.__lines):
self.__lines[index]._remove_indent()
index += 1
self.current_line._remove_wrap_indent()
def trim(self, eat_newlines=False):
self.current_line.trim()
while eat_newlines and len(
self.__lines) > 1 and self.current_line.is_empty():
self.__lines.pop()
self.current_line = self.__lines[-1]
self.current_line.trim()
if len(self.__lines) > 1:
self.previous_line = self.__lines[-2]
else:
self.previous_line = None
def just_added_newline(self):
return self.current_line.is_empty()
def just_added_blankline(self):
return self.is_empty() or \
(self.current_line.is_empty() and self.previous_line.is_empty())
def ensure_empty_line_above(self, starts_with, ends_with):
index = len(self.__lines) - 2
while index >= 0:
potentialEmptyLine = self.__lines[index]
if potentialEmptyLine.is_empty():
break
elif not potentialEmptyLine.item(0).startswith(starts_with) and \
potentialEmptyLine.item(-1) != ends_with:
self.__lines.insert(index + 1, OutputLine(self))
self.previous_line = self.__lines[-2]
break
index -= 1
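# A hedged sketch of the output buffer above. ``options`` is expected to be an
# Options-style object exposing the attributes read in Output.__init__ and
# IndentStringCache (indent_size, indent_char, indent_with_tabs, indent_level,
# end_with_newline, wrap_line_length, indent_empty_lines):
def _output_sketch(options):
    out = Output(options)
    out.add_token('function')
    out.space_before_token = True
    out.add_token('demo()')
    out.add_new_line()
    out.set_indent(1)
    out.add_token('return')
    return out.get_code('\n')  # e.g. "function demo()\n    return" with indent_size 4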

View File

@@ -0,0 +1,82 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
__all__ = ["Pattern"]
class Pattern:
def __init__(self, input_scanner, parent=None):
self._input = input_scanner
self._starting_pattern = None
self._match_pattern = None
self._until_pattern = None
self._until_after = False
if parent is not None:
self._starting_pattern = self._input.get_regexp(parent._starting_pattern)
self._match_pattern = self._input.get_regexp(parent._match_pattern)
self._until_pattern = self._input.get_regexp(parent._until_pattern)
self._until_after = parent._until_after
def read(self):
result = self._input.read(self._starting_pattern)
if (self._starting_pattern is None) or result:
result += self._input.read(self._match_pattern,
self._until_pattern, self._until_after)
return result
def read_match(self):
return self._input.match(self._match_pattern)
def until_after(self, pattern):
result = self._create()
result._until_after = True
result._until_pattern = self._input.get_regexp(pattern)
result._update()
return result
def until(self, pattern):
result = self._create()
result._until_after = False
result._until_pattern = self._input.get_regexp(pattern)
result._update()
return result
def starting_with(self, pattern):
result = self._create()
result._starting_pattern = self._input.get_regexp(pattern)
result._update()
return result
def matching(self, pattern):
result = self._create()
result._match_pattern = self._input.get_regexp(pattern)
result._update()
return result
def _create(self):
return Pattern(self._input, self)
def _update(self):
pass
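# A hedged sketch of the builder above; ``scanner`` is assumed to be an
# InputScanner (defined elsewhere in this commit) positioned at text such as
# "/* note */ rest of the line":
def _pattern_sketch(scanner):
    comment = Pattern(scanner).starting_with(r'/\*').until_after(r'\*/')
    return comment.read()  # "/* note */" when the scanner sits at the comment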

View File

@@ -0,0 +1,177 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import copy
from ..core.pattern import Pattern
__all__ = ["TemplatablePattern"]
class TemplateNames:
def __init__(self):
self.django = False
self.erb = False
self.handlebars = False
self.php = False
class TemplatePatterns:
def __init__(self, input_scanner):
pattern = Pattern(input_scanner)
self.handlebars_comment = pattern.starting_with(r'{{!--').until_after(r'--}}')
self.handlebars_unescaped = pattern.starting_with(r'{{{').until_after(r'}}}')
self.handlebars = pattern.starting_with(r'{{').until_after(r'}}')
self.php = pattern.starting_with(r'<\?(?:[=]|php)').until_after(r'\?>')
self.erb = pattern.starting_with(r'<%[^%]').until_after(r'[^%]%>')
        # django conflicts with handlebars a bit.
self.django = pattern.starting_with(r'{%').until_after(r'%}')
self.django_value = pattern.starting_with(r'{{').until_after(r'}}')
self.django_comment = pattern.starting_with(r'{#').until_after(r'#}')
class TemplatablePattern(Pattern):
def __init__(self, input_scanner, parent=None):
Pattern.__init__(self, input_scanner, parent)
self.__template_pattern = None
self._disabled = TemplateNames()
self._excluded = TemplateNames()
if parent is not None:
self.__template_pattern = \
self._input.get_regexp(parent.__template_pattern)
self._disabled = copy.copy(parent._disabled)
self._excluded = copy.copy(parent._excluded)
self.__patterns = TemplatePatterns(input_scanner)
def _create(self):
return TemplatablePattern(self._input, self)
def _update(self):
self.__set_templated_pattern()
def read_options(self, options):
result = self._create()
for language in ['django', 'erb', 'handlebars', 'php']:
setattr(result._disabled, language,
not (language in options.templating))
result._update()
return result
def disable(self, language):
result = self._create()
setattr(result._disabled, language, True)
result._update()
return result
def exclude(self, language):
result = self._create()
setattr(result._excluded, language, True)
result._update()
return result
def read(self):
result = ''
if bool(self._match_pattern):
result = self._input.read(self._starting_pattern)
else:
result = self._input.read(self._starting_pattern,
self.__template_pattern)
next = self._read_template()
while (bool(next)):
if self._match_pattern is not None:
next += self._input.read(self._match_pattern)
else:
next += self._input.readUntil(self.__template_pattern)
result += next
next = self._read_template()
if self._until_after:
            result += self._input.readUntilAfter(self._until_pattern)
return result
def __set_templated_pattern(self):
items = list()
if not self._disabled.php:
items.append(self.__patterns.php._starting_pattern.pattern)
if not self._disabled.handlebars:
items.append(self.__patterns.handlebars._starting_pattern.pattern)
if not self._disabled.erb:
items.append(self.__patterns.erb._starting_pattern.pattern)
if not self._disabled.django:
items.append(self.__patterns.django._starting_pattern.pattern)
items.append(self.__patterns.django_value._starting_pattern.pattern)
items.append(self.__patterns.django_comment._starting_pattern.pattern)
if self._until_pattern:
items.append(self._until_pattern.pattern)
self.__template_pattern = self._input.get_regexp(
r'(?:' + '|'.join(items) + ')')
def _read_template(self):
resulting_string = ''
c = self._input.peek()
if c == '<':
peek1 = self._input.peek(1)
if not self._disabled.php and \
not self._excluded.php and \
peek1 == '?':
resulting_string = resulting_string or \
self.__patterns.php.read()
if not self._disabled.erb and \
not self._excluded.erb and \
peek1 == '%':
resulting_string = resulting_string or \
self.__patterns.erb.read()
elif c == '{':
if not self._disabled.handlebars and \
not self._excluded.handlebars:
resulting_string = resulting_string or \
self.__patterns.handlebars_comment.read()
resulting_string = resulting_string or \
self.__patterns.handlebars_unescaped.read()
resulting_string = resulting_string or \
self.__patterns.handlebars.read()
if not self._disabled.django:
                # django conflicts with handlebars a bit.
if not self._excluded.django and \
not self._excluded.handlebars:
resulting_string = resulting_string or \
self.__patterns.django_value.read()
if not self._excluded.django:
resulting_string = resulting_string or \
self.__patterns.django_comment.read()
resulting_string = resulting_string or \
self.__patterns.django.read()
return resulting_string

View File

@@ -0,0 +1,43 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class Token:
def __init__(
self,
type,
text,
newlines=0,
whitespace_before=''):
self.type = type
self.text = text
self.comments_before = None
self.newlines = newlines
self.whitespace_before = whitespace_before
self.parent = None
self.next = None
self.previous = None
self.opened = None
self.closed = None
self.directives = None

View File

@@ -0,0 +1,135 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
from ..core.inputscanner import InputScanner
from ..core.token import Token
from ..core.tokenstream import TokenStream
from ..core.pattern import Pattern
from ..core.whitespacepattern import WhitespacePattern
__all__ = ["TOKEN", "Tokenizer", "TokenizerPatterns", "TokenTypes"]
class TokenTypes:
START = 'TK_START'
RAW = 'TK_RAW'
EOF = 'TK_EOF'
def __init__(self):
pass
TOKEN = TokenTypes()
class TokenizerPatterns:
def __init__(self, input_scanner):
self.whitespace = WhitespacePattern(input_scanner)
class Tokenizer:
def __init__(self, input_string, options):
self._input = InputScanner(input_string)
self._options = options
self.__tokens = None
self._patterns = TokenizerPatterns(self._input)
def tokenize(self):
self._input.restart()
self.__tokens = TokenStream()
current = None
previous = Token(TOKEN.START,'')
open_token = None
open_stack = []
comments = TokenStream()
while previous.type != TOKEN.EOF:
current = self.__get_next_token_with_comments(previous, open_token)
if self._is_opening(current):
open_stack.append(open_token)
open_token = current
elif open_token is not None and \
self._is_closing(current, open_token):
current.opened = open_token
open_token.closed = current
open_token = open_stack.pop()
current.parent = open_token
self.__tokens.add(current)
previous = current
return self.__tokens
def __get_next_token_with_comments(self, previous, open_token):
current = self._get_next_token(previous, open_token)
if self._is_comment(current):
comments = TokenStream()
while self._is_comment(current):
comments.add(current)
current = self._get_next_token(previous, open_token)
if not comments.isEmpty():
current.comments_before = comments
comments = TokenStream()
current.parent = open_token
current.previous = previous
previous.next = current
return current
def _is_first_token(self):
return self.__tokens.isEmpty()
def _reset(self):
pass
def _get_next_token(self, previous_token, open_token):
self._readWhitespace()
resulting_string = self._input.read(re.compile(r'.+'))
if resulting_string:
return self._create_token(TOKEN.RAW, resulting_string)
else:
return self._create_token(TOKEN.EOF, '')
def _is_comment(self, current_token):
return False
def _is_opening(self, current_token):
return False
def _is_closing(self, current_token, open_token):
return False
def _create_token(self, token_type, text):
token = Token(token_type, text,
self._patterns.whitespace.newline_count,
self._patterns.whitespace.whitespace_before_token)
return token
def _readWhitespace(self):
return self._patterns.whitespace.read()
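# A hedged sketch of the base tokenizer above: it emits one RAW token per run
# of non-newline text, then a terminating EOF token. The base class never
# touches ``options``, so None is passed purely for illustration:
def _tokenizer_sketch():
    tokens = Tokenizer("var a = 1;", None).tokenize()
    return [(t.type, t.text) for t in tokens]  # [('TK_RAW', 'var a = 1;'), ('TK_EOF', '')]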

View File

@@ -0,0 +1,74 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
from ..core.inputscanner import InputScanner
from ..core.token import Token
class TokenStream:
def __init__(self, parent_token=None):
self.__tokens = []
self.__tokens_length = len(self.__tokens)
self.__position = 0
self.__parent_token = parent_token
def restart(self):
self.__position = 0
def isEmpty(self):
return self.__tokens_length == 0
def hasNext(self):
return self.__position < self.__tokens_length
def next(self):
if self.hasNext():
val = self.__tokens[self.__position]
self.__position += 1
return val
else:
raise StopIteration
def peek(self, index=0):
val = None
index += self.__position
if index >= 0 and index < self.__tokens_length:
val = self.__tokens[index]
return val
def add(self, token):
if self.__parent_token:
token.parent = self.__parent_token
self.__tokens.append(token)
self.__tokens_length += 1
def __iter__(self):
self.restart()
return self
def __next__(self):
return self.next()
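# A hedged sketch of the stream above: tokens come back in insertion order and
# the stream itself is iterable thanks to __iter__/__next__:
def _tokenstream_sketch():
    stream = TokenStream()
    stream.add(Token('TK_WORD', 'foo'))
    stream.add(Token('TK_EOF', ''))
    return [t.text for t in stream]  # ['foo', '']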

View File

@@ -0,0 +1,78 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
from ..core.pattern import Pattern
__all__ = ["WhitespacePattern"]
class WhitespacePattern(Pattern):
def __init__(self, input_scanner, parent=None):
Pattern.__init__(self, input_scanner, parent)
if parent is not None:
self._newline_regexp = \
self._input.get_regexp(parent._newline_regexp)
else:
self.__set_whitespace_patterns('', '')
self.newline_count = 0
self.whitespace_before_token = ''
def __set_whitespace_patterns(self, whitespace_chars, newline_chars):
whitespace_chars += '\\t '
newline_chars += '\\n\\r'
self._match_pattern = self._input.get_regexp(
'[' + whitespace_chars + newline_chars + ']+')
self._newline_regexp = self._input.get_regexp(
'\\r\\n|[' + newline_chars + ']')
def read(self):
self.newline_count = 0
self.whitespace_before_token = ''
resulting_string = self._input.read(self._match_pattern)
if resulting_string == ' ':
self.whitespace_before_token = ' '
elif bool(resulting_string):
lines = self._newline_regexp.split(resulting_string)
self.newline_count = len(lines) - 1
self.whitespace_before_token = lines[-1]
return resulting_string
def matching(self, whitespace_chars, newline_chars):
result = self._create()
result.__set_whitespace_patterns(whitespace_chars, newline_chars)
result._update()
return result
def _create(self):
return WhitespacePattern(self._input, self)
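# A hedged sketch of the pattern above; ``scanner`` is assumed to be an
# InputScanner positioned at leading whitespace (e.g. "\n\n  next_token"):
def _whitespace_sketch(scanner):
    ws = WhitespacePattern(scanner)
    ws.read()
    # e.g. (2, '  ') for the sample input: two newlines swallowed, two spaces
    # left before the upcoming token
    return ws.newline_count, ws.whitespace_before_token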

View File

@@ -0,0 +1 @@
# Empty file :)

View File

@@ -0,0 +1,75 @@
import re
# This section of code was translated to python from acorn (javascript).
#
# Acorn was written by Marijn Haverbeke and released under an MIT
# license. The Unicode regexps (for identifiers and whitespace) were
# taken from [Esprima](http://esprima.org) by Ariya Hidayat.
#
# Git repositories for Acorn are available at
#
# http://marijnhaverbeke.nl/git/acorn
# https://github.com/marijnh/acorn.git
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing
# six dependency.
six = __import__("six")
# ## Character categories
# acorn used char codes to squeeze the last bit of performance out
# Beautifier is okay without that, so we're using regex
# permit # (35), $ (36), and @ (64). @ is used in ES7 decorators.
# 65 through 91 are uppercase letters.
# permit _ (95).
# 97 through 123 are lowercase letters.
_baseASCIIidentifierStartChars = six.u(r"\x23\x24\x40\x41-\x5a\x5f\x61-\x7a")
# inside an identifier @ is not allowed but 0-9 are.
_baseASCIIidentifierChars = six.u(r"\x24\x30-\x39\x41-\x5a\x5f\x61-\x7a")
# Big ugly regular expressions that match characters in the
# whitespace, identifier, and identifier-start categories. These
# are only applied when a character is found to actually have a
# code point above 128.
# IMPORTANT: These strings must be run through six to handle \u chars
_nonASCIIidentifierStartChars = six.u(r"\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc")
_nonASCIIidentifierChars = six.u(r"\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f")
#_nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]")
#_nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")
_identifierStart = six.u(r"(?:\\u[0-9a-fA-F]{4}|[") + \
_baseASCIIidentifierStartChars + \
_nonASCIIidentifierStartChars + \
six.u("])")
_identifierChars = six.u(r"(?:\\u[0-9a-fA-F]{4}|[") + \
_baseASCIIidentifierChars + \
_nonASCIIidentifierStartChars + \
_nonASCIIidentifierChars + \
six.u("])*")
identifier = re.compile(_identifierStart + _identifierChars)
identifierStart = re.compile(_identifierStart)
identifierMatch = re.compile(six.u(r"(?:\\u[0-9a-fA-F]{4}|[") + \
_baseASCIIidentifierChars + \
_nonASCIIidentifierStartChars + \
_nonASCIIidentifierChars + \
six.u("])+"))
_nonASCIIwhitespace = re.compile(
six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]"))
# Whether a single character denotes a newline.
# IMPORTANT: This string must be run through six to handle \u chars
newline = re.compile(six.u(r"[\n\r\u2028\u2029]"))
# Matches a whole line break (where CRLF is considered a single
# line break). Used to count lines.
# in javascript, these two differ
# in python they are the same, different methods are called on them
# IMPORTANT: This string must be run through six to handle \u chars
lineBreak = re.compile(six.u(r"\r\n|[\n\r\u2028\u2029]"))
allLineBreaks = lineBreak
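# Illustrative sketch (mine, not part of the original file): the compiled
# patterns above accept ASCII, non-ASCII and \uXXXX-escaped identifiers.
# Expected values are my reading of the character classes (assuming the ASCII
# start set defined earlier in this file excludes digits).
print(bool(identifier.match('fooBar')))          # True
print(bool(identifier.match(u'\u00e9t\u00e9')))  # True  ('été' begins with a non-ASCII letter)
print(bool(identifierStart.match('1abc')))       # False (digits cannot start an identifier)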

File diff suppressed because it is too large

View File

@@ -0,0 +1,94 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from ..core.options import Options as BaseOptions
OPERATOR_POSITION = [
'before-newline',
'after-newline',
'preserve-newline'
]
class BeautifierOptions(BaseOptions):
def __init__(self, options=None):
BaseOptions.__init__(self, options, 'js')
self.css = None
self.js = None
self.html = None
# compatibility: remap deprecated brace_style values
raw_brace_style = getattr(self.raw_options, 'brace_style', None)
if raw_brace_style == "expand-strict": # graceful handling of deprecated option
setattr(self.raw_options, 'brace_style', "expand")
elif raw_brace_style == "collapse-preserve-inline": # graceful handling of deprecated option
setattr(self.raw_options, 'brace_style', "collapse,preserve-inline")
# elif bool(self.raw_options.braces_on_own_line): # graceful handling of deprecated option
# raw_brace_style = "expand": "collapse"
# elif raw_brace_style is None: # Nothing exists to set it
# setattr(self.raw_options, 'brace_style', "collapse")
brace_style_split = self._get_selection_list('brace_style', ['collapse', 'expand', 'end-expand', 'none', 'preserve-inline'])
# preserve-inline in delimited string will trigger brace_preserve_inline
# Everything else is considered a brace_style and the last one only will
# have an effect
# specify defaults in case one half of meta-option is missing
self.brace_preserve_inline = False
self.brace_style = "collapse"
for bs in brace_style_split:
if bs == "preserve-inline":
self.brace_preserve_inline = True
else:
self.brace_style = bs
self.unindent_chained_methods = self._get_boolean('unindent_chained_methods')
self.break_chained_methods = self._get_boolean('break_chained_methods')
self.space_in_paren = self._get_boolean('space_in_paren')
self.space_in_empty_paren = self._get_boolean('space_in_empty_paren')
self.jslint_happy = self._get_boolean('jslint_happy')
self.space_after_anon_function = self._get_boolean('space_after_anon_function')
self.space_after_named_function = self._get_boolean('space_after_named_function')
self.keep_array_indentation = self._get_boolean('keep_array_indentation')
self.space_before_conditional = self._get_boolean('space_before_conditional', True)
self.unescape_strings = self._get_boolean('unescape_strings')
self.e4x = self._get_boolean('e4x')
self.comma_first = self._get_boolean('comma_first')
self.operator_position = self._get_selection('operator_position', OPERATOR_POSITION)
# For testing of beautify preserve:start directive
self.test_output_raw = False
self.editorconfig = False
# force opts.space_after_anon_function to true if opts.jslint_happy
if self.jslint_happy:
self.space_after_anon_function = True
self.eval_code = False
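# Hedged usage sketch (mine, not part of the original file): the brace_style
# handling above is reachable through the package's public API; the output
# shown is what I expect for the 'expand' style, not a verified transcript.
import jsbeautifier
opts = jsbeautifier.default_options()
opts.brace_style = 'expand'
print(jsbeautifier.beautify('if (a) { b(); }', opts))
# expected, roughly:
# if (a)
# {
#     b();
# }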

View File

@@ -0,0 +1,562 @@
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
from ..core.inputscanner import InputScanner
from ..core.tokenizer import TokenTypes as BaseTokenTypes
from ..core.tokenizer import Tokenizer as BaseTokenizer
from ..core.tokenizer import TokenizerPatterns as BaseTokenizerPatterns
from ..core.directives import Directives
from ..core.pattern import Pattern
from ..core.templatablepattern import TemplatablePattern
__all__ = ["TOKEN", "Tokenizer", "TokenTypes"]
class TokenTypes(BaseTokenTypes):
START_EXPR = 'TK_START_EXPR'
END_EXPR = 'TK_END_EXPR'
START_BLOCK = 'TK_START_BLOCK'
END_BLOCK = 'TK_END_BLOCK'
WORD = 'TK_WORD'
RESERVED = 'TK_RESERVED'
SEMICOLON = 'TK_SEMICOLON'
STRING = 'TK_STRING'
EQUALS = 'TK_EQUALS'
OPERATOR = 'TK_OPERATOR'
COMMA = 'TK_COMMA'
BLOCK_COMMENT = 'TK_BLOCK_COMMENT'
COMMENT = 'TK_COMMENT'
DOT = 'TK_DOT'
UNKNOWN = 'TK_UNKNOWN'
def __init__(self):
pass
TOKEN = TokenTypes()
dot_pattern = re.compile(r'[^\d\.]')
number_pattern = re.compile(
r'0[xX][0123456789abcdefABCDEF]*|0[oO][01234567]*|0[bB][01]*|\d+n|(?:\.\d+|\d+\.?\d*)(?:[eE][+-]?\d+)?')
digit = re.compile(r'[0-9]')
positionable_operators = frozenset(
(">>> === !== " +
"<< && >= ** != == <= >> || |> " +
"< / - + > : & % ? ^ | *").split(' '))
punct = (">>>= " +
"... >>= <<= === >>> !== **= " +
"=> ^= :: /= << <= == && -= >= >> != -- += ** || ++ %= &= *= |= |> " +
"= ! ? > < : / ^ - + * & % ~ |")
punct = re.compile(r'([-[\]{}()*+?.,\\^$|#])').sub(r'\\\1', punct)
# ?. but not if followed by a number
punct = '\\?\\.(?!\\d) ' + punct
punct = punct.replace(' ', '|')
punct_pattern = re.compile(punct)
# Words which always should start on a new line
line_starters = frozenset(
('continue,try,throw,return,var,let,const,if,switch,case,default,for,' +
'while,break,function,import,export').split(','))
reserved_words = line_starters | frozenset(['do',
'in',
'of',
'else',
'get',
'set',
'new',
'catch',
'finally',
'typeof',
'yield',
'async',
'await',
'from',
'as'])
reserved_word_pattern = re.compile(r'^(?:' + '|'.join(reserved_words) + r')$')
directives_core = Directives(r'/\*', r'\*/')
xmlRegExp = re.compile(
r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
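# Quick illustrative sanity checks (mine, not part of the original file) of
# the token patterns defined above; harmless asserts, no side effects.
assert reserved_word_pattern.match('return') and not reserved_word_pattern.match('returned')
assert number_pattern.match('0xFF').group(0) == '0xFF'
assert number_pattern.match('1.5e-3').group(0) == '1.5e-3'
assert punct_pattern.match('=>').group(0) == '=>'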
class TokenizerPatterns(BaseTokenizerPatterns):
def __init__(self, input_scanner, acorn, options):
BaseTokenizerPatterns.__init__(self, input_scanner)
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing
# six dependency.
six = __import__("six")
# IMPORTANT: This string must be run through six to handle \u chars
self.whitespace = self.whitespace.matching(
six.u(r'\u00A0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff'),
six.u(r'\u2028\u2029'))
pattern = Pattern(input_scanner)
templatable = TemplatablePattern(input_scanner) \
.read_options(options)
self.identifier = templatable.starting_with(acorn.identifier \
).matching(acorn.identifierMatch)
self.number = pattern.matching(number_pattern)
self.punct = pattern.matching(punct_pattern)
self.comment = pattern.starting_with(r'//').until(
six.u(r'[\n\r\u2028\u2029]'))
self.block_comment = pattern.starting_with(r'/\*').until_after(r'\*/')
self.html_comment_start = pattern.matching(r'<!--')
self.html_comment_end = pattern.matching(r'-->')
self.include = pattern.starting_with(r'#include' \
).until_after(acorn.lineBreak)
self.shebang = pattern.starting_with(r'#!' \
).until_after(acorn.lineBreak)
self.xml = pattern.matching(xmlRegExp)
self.single_quote = templatable.until(six.u(r"['\\\n\r\u2028\u2029]"))
self.double_quote = templatable.until(six.u(r'["\\\n\r\u2028\u2029]'))
self.template_text = templatable.until(r'[`\\$]')
self.template_expression = templatable.until(r'[`}\\]')
class Tokenizer(BaseTokenizer):
positionable_operators = positionable_operators
line_starters = line_starters
def __init__(self, input_string, opts):
BaseTokenizer.__init__(self, input_string, opts)
import jsbeautifier.javascript.acorn as acorn
self.acorn = acorn
self.in_html_comment = False
self.has_char_escapes = False
self._patterns = TokenizerPatterns(self._input, self.acorn, opts)
def _reset(self):
self.in_html_comment = False
def _is_comment(self, current_token):
return current_token.type == TOKEN.COMMENT or \
current_token.type == TOKEN.BLOCK_COMMENT or \
current_token.type == TOKEN.UNKNOWN
def _is_opening(self, current_token):
return current_token.type == TOKEN.START_BLOCK or current_token.type == TOKEN.START_EXPR
def _is_closing(self, current_token, open_token):
return (current_token.type == TOKEN.END_BLOCK or current_token.type == TOKEN.END_EXPR) and \
(open_token is not None and (
(current_token.text == ']' and open_token.text == '[') or
(current_token.text == ')' and open_token.text == '(') or
(current_token.text == '}' and open_token.text == '{')))
def _get_next_token(self, previous_token, open_token):
token = None
self._readWhitespace()
c = self._input.peek()
if c is None:
token = self._create_token(TOKEN.EOF, '')
token = token or self._read_non_javascript(c)
token = token or self._read_string(c)
token = token or self._read_word(previous_token)
token = token or self._read_singles(c)
token = token or self._read_comment(c)
token = token or self._read_regexp(c, previous_token)
token = token or self._read_xml(c, previous_token)
token = token or self._read_punctuation()
token = token or self._create_token(TOKEN.UNKNOWN, self._input.next())
return token
def _read_singles(self, c):
token = None
if c == '(' or c == '[':
token = self._create_token(TOKEN.START_EXPR, c)
elif c == ')' or c == ']':
token = self._create_token(TOKEN.END_EXPR, c)
elif c == '{':
token = self._create_token(TOKEN.START_BLOCK, c)
elif c == '}':
token = self._create_token(TOKEN.END_BLOCK, c)
elif c == ';':
token = self._create_token(TOKEN.SEMICOLON, c)
elif c == '.' and self._input.peek(1) is not None and \
bool(dot_pattern.match(self._input.peek(1))):
token = self._create_token(TOKEN.DOT, c)
elif c == ',':
token = self._create_token(TOKEN.COMMA, c)
if token is not None:
self._input.next()
return token
def _read_word(self, previous_token):
resulting_string = self._patterns.identifier.read()
if bool(resulting_string):
resulting_string = re.sub(self.acorn.allLineBreaks, '\n', resulting_string)
if not (previous_token.type == TOKEN.DOT or (
previous_token.type == TOKEN.RESERVED and (
previous_token.text == 'set' or previous_token.text == 'get')
)) and reserved_word_pattern.match(resulting_string):
if resulting_string == 'in' or resulting_string == 'of':
# in and of are operators, need to hack
return self._create_token(TOKEN.OPERATOR, resulting_string)
return self._create_token(TOKEN.RESERVED, resulting_string)
return self._create_token(TOKEN.WORD, resulting_string)
resulting_string = self._patterns.number.read()
if resulting_string != '':
return self._create_token(TOKEN.WORD, resulting_string)
def _read_comment(self, c):
token = None
if c == '/':
comment = ''
if self._input.peek(1) == '*': # peek /* .. */ comment
comment = self._patterns.block_comment.read()
directives = directives_core.get_directives(comment)
if directives and directives.get('ignore') == 'start':
comment += directives_core.readIgnored(self._input)
comment = re.sub(self.acorn.allLineBreaks, '\n', comment)
token = self._create_token(TOKEN.BLOCK_COMMENT, comment)
token.directives = directives
elif self._input.peek(1) == '/': # peek // comment
comment = self._patterns.comment.read()
token = self._create_token(TOKEN.COMMENT, comment)
return token
def _read_string(self, c):
if c == '`' or c == "'" or c == '"':
resulting_string = self._input.next()
self.has_char_escapes = False
if c == '`':
resulting_string += self.parse_string('`', True, '${')
else:
resulting_string += self.parse_string(c)
if self.has_char_escapes and self._options.unescape_strings:
resulting_string = self.unescape_string(resulting_string)
if self._input.peek() == c:
resulting_string += self._input.next()
resulting_string = re.sub(
self.acorn.allLineBreaks, '\n', resulting_string)
return self._create_token(TOKEN.STRING, resulting_string)
return None
def _read_regexp(self, c, previous_token):
if c == '/' and self.allowRegExOrXML(previous_token):
# handle regexp
resulting_string = self._input.next()
esc = False
in_char_class = False
while self._input.hasNext() and \
(esc or in_char_class or self._input.peek() != c) and \
not self._input.testChar(self.acorn.newline):
resulting_string += self._input.peek()
if not esc:
esc = self._input.peek() == '\\'
if self._input.peek() == '[':
in_char_class = True
elif self._input.peek() == ']':
in_char_class = False
else:
esc = False
self._input.next()
if self._input.peek() == c:
resulting_string += self._input.next()
if c == '/':
# regexps may have modifiers /regexp/MOD, so fetch those too
# Only [gim] are valid, but if the user puts in garbage, do
# what we can to take it.
resulting_string += self._input.read(
self.acorn.identifier)
return self._create_token(TOKEN.STRING, resulting_string)
return None
def _read_xml(self, c, previous_token):
if self._options.e4x and c == "<" and self.allowRegExOrXML(previous_token):
# handle e4x xml literals
xmlStr = ""
match = self._patterns.xml.read_match()
if match and not match.group(1):
rootTag = match.group(2)
rootTag = re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', rootTag))
isCurlyRoot = rootTag.startswith('{')
depth = 0
while bool(match):
isEndTag = match.group(1)
tagName = match.group(2)
isSingletonTag = (
match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[")
if not isSingletonTag and (tagName == rootTag or (
isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))):
if isEndTag:
depth -= 1
else:
depth += 1
xmlStr += match.group(0)
if depth <= 0:
break
match = self._patterns.xml.read_match()
# if we didn't close correctly, keep unformatted.
if not match:
xmlStr += self._input.match(re.compile(r'[\s\S]*')).group(0)
xmlStr = re.sub(self.acorn.allLineBreaks, '\n', xmlStr)
return self._create_token(TOKEN.STRING, xmlStr)
return None
def _read_non_javascript(self, c):
resulting_string = ''
if c == '#':
# she-bang
if self._is_first_token():
resulting_string = self._patterns.shebang.read()
if resulting_string:
return self._create_token(TOKEN.UNKNOWN, resulting_string.strip() + '\n')
# handles extendscript #includes
resulting_string = self._patterns.include.read()
if resulting_string:
return self._create_token(TOKEN.UNKNOWN, resulting_string.strip() + '\n')
c = self._input.next()
# Spidermonkey-specific sharp variables for circular references
# https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
# http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp
# around line 1935
sharp = '#'
if self._input.hasNext() and self._input.testChar(digit):
while True:
c = self._input.next()
sharp += c
if (not self._input.hasNext()) or c == '#' or c == '=':
break
if c == '#':
pass
elif self._input.peek() == '[' and self._input.peek(1) == ']':
sharp += '[]'
self._input.next()
self._input.next()
elif self._input.peek() == '{' and self._input.peek(1) == '}':
sharp += '{}'
self._input.next()
self._input.next()
return self._create_token(TOKEN.WORD, sharp)
self._input.back()
elif c == '<' and self._is_first_token():
if self._patterns.html_comment_start.read():
c = '<!--'
while self._input.hasNext() and not self._input.testChar(self.acorn.newline):
c += self._input.next()
self.in_html_comment = True
return self._create_token(TOKEN.COMMENT, c)
elif c == '-' and self.in_html_comment and \
self._patterns.html_comment_end.read():
self.in_html_comment = False
return self._create_token(TOKEN.COMMENT, '-->')
return None
def _read_punctuation(self):
token = None
resulting_string = self._patterns.punct.read()
if resulting_string != '':
if resulting_string == '=':
token = self._create_token(TOKEN.EQUALS, resulting_string)
elif resulting_string == '?.':
token = self._create_token(TOKEN.DOT, resulting_string)
else:
token = self._create_token(TOKEN.OPERATOR, resulting_string)
return token
__regexTokens = { TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK,
TOKEN.START, TOKEN.END_BLOCK, TOKEN.OPERATOR,
TOKEN.EQUALS, TOKEN.EOF, TOKEN.SEMICOLON, TOKEN.COMMA }
def allowRegExOrXML(self, previous_token):
return (previous_token.type == TOKEN.RESERVED and previous_token.text in {'return', 'case', 'throw', 'else', 'do', 'typeof', 'yield'}) or \
(previous_token.type == TOKEN.END_EXPR and previous_token.text == ')' and
previous_token.opened.previous.type == TOKEN.RESERVED and previous_token.opened.previous.text in {'if', 'while', 'for'}) or \
(previous_token.type in self.__regexTokens )
def parse_string(
self,
delimiter,
allow_unescaped_newlines=False,
start_sub=None):
if delimiter == '\'':
pattern = self._patterns.single_quote
elif delimiter == '"':
pattern = self._patterns.double_quote
elif delimiter == '`':
pattern = self._patterns.template_text
elif delimiter == '}':
pattern = self._patterns.template_expression
resulting_string = pattern.read()
next = ''
while self._input.hasNext():
next = self._input.next()
if next == delimiter or \
(not allow_unescaped_newlines and
self.acorn.newline.match(next)):
self._input.back()
break
elif next == '\\' and self._input.hasNext():
current_char = self._input.peek()
if current_char == 'x' or current_char == 'u':
self.has_char_escapes = True
elif current_char == '\r' and self._input.peek(1) == '\n':
self._input.next()
next += self._input.next()
elif start_sub is not None:
if start_sub == '${' and next == '$' and \
self._input.peek() == '{':
next += self._input.next()
if start_sub == next:
if delimiter == '`':
next += self.parse_string(
'}', allow_unescaped_newlines, '`')
else:
next += self.parse_string(
'`', allow_unescaped_newlines, '${')
if self._input.hasNext():
next += self._input.next()
next += pattern.read()
resulting_string += next
return resulting_string
def unescape_string(self, s):
# You think that a regex would work for this
# return s.replace(/\\x([0-9a-f]{2})/gi, function(match, val) {
# return String.fromCharCode(parseInt(val, 16));
# })
# However, dealing with '\xff', '\\xff', '\\\xff' makes this more fun.
out = self.acorn.six.u('')
escaped = 0
input_scan = InputScanner(s)
matched = None
while input_scan.hasNext():
# Keep any whitespace, non-slash characters
# also keep slash pairs.
matched = input_scan.match(re.compile(r'([\s]|[^\\]|\\\\)+'))
if matched:
out += matched.group(0)
if input_scan.peek() != '\\':
continue
input_scan.next()
if input_scan.peek() == 'x':
matched = input_scan.match(re.compile(r'x([0-9A-Fa-f]{2})'))
elif input_scan.peek() == 'u':
matched = input_scan.match(re.compile(r'u([0-9A-Fa-f]{4})'))
else:
out += '\\'
if input_scan.hasNext():
out += input_scan.next()
continue
# If there's some error decoding, return the original string
if not matched:
return s
escaped = int(matched.group(1), 16)
if escaped > 0x7e and escaped <= 0xff and matched.group(
0).startswith('x'):
# we bail out on \x7f..\xff,
# leaving whole string escaped,
# as it's probably completely binary
return s
elif escaped >= 0x00 and escaped < 0x20:
# leave 0x00...0x1f escaped
out += '\\' + matched.group(0)
continue
elif escaped == 0x22 or escaped == 0x27 or escaped == 0x5c:
# single-quote, apostrophe, backslash - escape these
out += ('\\' + chr(escaped))
else:
out += self.acorn.six.unichr(escaped)
return out
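# Hedged usage sketch (mine, not part of the original file): unescape_string()
# above is driven by the unescape_strings option of the public API; the output
# is my expectation of its behaviour, not a verified transcript.
import jsbeautifier
opts = jsbeautifier.default_options()
opts.unescape_strings = True
print(jsbeautifier.beautify('var s = "\\x41\\x42";', opts))
# expected: var s = "AB";  (printable \xNN escapes decoded; quotes, backslash,
# control characters and \x7f-\xff stay escaped, as coded above)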

View File

@@ -0,0 +1 @@
# Empty file :)

View File

@@ -0,0 +1 @@
# Empty file :)

File diff suppressed because it is too large

View File

@@ -0,0 +1,48 @@
import re
import unittest
import jsbeautifier
class TestJSBeautifierIndentation(unittest.TestCase):
def test_tabs(self):
test_fragment = self.decodesto
self.options.indent_with_tabs = 1
test_fragment('{tabs()}', "{\n\ttabs()\n}")
def test_function_indent(self):
test_fragment = self.decodesto
self.options.indent_with_tabs = 1
self.options.keep_function_indentation = 1
test_fragment(
'var foo = function(){ bar() }();',
"var foo = function() {\n\tbar()\n}();")
self.options.tabs = 1
self.options.keep_function_indentation = 0
test_fragment(
'var foo = function(){ baz() }();',
"var foo = function() {\n\tbaz()\n}();")
def decodesto(self, input, expectation=None):
self.assertEqual(
jsbeautifier.beautify(input, self.options), expectation or input)
@classmethod
def setUpClass(cls):
options = jsbeautifier.default_options()
options.indent_size = 4
options.indent_char = ' '
options.preserve_newlines = True
options.jslint_happy = False
options.keep_array_indentation = False
options.brace_style = 'collapse'
options.indent_level = 0
cls.options = options
cls.wrapregex = re.compile('^(.+)$', re.MULTILINE)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,72 @@
#
# General code for JSBeautifier unpackers infrastructure. See README.specs
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
"""General code for JSBeautifier unpackers infrastructure."""
import pkgutil
import re
from jsbeautifier.unpackers import evalbased
# NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js!
BLACKLIST = ['jsbeautifier.unpackers.evalbased']
class UnpackingError(Exception):
"""Badly packed source or general error. Argument is a
meaningful description."""
pass
def getunpackers():
"""Scans the unpackers dir, finds unpackers and add them to UNPACKERS list.
An unpacker will be loaded only if it is a valid python module (name must
adhere to naming conventions) and it is not blacklisted (i.e. inserted
into BLACKLIST."""
path = __path__
prefix = __name__ + '.'
unpackers = []
interface = ['unpack', 'detect', 'PRIORITY']
for _importer, modname, _ispkg in pkgutil.iter_modules(path, prefix):
if 'tests' not in modname and modname not in BLACKLIST:
try:
module = __import__(modname, fromlist=interface)
except ImportError:
raise UnpackingError('Bad unpacker: %s' % modname)
else:
unpackers.append(module)
return sorted(unpackers, key=lambda mod: mod.PRIORITY)
UNPACKERS = getunpackers()
def run(source, evalcode=False):
"""Runs the applicable unpackers and return unpacked source as a string."""
for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]:
source = unpacker.unpack(source)
if evalcode and evalbased.detect(source):
source = evalbased.unpack(source)
return source
def filtercomments(source):
"""NOT USED: strips trailing comments and put them at the top."""
trailing_comments = []
comment = True
while comment:
if re.search(r'^\s*\/\*', source):
comment = source[:source.index('*/') + 2]  # take the leading /* ... */ block
elif re.search(r'^\s*\/\/', source):
comment = re.search(r'^\s*\/\/', source).group(0)
else:
comment = None
if comment:
source = re.sub(r'^\s+', '', source[len(comment):])
trailing_comments.append(comment)
return '\n'.join(trailing_comments) + source
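# Hedged usage sketch (mine, not part of the original file): run() applies
# every unpacker whose detect() accepts the input, lowest PRIORITY first.
print(run('var%20a=b'))   # only the urlencode unpacker fires; expected: var a=b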

View File

@@ -0,0 +1,43 @@
#
# Unpacker for eval() based packers, a part of javascript beautifier
# by Einar Lielmanis <einar@beautifier.io>
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# usage:
#
# if detect(some_string):
# unpacked = unpack(some_string)
#
"""Unpacker for eval() based packers: runs JS code and returns result.
Works only if a JS interpreter (e.g. Mozilla's Rhino) is installed and
properly set up on host."""
from subprocess import PIPE, Popen
PRIORITY = 3
def detect(source):
"""Detects if source is likely to be eval() packed."""
return source.strip().lower().startswith('eval(function(')
def unpack(source):
"""Runs source and return resulting code."""
return jseval('print %s;' % source[4:]) if detect(source) else source
# In case of failure, we'll just return the original, without crashing on user.
def jseval(script):
"""Run code in the JS interpreter and return output."""
try:
interpreter = Popen(['js'], stdin=PIPE, stdout=PIPE)
except OSError:
return script
result, errors = interpreter.communicate(script)
if interpreter.poll() or errors:
return script
return result

View File

@@ -0,0 +1,61 @@
#
# simple unpacker/deobfuscator for scripts messed up with
# javascriptobfuscator.com
#
# written by Einar Lielmanis <einar@beautifier.io>
# rewritten in Python by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# Will always return valid javascript: if `detect()` is false, `code` is
# returned, unmodified.
#
# usage:
#
# if javascriptobfuscator.detect(some_string):
# some_string = javascriptobfuscator.unpack(some_string)
#
"""deobfuscator for scripts messed up with JavascriptObfuscator.com"""
import re
PRIORITY = 1
def smartsplit(code):
"""Split `code` at " symbol, only if it is not escaped."""
strings = []
pos = 0
while pos < len(code):
if code[pos] == '"':
word = '' # new word
pos += 1
while pos < len(code):
if code[pos] == '"':
break
if code[pos] == '\\':
word += '\\'
pos += 1
word += code[pos]
pos += 1
strings.append('"%s"' % word)
pos += 1
return strings
def detect(code):
"""Detects if `code` is JavascriptObfuscator.com packed."""
# prefer `is not` idiom, so that a true boolean is returned
return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None)
def unpack(code):
"""Unpacks JavascriptObfuscator.com packed code."""
if detect(code):
matches = re.search(r'var (_0x[a-f\d]+) ?\= ?\[(.*?)\];', code)
if matches:
variable = matches.group(1)
dictionary = smartsplit(matches.group(2))
code = code[len(matches.group(0)):]
for key, value in enumerate(dictionary):
code = code.replace(r'%s[%s]' % (variable, key), value)
return code
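# Hedged usage sketch (mine, not part of the original file): unpack() strips
# the _0x... lookup array and substitutes each indexed reference inline.
print(unpack('var _0x8df3=["hi"];alert(_0x8df3[0]);'))   # expected: alert("hi");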

View File

@@ -0,0 +1,90 @@
#
# deobfuscator for scripts messed up with myobfuscate.com
# by Einar Lielmanis <einar@beautifier.io>
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# usage:
#
# if detect(some_string):
# unpacked = unpack(some_string)
#
# CAVEAT by Einar Lielmanis
#
# You really don't want to obfuscate your scripts there: they're tracking
# your unpackings, your script gets turned into something like this,
# as of 2011-08-26:
#
# var _escape = 'your_script_escaped';
# var _111 = document.createElement('script');
# _111.src = 'http://api.www.myobfuscate.com/?getsrc=ok' +
# '&ref=' + encodeURIComponent(document.referrer) +
# '&url=' + encodeURIComponent(document.URL);
# var 000 = document.getElementsByTagName('head')[0];
# 000.appendChild(_111);
# document.write(unescape(_escape));
#
"""Deobfuscator for scripts messed up with MyObfuscate.com"""
import re
import base64
# Python 2 retrocompatibility
# pylint: disable=F0401
# pylint: disable=E0611
try:
from urllib import unquote
except ImportError:
from urllib.parse import unquote
from jsbeautifier.unpackers import UnpackingError
PRIORITY = 1
CAVEAT = """//
// Unpacker warning: be careful when using myobfuscate.com for your projects:
// scripts obfuscated by the free online version call back home.
//
"""
SIGNATURE = (
r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65'
r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75'
r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B'
r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78'
r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","'
r'\x6C\x65\x6E\x67\x74\x68"]')
def detect(source):
"""Detects MyObfuscate.com packer."""
return SIGNATURE in source
def unpack(source):
"""Unpacks js code packed with MyObfuscate.com"""
if not detect(source):
return source
payload = unquote(_filter(source))
match = re.search(r"^var _escape\='<script>(.*)<\/script>'",
payload, re.DOTALL)
polished = match.group(1) if match else source
return CAVEAT + polished
def _filter(source):
"""Extracts and decode payload (original file) from `source`"""
try:
varname = re.search(r'eval\(\w+\(\w+\((\w+)\)\)\);', source).group(1)
reverse = re.search(r"var +%s *\= *'(.*)';" % varname, source).group(1)
except AttributeError:
raise UnpackingError('Malformed MyObfuscate data.')
try:
return base64.b64decode(reverse[::-1].encode('utf8')).decode('utf8')
except TypeError:
raise UnpackingError('MyObfuscate payload is not base64-encoded.')
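# Hedged sketch (mine, not part of the original file): detect() is a plain
# substring test for the hex-escaped array SIGNATURE above.
print(detect('var x = 1;'))        # False
print(detect('foo' + SIGNATURE))   # True for any source containing the signature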

View File

@@ -0,0 +1,149 @@
#
# Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier
# by Einar Lielmanis <einar@beautifier.io>
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# usage:
#
# if detect(some_string):
# unpacked = unpack(some_string)
#
"""Unpacker for Dean Edward's p.a.c.k.e.r"""
import re
import string
from jsbeautifier.unpackers import UnpackingError
PRIORITY = 1
def detect(source):
"""Detects whether `source` is P.A.C.K.E.R. coded."""
global beginstr
global endstr
beginstr = ''
endstr = ''
begin_offset = -1
mystr = re.search(r'eval[ ]*\([ ]*function[ ]*\([ ]*p[ ]*,[ ]*a[ ]*,[ ]*c['
r' ]*,[ ]*k[ ]*,[ ]*e[ ]*,[ ]*', source)
if mystr:
begin_offset = mystr.start()
beginstr = source[:begin_offset]
if begin_offset != -1:
# find the trailing code that follows the packed payload
source_end = source[begin_offset:]
if source_end.split("')))", 1)[0] == source_end:
try:
endstr = source_end.split("}))", 1)[1]
except IndexError:
endstr = ''
else:
endstr = source_end.split("')))", 1)[1]
return (mystr is not None)
def unpack(source):
"""Unpacks P.A.C.K.E.R. packed js code."""
payload, symtab, radix, count = _filterargs(source)
if count != len(symtab):
raise UnpackingError('Malformed p.a.c.k.e.r. symtab.')
try:
unbase = Unbaser(radix)
except TypeError:
raise UnpackingError('Unknown p.a.c.k.e.r. encoding.')
def lookup(match):
"""Look up symbols in the synthetic symtab."""
word = match.group(0)
return symtab[unbase(word)] or word
source = re.sub(r'\b\w+\b', lookup, payload)
return _replacestrings(source)
def _filterargs(source):
"""Juice from a source file the four args needed by decoder."""
juicers = [
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"),
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"),
]
for juicer in juicers:
args = re.search(juicer, source, re.DOTALL)
if args:
a = args.groups()
if a[1] == "[]":
a = list(a)
a[1] = 62
a = tuple(a)
try:
return a[0], a[3].split('|'), int(a[1]), int(a[2])
except ValueError:
raise UnpackingError('Corrupted p.a.c.k.e.r. data.')
# could not find a satisfying regex
raise UnpackingError(
'Could not make sense of p.a.c.k.e.r data (unexpected code structure)')
def _replacestrings(source):
"""Strip string lookup table (list) and replace values in source."""
global beginstr
global endstr
match = re.search(r'var *(_\w+)\=\["(.*?)"\];', source, re.DOTALL)
if match:
varname, strings = match.groups()
startpoint = len(match.group(0))
lookup = strings.split('","')
variable = '%s[%%d]' % varname
for index, value in enumerate(lookup):
source = source.replace(variable % index, '"%s"' % value)
return source[startpoint:]
return beginstr + source + endstr
class Unbaser(object):
"""Functor for a given base. Will efficiently convert
strings to natural numbers."""
ALPHABET = {
62: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
95: (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
}
def __init__(self, base):
self.base = base
# fill elements 37...61, if necessary
if 36 < base < 62:
if base not in self.ALPHABET:
self.ALPHABET[base] = self.ALPHABET[62][:base]
# attrs = self.ALPHABET
# print ', '.join("%s: %s" % item for item in attrs.items())
# If base can be handled by int() builtin, let it do it for us
if 2 <= base <= 36:
self.unbase = lambda string: int(string, base)
else:
# Build conversion dictionary cache
try:
self.dictionary = dict(
(cipher, index) for index, cipher in enumerate(
self.ALPHABET[base]))
except KeyError:
raise TypeError('Unsupported base encoding.')
self.unbase = self._dictunbaser
def __call__(self, string):
return self.unbase(string)
def _dictunbaser(self, string):
"""Decodes a value to an integer."""
ret = 0
for index, cipher in enumerate(string[::-1]):
ret += (self.base ** index) * self.dictionary[cipher]
return ret
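# Hedged illustration (mine, not part of the original file) of the Unbaser
# above: base 62 uses the dictionary path, so '10' decodes to 62 and 'a' to 10
# (its index in the base-62 alphabet).
unbase = Unbaser(62)
print(unbase('10'), unbase('a'))   # expected: 62 10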

View File

@@ -0,0 +1,2 @@
# Empty file :)
# pylint: disable=C0111

View File

@@ -0,0 +1,54 @@
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
"""Tests for JavaScriptObfuscator unpacker."""
import unittest
from jsbeautifier.unpackers.javascriptobfuscator import (
unpack, detect, smartsplit)
# pylint: disable=R0904
class TestJavascriptObfuscator(unittest.TestCase):
"""JavascriptObfuscator.com test case."""
def test_smartsplit(self):
"""Test smartsplit() function."""
split = smartsplit
def equals(data, result): return self.assertEqual(split(data), result)
equals('', [])
equals('"a", "b"', ['"a"', '"b"'])
equals('"aaa","bbbb"', ['"aaa"', '"bbbb"'])
equals('"a", "b\\\""', ['"a"', '"b\\\""'])
def test_detect(self):
"""Test detect() function."""
def positive(source): return self.assertTrue(detect(source))
def negative(source): return self.assertFalse(detect(source))
negative('')
negative('abcd')
negative('var _0xaaaa')
positive('var _0xaaaa = ["a", "b"]')
positive('var _0xaaaa=["a", "b"]')
positive('var _0x1234=["a","b"]')
def test_unpack(self):
"""Test unpack() function."""
def decodeto(ob, original): return self.assertEqual(unpack(ob), original)
decodeto('var _0x8df3=[];var a=10;', 'var a=10;')
decodeto('var _0xb2a7=["\x74\x27\x65\x73\x74"];var i;for(i=0;i<10;++i)'
'{alert(_0xb2a7[0]);} ;', 'var i;for(i=0;i<10;++i){alert'
'("t\'est");} ;')
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,43 @@
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
"""Tests for MyObfuscate unpacker."""
import unittest
import os
from jsbeautifier.unpackers.myobfuscate import detect, unpack
from jsbeautifier.unpackers.tests import __path__ as path
INPUT = os.path.join(path[0], 'test-myobfuscate-input.js')
OUTPUT = os.path.join(path[0], 'test-myobfuscate-output.js')
# pylint: disable=R0904
class TestMyObfuscate(unittest.TestCase):
# pylint: disable=C0103
"""MyObfuscate obfuscator testcase."""
@classmethod
def setUpClass(cls):
"""Load source files (encoded and decoded version) for tests."""
with open(INPUT, 'r') as data:
cls.input = data.read()
with open(OUTPUT, 'r') as data:
cls.output = data.read()
def test_detect(self):
"""Test detect() function."""
def detected(source): return self.assertTrue(detect(source))
detected(self.input)
def test_unpack(self):
"""Test unpack() function."""
def check(inp, out): return self.assertEqual(unpack(inp), out)
check(self.input, self.output)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,56 @@
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
"""Tests for P.A.C.K.E.R. unpacker."""
import unittest
from jsbeautifier.unpackers.packer import detect, unpack
# pylint: disable=R0904
class TestPacker(unittest.TestCase):
"""P.A.C.K.E.R. testcase."""
def test_detect(self):
"""Test detect() function."""
def positive(source): return self.assertTrue(detect(source))
def negative(source): return self.assertFalse(detect(source))
negative('')
negative('var a = b')
positive('eval(function(p,a,c,k,e,r')
positive('eval ( function(p, a, c, k, e, r')
def test_unpack(self):
"""Test unpack() function."""
def check(inp, out):
return detect(inp) and self.assertEqual(unpack(inp), out)
check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
"){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
"function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace("
"new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
"62,3,'var||a'.split('|'),0,{}))", 'var a=1')
check("function test (){alert ('This is a test!')}; "
"eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String))"
"{while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function"
"(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp("
"'\\b'+e(c)+'\\b','g'),k[c]);return p}('0 2=1',3,3,"
"'var||a'.split('|'),0,{}))",
"function test (){alert ('This is a test!')}; var a=1")
check("eval(function(p,a,c,k,e,d){e=function(c){return c.toString(36)};if(!''.replace(/^/,String)){while(c--){d[c.toString(a)]=k[c]||c.toString(a)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('2 0=\"4 3!\";2 1=0.5(/b/6);a.9(\"8\").7=1;',12,12,'str|n|var|W3Schools|Visit|search|i|innerHTML|demo|getElementById|document|w3Schools'.split('|'),0,{}))",
"var str=\"Visit W3Schools!\";var n=str.search(/w3Schools/i);document.getElementById(\"demo\").innerHTML=n;")
check("a=b;\r\nwhile(1){\ng=h;{return'\\w+'};break;eval(function(p,a,c,k,e,d){e=function(c){return c.toString(36)};if(!''.replace(/^/,String)){while(c--){d[c.toString(a)]=k[c]||c.toString(a)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('$(5).4(3(){$(\'.1\').0(2);$(\'.6\').0(d);$(\'.7\').0(b);$(\'.a\').0(8);$(\'.9\').0(c)});',14,14,'html|r5e57|8080|function|ready|document|r1655|rc15b|8888|r39b0|r6ae9|3128|65309|80'.split('|'),0,{}))c=abx;",
"a=b;\r\nwhile(1){\ng=h;{return'\\w+'};break;$(document).ready(function(){$('.r5e57').html(8080);$('.r1655').html(80);$('.rc15b').html(3128);$('.r6ae9').html(8888);$('.r39b0').html(65309)});c=abx;")
check("eval(function(p,a,c,k,e,r){e=function(c){return c.toString(36)};if('0'.replace(0,e)==0){while(c--)r[e(c)]=k[c];k=[function(e){return r[e]||e}];e=function(){return'[0-9ab]'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('$(5).a(6(){ $(\'.8\').0(1); $(\'.b\').0(4); $(\'.9\').0(2); $(\'.7\').0(3)})',[],12,'html|52136|555|65103|8088|document|function|r542c|r8ce6|rb0de|ready|rfab0'.split('|'),0,{}))",
"$(document).ready(function(){ $('.r8ce6').html(52136); $('.rfab0').html(8088); $('.rb0de').html(555); $('.r542c').html(65103)})")
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,45 @@
#
# written by Stefano Sanfilippo <a.little.coder@gmail.com>
#
"""Tests for urlencoded unpacker."""
import unittest
from jsbeautifier.unpackers.urlencode import detect, unpack
# pylint: disable=R0904
class TestUrlencode(unittest.TestCase):
"""urlencode test case."""
def test_detect(self):
"""Test detect() function."""
def encoded(source): return self.assertTrue(detect(source))
def unencoded(source): return self.assertFalse(detect(source))
unencoded('')
unencoded('var a = b')
encoded('var%20a+=+b')
encoded('var%20a=b')
encoded('var%20%21%22')
def test_unpack(self):
"""Test unpack function."""
def equals(source, result): return self.assertEqual(unpack(source), result)
equals('', '')
equals('abcd', 'abcd')
equals('var a = b', 'var a = b')
equals('var%20a=b', 'var a=b')
equals('var%20a+=+b', 'var a = b')
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,36 @@
#
# Trivial bookmarklet/escaped script detector for the javascript beautifier
# written by Einar Lielmanis <einar@beautifier.io>
# rewritten in Python by Stefano Sanfilippo <a.little.coder@gmail.com>
#
# Will always return valid javascript: if `detect()` is false, `code` is
# returned, unmodified.
#
# usage:
#
# some_string = urlencode.unpack(some_string)
#
"""Bookmarklet/escaped script unpacker."""
# Python 2 retrocompatibility
# pylint: disable=F0401
# pylint: disable=E0611
try:
from urllib import unquote_plus
except ImportError:
from urllib.parse import unquote_plus
PRIORITY = 0
def detect(code):
"""Detects if a scriptlet is urlencoded."""
# The fact that the script doesn't contain any literal spaces but has %20
# instead should be a sufficient check for now.
return ' ' not in code and ('%20' in code or code.count('%') > 3)
def unpack(code):
"""URL decode `code` source string."""
return unquote_plus(code) if detect(code) else code
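# Hedged usage sketch (mine, not part of the original file): unquote_plus
# decodes both %20 and '+' to spaces, while non-encoded input falls through.
print(unpack('var%20a+=+b'))   # expected: var a = b
print(unpack('var a = b'))     # detect() is False, returned unchanged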