OLD | NEW |
(Empty) | |
| 1 # Use of this source code is governed by a BSD-style license (MIT) |
| 2 ''' |
| 3 Created on 20/01/2011 |
| 4 |
| 5 v0.1 (C) Gerald Storer |
| 6 MIT License |
| 7 |
| 8 Based on JSON.minify.js: |
| 9 https://github.com/getify/JSON.minify |
| 10 ''' |
| 11 |
| 12 import re |
| 13 |
def json_minify(json, strip_space=True):
    '''
    Strip ``//`` and ``/* */`` comments (and optionally whitespace) from JSON text.

    json        -- the JSON-with-comments source text.
    strip_space -- when true (the default), also remove the insignificant
                   whitespace (space, tab, LF, CR) found outside of strings.

    Returns the minified text as a string.  Text belonging to a comment that
    is still open at the end of the input (a ``//`` comment without a final
    newline, or an unterminated ``/*``) is discarded rather than emitted.
    '''
    # Tokens that can change the scanner state: a quote, a comment
    # opener/closer, or a line break (which ends a // comment).
    tokenizer = re.compile(r'"|/\*|\*/|//|\n|\r')
    # Only the white space characters defined in the JSON standard.
    whitespace = re.compile('[ \t\n\r]+')

    in_string = False
    in_multiline_comment = False
    in_singleline_comment = False

    new_str = []
    from_index = 0  # from is a keyword in Python

    for match in tokenizer.finditer(json):
        token = match.group()
        # Snapshot of the comment state *before* this token is processed;
        # at most one branch below runs, so the snapshot stays valid.
        in_comment = in_multiline_comment or in_singleline_comment

        if not in_comment:
            # Emit the text between the previous token and this one.
            chunk = json[from_index:match.start()]
            if not in_string and strip_space:
                chunk = whitespace.sub('', chunk)
            new_str.append(chunk)

        from_index = match.end()

        if token == '"' and not in_comment:
            if in_string:
                # Count the backslashes directly before the quote: an odd
                # count means the quote itself is escaped.  Scanning
                # backwards avoids copying the whole prefix per quote.
                idx = match.start() - 1
                while idx >= 0 and json[idx] == '\\':
                    idx -= 1
                escaped = (match.start() - 1 - idx) % 2 == 1
            else:
                escaped = False
            if not escaped:
                # start of string with ", or unescaped " found to end string
                in_string = not in_string
            from_index -= 1  # include " character in next catch
        elif token == '/*' and not in_string and not in_comment:
            in_multiline_comment = True
        elif (token == '*/' and not in_string and in_multiline_comment
                and not in_singleline_comment):
            in_multiline_comment = False
        elif token == '//' and not in_string and not in_comment:
            in_singleline_comment = True
        elif (token in ('\n', '\r') and not in_string
                and not in_multiline_comment and in_singleline_comment):
            in_singleline_comment = False
        elif not in_comment and (
                token not in ('\n', '\r', ' ', '\t') or not strip_space):
            # Comment markers inside strings, and line breaks that are
            # being preserved, are copied through verbatim.
            new_str.append(token)

    # Emit whatever follows the last token, unless it is comment text.
    if not (in_multiline_comment or in_singleline_comment):
        tail = json[from_index:]
        if not in_string and strip_space:
            tail = whitespace.sub('', tail)
        new_str.append(tail)
    return ''.join(new_str)
| 54 |
if __name__ == '__main__':
    # Self-test: minify each commented sample and check that it parses to the
    # same value as the hand-minified expected text.
    import json  # requires Python 2.6+ to run tests

    def test_json(s):
        # Parse the minified form of s with the standard JSON parser.
        return json.loads(json_minify(s))

    # Line comments, block comments and a trailing block comment.
    test1 = '''// this is a JSON file with comments
{
"foo": "bar", // this is cool
"bar": [
"baz", "bum", "zam"
],
/* the rest of this document is just fluff
in case you are interested. */
"something": 10,
"else": 20
}

/* NOTE: You can easily strip the whitespace and comments
from such a file with the JSON.minify() project hosted
here on github at http://github.com/getify/JSON.minify
*/
'''

    test1_res = '''{"foo":"bar","bar":["baz","bum","zam"],"something":10,"else":20}'''

    # Comment markers inside strings must survive; real comments must not.
    test2 = '''
{"/*":"*/","//":"",/*"//"*/"/*/"://
"//"}

'''
    test2_res = '''{"/*":"*/","//":"","/*/":"//"}'''

    # Tokens spread over several lines, plus an escaped quote in a key.
    test3 = r'''/*
this is a
multi line comment */{

"foo"
:
"bar/*"// something
, "b\"az":/*
something else */"blah"

}
'''
    test3_res = r'''{"foo":"bar/*","b\"az":"blah"}'''

    # Runs of backslashes before quotes (even = literal, odd = escaped quote).
    test4 = r'''{"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
"baz\\\\": /* yay */ "fo\\\\\"*/o"
}
'''
    test4_res = r'''{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'''

    assert test_json(test1) == json.loads(test1_res), 'Failed test 1'
    assert test_json(test2) == json.loads(test2_res), 'Failed test 2'
    assert test_json(test3) == json.loads(test3_res), 'Failed test 3'
    assert test_json(test4) == json.loads(test4_res), 'Failed test 4'
    if __debug__:  # Don't print passed message if the asserts didn't run
        # Call form of print works on both Python 2 and Python 3.
        print('Passed all tests')
OLD | NEW |