| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/env python2 |
| 2 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Utilities for generating IDN test cases. |
| 7 |
| 8 Either use the command-line interface (see --help) or directly call make_case |
| 9 from Python shell (see make_case documentation). |
| 10 """ |
| 11 |
| 12 import argparse |
| 13 import codecs |
| 14 import doctest |
| 15 import sys |
| 16 |
| 17 |
def str_to_c_string(string):
    """Converts a Python str (ASCII) to a C string literal.

    Printable ASCII characters are copied through unchanged, except that
    double quotes and backslashes are backslash-escaped. Newline, carriage
    return and tab use their short C escapes; every other character is
    written as a two-digit hexadecimal escape.

    |string| may be a str, or a bytes object on Python 3. Returns the literal,
    including the surrounding double quotes, as a str.

    >>> str_to_c_string('abc\x8c')
    '"abc\\\\x8c"'
    >>> str_to_c_string('say "hi"')
    '"say \\\\"hi\\\\""'
    >>> str_to_c_string('\x8cd')
    '"\\\\x8c" "d"'
    """
    # Keyed by character code so that str and Python 3 bytes input (which
    # iterates as ints) share one code path.
    short_escapes = {
        0x22: '\\"',
        0x5c: '\\\\',
        0x0a: '\\n',
        0x0d: '\\r',
        0x09: '\\t',
    }
    hex_digits = '0123456789abcdefABCDEF'
    result = ['"']
    for item in string:
        code = item if isinstance(item, int) else ord(item)
        if code in short_escapes:
            piece = short_escapes[code]
        elif 0x20 <= code < 0x7f:
            piece = chr(code)
            # If the previous character was hex-escaped and this one is a hex
            # digit, end and restart the string literal. Otherwise this
            # character would extend the escape sequence in C.
            if result[-1].startswith('\\x') and piece in hex_digits:
                result.append('" "')
        else:
            piece = '\\x%02x' % code
        result.append(piece)
    result.append('"')
    return ''.join(result)
| 25 |
| 26 |
def ishexdigit(c):
    """Returns True if |c| is a single ASCII hexadecimal digit (0-9, a-f, A-F).

    Only ASCII digits count: other Unicode digits are not hexadecimal digits
    in a C escape sequence, so they return False. Anything that is not exactly
    one character long also returns False.

    >>> ishexdigit('0')
    True
    >>> ishexdigit('9')
    True
    >>> ishexdigit('/')
    False
    >>> ishexdigit(':')
    False
    >>> ishexdigit('a')
    True
    >>> ishexdigit('f')
    True
    >>> ishexdigit('g')
    False
    >>> ishexdigit('A')
    True
    >>> ishexdigit('F')
    True
    >>> ishexdigit('G')
    False
    >>> ishexdigit(u'\u0663')
    False
    >>> ishexdigit('')
    False
    """
    # A membership test against the explicit ASCII set avoids isdigit(), which
    # accepts non-ASCII digits, and lower(), which can expand one character
    # into two.
    return len(c) == 1 and c in '0123456789abcdefABCDEF'
| 51 |
| 52 |
def unicode_to_c_wstring(string):
    """Converts a Python str or unicode to a C wide-string literal.

    Printable ASCII characters are copied through unchanged, except that
    double quotes and backslashes are backslash-escaped. Newline, carriage
    return and tab use their short C escapes. Characters above U+FFFF are
    written as an eight-digit universal character name, and every other
    character as a hexadecimal escape with at least four digits.

    Returns the literal, including the L prefix and quotes, as a str.

    >>> unicode_to_c_wstring(u'b\u00fccher.de')
    'L"b\\\\x00fc" L"cher.de"'
    >>> unicode_to_c_wstring('abc')
    'L"abc"'
    """
    # Escapes are chosen from the character code rather than by slicing
    # repr(), whose prefix and quoting differ between str and unicode and
    # between Python versions.
    short_escapes = {
        0x22: '\\"',
        0x5c: '\\\\',
        0x0a: '\\n',
        0x0d: '\\r',
        0x09: '\\t',
    }
    hex_digits = '0123456789abcdefABCDEF'
    result = ['L"']
    for item in string:
        code = item if isinstance(item, int) else ord(item)
        if code in short_escapes:
            piece = short_escapes[code]
        elif 0x20 <= code < 0x7f:
            piece = chr(code)
            # If the previous character was \x-escaped, and the next character
            # is a hex digit, we need to end and restart the string literal.
            # Otherwise, the next character will extend the \x escape sequence.
            if result[-1].startswith('\\x') and piece in hex_digits:
                result.append('" L"')
        elif code > 0xffff:
            # Fixed-width form, so a following hex digit cannot extend it.
            piece = '\\U%08x' % code
        else:
            # Force a minimum of 4 digits (this isn't necessary but is the
            # preferred style for these test cases).
            piece = '\\x%04x' % code
        result.append(piece)
    result.append('"')
    return ''.join(result)
| 74 |
| 75 |
def make_case(unicode_domain, unicode_allowed=True, case_name=None):
    """Generates a C++ test case for an IDN domain test.

    This is designed specifically for the IDNTestCase struct in the file
    components/url_formatter/url_formatter_unittest.cc. It generates a row of
    the idn_cases array, specifying a test for a particular domain.

    |unicode_domain| is a Unicode string of the domain (NOT IDNA-encoded).
    |unicode_allowed| specifies whether the test case should expect the domain
    to be displayed in Unicode form (True) or in IDNA/Punycode ASCII encoding
    (False); any truthy value is treated as True. |case_name| is just for the
    comment.

    This function will automatically convert the domain to its IDNA format, and
    prepare the test case in C++ syntax. The result is printed to stdout;
    nothing is returned.

    >>> make_case(u'\u5317\u4eac\u5927\u5b78.cn', True, 'Hanzi (Chinese)')
     // Hanzi (Chinese)
     {"xn--1lq90ic7f1rc.cn", L"\\x5317\\x4eac\\x5927\\x5b78.cn", true},
    >>> make_case(u'b\u00fccher.de', True)
     {"xn--bcher-kva.de", L"b\\x00fc" L"cher.de", true},

    This will also apply normalization to the Unicode domain, as required by the
    IDNA algorithm. This example shows U+210F normalized to U+0127 (this
    generates the exact same test case as u'\u0127ello'):

    >>> make_case(u'\u210fello', True)
     {"xn--ello-4xa", L"\\x0127" L"ello", true},
    """
    idna_input = codecs.encode(unicode_domain, 'idna')
    # Round-trip to ensure normalization.
    unicode_output = codecs.decode(idna_input, 'idna')
    # The codec returns bytes on Python 3. Convert to the native str type so
    # the formatted literal does not pick up a bytes prefix. On Python 2 the
    # value is already a str and is left alone.
    if not isinstance(idna_input, str):
        idna_input = idna_input.decode('ascii')
    # Spell the C++ boolean explicitly so any truthy or falsy flag yields a
    # valid literal.
    allowed_literal = 'true' if unicode_allowed else 'false'
    # NOTE(review): the leading whitespace of the two templates below is
    # copied as it appears in this view of the file; confirm it matches the
    # indentation used by the idn_cases array.
    if case_name:
        print(' // %s' % case_name)
    print(' {%s, %s, %s},' %
          (str_to_c_string(idna_input), unicode_to_c_wstring(unicode_output),
           allowed_literal))
| 112 |
| 113 |
def main(args=None):
    """Command-line entry point.

    |args| is the list of command-line arguments, excluding the program name;
    it defaults to sys.argv[1:].

    With --test, runs this module's doctests and returns 1 if any failed.
    Otherwise prints the test case for DOMAIN via make_case. Returns the
    process exit status (0 on success). A missing DOMAIN is reported through
    argparse, which exits with a usage error.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(description='Generate an IDN test case.')
    parser.add_argument('domain', metavar='DOMAIN', nargs='?',
                        help='the Unicode domain (not encoded)')
    parser.add_argument('--name', metavar='NAME',
                        help='the name of the test case')
    parser.add_argument('--no-unicode', action='store_false',
                        dest='unicode_allowed', default=True,
                        help='expect the domain to be Punycoded')
    parser.add_argument('--test', action='store_true', dest='run_tests',
                        help='run unit tests')

    args = parser.parse_args(args)

    if args.run_tests:
        # Surface doctest failures in the exit status instead of dropping them.
        return 1 if doctest.testmod().failed else 0

    if not args.domain:
        parser.error('Required argument: DOMAIN')

    domain = args.domain
    if isinstance(domain, bytes):
        # Python 2 passes arguments as byte strings. Assume stdin.encoding is
        # the encoding used for command-line arguments; it is None when stdin
        # is not a terminal, so fall back rather than pass None to decode().
        encoding = (sys.stdin.encoding or sys.getfilesystemencoding() or
                    'utf-8')
        domain = domain.decode(encoding)
    make_case(domain, unicode_allowed=args.unicode_allowed, case_name=args.name)
    return 0


if __name__ == '__main__':
    sys.exit(main())
| OLD | NEW |