Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(222)

Side by Side Diff: tools/security/idn_test_case_generator.py

Issue 2880983002: Added a Python script in tools/ to generate IDN test cases. (Closed)
Patch Set: Minor refactor. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/url_formatter/url_formatter_unittest.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python2
2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Utilities for generating IDN test cases.
7
8 Either use the command-line interface (see --help) or directly call make_case
9 from Python shell (see make_case documentation).
10 """
11
12 import argparse
13 import codecs
14 import doctest
15 import sys
16
17
def str_to_c_string(string):
    """Converts a Python str (ASCII) to a C string literal.

    The literal is built character by character instead of by rewriting the
    output of repr(), so embedded quotes are handled: a double quote or a
    backslash is backslash-escaped and a single quote is emitted unchanged.
    Tab, newline and carriage return use their short C escapes, and any other
    character outside printable ASCII becomes a \\x escape. The literal is
    closed and reopened after a \\x escape that is followed by a hex digit,
    because a C compiler would otherwise treat the digit as part of the
    escape sequence. On Python 3 a bytes value is accepted as well.

    |string| is expected to contain only byte-valued characters.

    >>> str_to_c_string('abc\x8c')
    '"abc\\\\x8c"'
    >>> print(str_to_c_string('say "hi"'))
    "say \\"hi\\""
    """
    if isinstance(string, bytes) and not isinstance(string, str):
        # Python 3 bytes: map each byte to the character with the same code
        # so the loop below can treat both input types alike. (On Python 2,
        # bytes is str, so this branch is never taken.)
        string = string.decode('latin-1')

    short_escapes = {
        '"': '\\"',
        '\\': '\\\\',
        '\t': '\\t',
        '\n': '\\n',
        '\r': '\\r',
    }
    pieces = ['"']
    for char in string:
        # A \x escape has no length limit in C, so a following hex digit must
        # be placed in a new, adjacent string literal.
        if pieces[-1].startswith('\\x') and char in '0123456789abcdefABCDEF':
            pieces.append('" "')
        if char in short_escapes:
            pieces.append(short_escapes[char])
        elif ' ' <= char <= '~':
            pieces.append(str(char))
        else:
            pieces.append('\\x%02x' % ord(char))
    pieces.append('"')
    return ''.join(pieces)
25
26
def ishexdigit(c):
    """Returns True if |c| is a single ASCII hexadecimal digit.

    Only the characters 0-9, a-f and A-F qualify; these are the characters a
    C compiler would absorb into a preceding \\x escape sequence. Non-ASCII
    digits, the empty string and multi-character strings all yield False.

    >>> ishexdigit('0')
    True
    >>> ishexdigit('9')
    True
    >>> ishexdigit('/')
    False
    >>> ishexdigit(':')
    False
    >>> ishexdigit('a')
    True
    >>> ishexdigit('f')
    True
    >>> ishexdigit('g')
    False
    >>> ishexdigit('A')
    True
    >>> ishexdigit('F')
    True
    >>> ishexdigit('G')
    False
    >>> ishexdigit(u'\\u0661')
    False
    >>> ishexdigit('')
    False
    """
    # The length check is needed because `in` on a string is a substring test.
    return len(c) == 1 and c in '0123456789abcdefABCDEF'


def unicode_to_c_wstring(string):
    """Converts a Python str or unicode to a C wide-string literal.

    Printable ASCII characters are copied through, except that a double quote
    and a backslash are backslash-escaped. Tab, newline and carriage return
    use their short C escapes. Every other character up to U+FFFF is written
    as a \\x escape with at least four hex digits, and characters above
    U+FFFF are written as an eight-digit \\U escape.

    The escape is computed from the code point rather than by slicing
    repr(c), so the result does not depend on whether repr() adds a u''
    prefix.

    >>> unicode_to_c_wstring(u'b\u00fccher.de')
    'L"b\\\\x00fc" L"cher.de"'
    """
    short_escapes = {
        '"': '\\"',
        '\\': '\\\\',
        '\t': '\\t',
        '\n': '\\n',
        '\r': '\\r',
    }
    result = ['L"']
    for c in string:
        # If the previous character was \x-escaped, and the next character is
        # a hex digit, we need to end and restart the string literal.
        # Otherwise, the next character will extend the \x escape sequence.
        if result[-1].startswith('\\x') and ishexdigit(c):
            result.append('" L"')
        code = ord(c)
        if c in short_escapes:
            escaped = short_escapes[c]
        elif 0x20 <= code < 0x7f:
            # str() keeps the joined result a plain str on Python 2.
            escaped = str(c)
        elif code <= 0xffff:
            # A minimum of 4 digits isn't necessary but is the preferred
            # style for these test cases.
            escaped = '\\x%04x' % code
        else:
            escaped = '\\U%08x' % code
        result.append(escaped)
    result.append('"')
    return ''.join(result)
74
75
def make_case(unicode_domain, unicode_allowed=True, case_name=None):
    """Prints one C++ IDN test case row for the given domain.

    The output is meant for the IDNTestCase struct in
    components/url_formatter/url_formatter_unittest.cc: it is a single row of
    the idn_cases array describing the test for one domain.

    |unicode_domain| is the domain as a Unicode string (NOT IDNA-encoded).
    |unicode_allowed| tells whether the test expects the domain to be shown
    in Unicode form (True) or in its IDNA/Punycode ASCII encoding (False).
    |case_name|, when given, is only used for a comment line printed before
    the row.

    The domain is converted to its IDNA form automatically and the row is
    printed in C++ syntax.

    >>> make_case(u'\u5317\u4eac\u5927\u5b78.cn', True, 'Hanzi (Chinese)')
     // Hanzi (Chinese)
     {"xn--1lq90ic7f1rc.cn", L"\\x5317\\x4eac\\x5927\\x5b78.cn", true},
    >>> make_case(u'b\u00fccher.de', True)
     {"xn--bcher-kva.de", L"b\\x00fc" L"cher.de", true},

    The Unicode domain is also normalized, as the IDNA algorithm requires.
    Here U+210F is normalized to U+0127, so the row is exactly the one that
    u'\u0127ello' would produce:

    >>> make_case(u'\u210fello', True)
     {"xn--ello-4xa", L"\\x0127" L"ello", true},
    """
    # Encoding to IDNA and decoding again yields the normalized Unicode form.
    encoded = codecs.encode(unicode_domain, 'idna')
    normalized = codecs.decode(encoded, 'idna')

    if case_name:
        print(' // %s' % case_name)

    ascii_literal = str_to_c_string(encoded)
    wide_literal = unicode_to_c_wstring(normalized)
    allowed_literal = repr(unicode_allowed).lower()
    print(' {%s, %s, %s},' % (ascii_literal, wide_literal, allowed_literal))
112
113
def main(args=None):
    """Command-line entry point: prints one IDN test case or runs doctests.

    |args| is the list of command-line arguments without the program name;
    when None, sys.argv[1:] is used.

    Recognized arguments:
      DOMAIN        the Unicode domain (not encoded)
      --name NAME   name of the test case, passed to make_case as case_name
      --no-unicode  expect the domain to be Punycoded
      --test        run this module's doctests instead of generating a case

    Returns 1 if --test was given and any doctest failed, otherwise None.
    Argument errors (including a missing DOMAIN) exit through parser.error.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(description='Generate an IDN test case.')
    parser.add_argument('domain', metavar='DOMAIN', nargs='?',
                        help='the Unicode domain (not encoded)')
    parser.add_argument('--name', metavar='NAME',
                        help='the name of the test case')
    parser.add_argument('--no-unicode', action='store_false',
                        dest='unicode_allowed', default=True,
                        help='expect the domain to be Punycoded')
    parser.add_argument('--test', action='store_true', dest='run_tests',
                        help='run unit tests')

    args = parser.parse_args(args)

    if args.run_tests:
        # Report doctest failures through the exit status instead of always
        # exiting successfully.
        if doctest.testmod().failed:
            return 1
        return

    if not args.domain:
        parser.error('Required argument: DOMAIN')

    domain = args.domain
    if isinstance(domain, bytes):
        # Python 2 hands over command-line arguments as byte strings. Assume
        # stdin.encoding is the encoding used for them; it is None when stdin
        # is not a terminal, so fall back to the filesystem encoding and
        # finally to UTF-8. On Python 3 the argument is already text.
        encoding = (sys.stdin.encoding or sys.getfilesystemencoding()
                    or 'utf-8')
        domain = domain.decode(encoding)
    make_case(domain, unicode_allowed=args.unicode_allowed,
              case_name=args.name)
142
143
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
OLDNEW
« no previous file with comments | « components/url_formatter/url_formatter_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698