tools/security/idn_test_case_generator.py - Issue 2880983002: Added a Python script in tools/ to generate IDN test cases.

Side by Side Diff: tools/security/idn_test_case_generator.py

Issue 2880983002: Added a Python script in tools/ to generate IDN test cases. (Closed)

Patch Set: Minor refactor. Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 #!/usr/bin/env python2

	2 # Copyright 2017 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Utilities for generating IDN test cases.

	7

	8 Either use the command-line interface (see --help) or directly call make_case

	9 from a Python shell (see make_case documentation).

	10 """

	11

	12 import argparse

	13 import codecs

	14 import doctest

	15 import sys

	16

	17

	18 def str_to_c_string(string):

	19 """Converts a Python str (ASCII) to a C string literal.

	20

	21 >>> str_to_c_string('abc\x8c')

	22 '"abc\\\\x8c"'

	23 """

	24 return repr(string).replace("'", '"')

	25

	26

	27 def ishexdigit(c):

	28 """

	29 >>> ishexdigit('0')

	30 True

	31 >>> ishexdigit('9')

	32 True

	33 >>> ishexdigit('/')

	34 False

	35 >>> ishexdigit(':')

	36 False

	37 >>> ishexdigit('a')

	38 True

	39 >>> ishexdigit('f')

	40 True

	41 >>> ishexdigit('g')

	42 False

	43 >>> ishexdigit('A')

	44 True

	45 >>> ishexdigit('F')

	46 True

	47 >>> ishexdigit('G')

	48 False

	49 """

	50 return c.isdigit() or ord('a') <= ord(c.lower()) <= ord('f')

	51

	52

	53 def unicode_to_c_wstring(string):

	54 """Converts a Python str or unicode to a C wide-string literal.

	55

	56 >>> unicode_to_c_wstring(u'b\u00fccher.de')

	57 'L"b\\\\x00fc" L"cher.de"'

	58 """

	59 result = ['L"']

	60 for c in string:

	61 # If the previous character was \x-escaped, and the next character is a

	62 # hex digit, we need to end and restart the string literal. Otherwise,

	63 # the next character will extend the \x escape sequence.

	64 if result[-1].startswith('\\x') and ishexdigit(c):

	65 result.append('" L"')

	66 escaped = repr(c)[2:-1]

	67 # Convert '\u' to '\x', and also force a minimum of 4 digits (this isn't

	68 # necessary but is preferred style for these test cases).

	69 if escaped[:2] in ('\\x', '\\u'):

	70 escaped = '\\x%04x' % ord(c)

	71 result.append(escaped)

	72 result.append('"')

	73 return ''.join(result)

	74

	75

	76 def make_case(unicode_domain, unicode_allowed=True, case_name=None):

	77 """Generates a C++ test case for an IDN domain test.

	78

	79 This is designed specifically for the IDNTestCase struct in the file

	80 components/url_formatter/url_formatter_unittest.cc. It generates a row of

	81 the idn_cases array, specifying a test for a particular domain.

	82

	83 \|unicode_domain\| is a Unicode string of the domain (NOT IDNA-encoded).

	84 \|unicode_allowed\| specifies whether the test case should expect the domain

	85 to be displayed in Unicode form (True) or in IDNA/Punycode ASCII encoding

	86 (False). \|case_name\| is just for the comment.

	87

	88 This function will automatically convert the domain to its IDNA format, and

	89 prepare the test case in C++ syntax.

	90

	91 >>> make_case(u'\u5317\u4eac\u5927\u5b78.cn', True, 'Hanzi (Chinese)')

	92 // Hanzi (Chinese)

	93 {"xn--1lq90ic7f1rc.cn", L"\\x5317\\x4eac\\x5927\\x5b78.cn", true},

	94 >>> make_case(u'b\u00fccher.de', True)

	95 {"xn--bcher-kva.de", L"b\\x00fc" L"cher.de", true},

	96

	97 This will also apply normalization to the Unicode domain, as required by the

	98 IDNA algorithm. This example shows U+210F normalized to U+0127 (this

	99 generates the exact same test case as u'\u0127ello'):

	100

	101 >>> make_case(u'\u210fello', True)

	102 {"xn--ello-4xa", L"\\x0127" L"ello", true},

	103 """

	104 idna_input = codecs.encode(unicode_domain, 'idna')

	105 # Round-trip to ensure normalization.

	106 unicode_output = codecs.decode(idna_input, 'idna')

	107 if case_name:

	108 print(' // %s' % case_name)

	109 print(' {%s, %s, %s},' %

	110 (str_to_c_string(idna_input), unicode_to_c_wstring(unicode_output),

	111 repr(unicode_allowed).lower()))

	112

	113

	114 def main(args=None):

	115 if args is None:

	116 args = sys.argv[1:]

	117

	118 parser = argparse.ArgumentParser(description='Generate an IDN test case.')

	119 parser.add_argument('domain', metavar='DOMAIN', nargs='?',

	120 help='the Unicode domain (not encoded)')

	121 parser.add_argument('--name', metavar='NAME',

	122 help='the name of the test case')

	123 parser.add_argument('--no-unicode', action='store_false',

	124 dest='unicode_allowed', default=True,

	125 help='expect the domain to be Punycoded')

	126 parser.add_argument('--test', action='store_true', dest='run_tests',

	127 help='run unit tests')

	128

	129 args = parser.parse_args(args)

	130

	131 if args.run_tests:

	132 import doctest

	133 doctest.testmod()

	134 return

	135

	136 if not args.domain:

	137 parser.error('Required argument: DOMAIN')

	138

	139 # Assume stdin.encoding is the encoding used for command-line arguments.

	140 domain = args.domain.decode(sys.stdin.encoding)

	141 make_case(domain, unicode_allowed=args.unicode_allowed, case_name=args.name)

	142

	143

if __name__ == '__main__':
    # Only run the CLI when executed as a script; main()'s return value is
    # handed to sys.exit() as the process exit status.
    sys.exit(main())

OLD	NEW

« no previous file with comments | « components/url_formatter/url_formatter_unittest.cc ('k') | no next file » | no next file with comments »