Source/wtf/text/TextEncoding.cpp - Issue 19845004: Do not normalize into NFC the values of form fields

Side by Side Diff: Source/wtf/text/TextEncoding.cpp

Issue 19845004: Do not normalize into NFC the values of form fields (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.	2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.

3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>	3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>

4 * Copyright (C) 2007-2009 Torch Mobile, Inc.	4 * Copyright (C) 2007-2009 Torch Mobile, Inc.

5 *	5 *

6 * Redistribution and use in source and binary forms, with or without	6 * Redistribution and use in source and binary forms, with or without

7 * modification, are permitted provided that the following conditions	7 * modification, are permitted provided that the following conditions

8 * are met:	8 * are met:

9 * 1. Redistributions of source code must retain the above copyright	9 * 1. Redistributions of source code must retain the above copyright

10 * notice, this list of conditions and the following disclaimer.	10 * notice, this list of conditions and the following disclaimer.

(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
59 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const	59 String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const

60 {	60 {

61 if (!m_name)	61 if (!m_name)

62 return String();	62 return String();

63	63

64 return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);	64 return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);

65 }	65 }

66	66

67 CString TextEncoding::encode(const String& string, UnencodableHandling handling) const	67 CString TextEncoding::encode(const String& string, UnencodableHandling handling) const

68 {	68 {

	69 return encode(string, NFCNormalization, handling);

	70 }

	71

	72 CString TextEncoding::encode(const String& string, NormalizationMode mode, UnencodableHandling handling) const

	73 {

69 if (!m_name)	74 if (!m_name)

70 return CString();	75 return CString();

71	76

72 if (string.isEmpty())	77 if (string.isEmpty())

73 return "";	78 return "";

74	79

75 // Text exclusively containing Latin-1 characters (U+0000..U+00FF) is left	80 // Text exclusively containing Latin-1 characters (U+0000..U+00FF) is left

76 // unaffected by NFC. This is effectively the same as saying that all	81 // unaffected by NFC. This is effectively the same as saying that all

77 // Latin-1 text is already normalized to NFC.	82 // Latin-1 text is already normalized to NFC.

78 // Source: http://unicode.org/reports/tr15/	83 // Source: http://unicode.org/reports/tr15/

79 if (string.is8Bit())	84 if ((mode == NoNormalization || mode == NFCNormalization)

	85 && string.is8Bit())

80 return newTextCodec(*this)->encode(string.characters8(), string.length(), handling);	86 return newTextCodec(*this)->encode(string.characters8(), string.length(), handling);

81	87

82 // FIXME: What's the right place to do normalization?

83 // It's a little strange to do it inside the encode function.

84 // Perhaps normalization should be an explicit step done before calling encode.

85

86 const UChar* source = string.characters16();	88 const UChar* source = string.characters16();

87 size_t length = string.length();	89 size_t length = string.length();

88	90

89 Vector<UChar> normalizedCharacters;	91 Vector<UChar> normalizedCharacters;

90	92

	93 UNormalizationMode unormMode;

	94 switch (mode) {

	95 case NoNormalization:

	96 unormMode = UNORM_NONE;

	97 break;

	98 case NFCNormalization:

	99 unormMode = UNORM_NFC;

	100 break;

	101 }

	102

91 UErrorCode err = U_ZERO_ERROR;	103 UErrorCode err = U_ZERO_ERROR;

92 if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) {	104 if (mode != NoNormalization

93 // First try using the length of the original string, since normalization to NFC rarely increases length.	105 && unorm_quickCheck(source, length, unormMode, &err) != UNORM_YES) {

	106 // First try using the length of the original string, since normalization might not increase the length.

94 normalizedCharacters.grow(length);	107 normalizedCharacters.grow(length);

95 int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);	108 int32_t normalizedLength = unorm_normalize(source, length, unormMode, 0, normalizedCharacters.data(), length, &err);

96 if (err == U_BUFFER_OVERFLOW_ERROR) {	109 if (err == U_BUFFER_OVERFLOW_ERROR) {

97 err = U_ZERO_ERROR;	110 err = U_ZERO_ERROR;

98 normalizedCharacters.resize(normalizedLength);	111 normalizedCharacters.resize(normalizedLength);

99 normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);	112 normalizedLength = unorm_normalize(source, length, unormMode, 0, normalizedCharacters.data(), normalizedLength, &err);

100 }	113 }

101 ASSERT(U_SUCCESS(err));	114 ASSERT(U_SUCCESS(err));

102	115

103 source = normalizedCharacters.data();	116 source = normalizedCharacters.data();

104 length = normalizedLength;	117 length = normalizedLength;

105 }	118 }

106	119

107 return newTextCodec(*this)->encode(source, length, handling);	120 return newTextCodec(*this)->encode(source, length, handling);

108 }	121 }

109	122

(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
227 return globalUTF8Encoding;	240 return globalUTF8Encoding;

228 }	241 }

229	242

230 const TextEncoding& WindowsLatin1Encoding()	243 const TextEncoding& WindowsLatin1Encoding()

231 {	244 {

232 static TextEncoding globalWindowsLatin1Encoding("WinLatin1");	245 static TextEncoding globalWindowsLatin1Encoding("WinLatin1");

233 return globalWindowsLatin1Encoding;	246 return globalWindowsLatin1Encoding;

234 }	247 }

235	248

236 } // namespace WTF	249 } // namespace WTF

OLD	NEW

« Source/wtf/text/TextEncoding.h ('K') | « Source/wtf/text/TextEncoding.h ('k') | no next file » | no next file with comments »