third_party/WebKit/Source/platform/text/DecodeEscapeSequences.h - Issue 2385283002: reflow comments in platform/{testing,text}

Side by Side Diff: third_party/WebKit/Source/platform/text/DecodeEscapeSequences.h

Issue 2385283002: reflow comments in platform/{testing,text} (Closed)

Patch Set: idunnolol Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com). All Rights Reserved.	2 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com). All Rights Reserved.

3 * Copyright (c) 2012 Google, inc. All Rights Reserved.	3 * Copyright (c) 2012 Google, inc. All Rights Reserved.

4 *	4 *

5 * Redistribution and use in source and binary forms, with or without	5 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	6 * modification, are permitted provided that the following conditions

7 * are met:	7 * are met:

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 20 matching lines...) Expand all Loading...
31 #define DecodeEscapeSequences_h	31 #define DecodeEscapeSequences_h

32	32

33 #include "wtf/ASCIICType.h"	33 #include "wtf/ASCIICType.h"

34 #include "wtf/Allocator.h"	34 #include "wtf/Allocator.h"

35 #include "wtf/Assertions.h"	35 #include "wtf/Assertions.h"

36 #include "wtf/text/StringBuilder.h"	36 #include "wtf/text/StringBuilder.h"

37 #include "wtf/text/TextEncoding.h"	37 #include "wtf/text/TextEncoding.h"

38	38

39 namespace blink {	39 namespace blink {

40	40

41 // See <http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations>.	41 // See

	42 // <http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations>.

42 struct Unicode16BitEscapeSequence {	43 struct Unicode16BitEscapeSequence {

43 STATIC_ONLY(Unicode16BitEscapeSequence);	44 STATIC_ONLY(Unicode16BitEscapeSequence);

44 enum { sequenceSize = 6 }; // e.g. %u26C4	45 enum { sequenceSize = 6 }; // e.g. %u26C4

45 static size_t findInString(const String& string, size_t startPosition) {	46 static size_t findInString(const String& string, size_t startPosition) {

46 return string.find("%u", startPosition);	47 return string.find("%u", startPosition);

47 }	48 }

48 static size_t findEndOfRun(const String& string,	49 static size_t findEndOfRun(const String& string,

49 size_t startPosition,	50 size_t startPosition,

50 size_t endPosition) {	51 size_t endPosition) {

51 size_t runEnd = startPosition;	52 size_t runEnd = startPosition;

52 while (endPosition - runEnd >= sequenceSize && string[runEnd] == '%' &&	53 while (endPosition - runEnd >= sequenceSize && string[runEnd] == '%' &&

53 string[runEnd + 1] == 'u' && isASCIIHexDigit(string[runEnd + 2]) &&	54 string[runEnd + 1] == 'u' && isASCIIHexDigit(string[runEnd + 2]) &&

54 isASCIIHexDigit(string[runEnd + 3]) &&	55 isASCIIHexDigit(string[runEnd + 3]) &&

55 isASCIIHexDigit(string[runEnd + 4]) &&	56 isASCIIHexDigit(string[runEnd + 4]) &&

56 isASCIIHexDigit(string[runEnd + 5])) {	57 isASCIIHexDigit(string[runEnd + 5])) {

57 runEnd += sequenceSize;	58 runEnd += sequenceSize;

58 }	59 }

59 return runEnd;	60 return runEnd;

60 }	61 }

61	62

62 template <typename CharType>	63 template <typename CharType>

63 static String decodeRun(const CharType* run,	64 static String decodeRun(const CharType* run,

64 size_t runLength,	65 size_t runLength,

65 const WTF::TextEncoding&) {	66 const WTF::TextEncoding&) {

66 // Each %u-escape sequence represents a UTF-16 code unit.	67 // Each %u-escape sequence represents a UTF-16 code unit. See

67 // See <http://www.w3.org/International/iri-edit/draft-duerst-iri.html#anchor29>.	68 // <http://www.w3.org/International/iri-edit/draft-duerst-iri.html#anchor29>.

68 // For 16-bit escape sequences, we know that findEndOfRun() has given us a contiguous run of sequences	69 // For 16-bit escape sequences, we know that findEndOfRun() has given us a

69 // without any intervening characters, so decode the run without additional checks.	70 // contiguous run of sequences without any intervening characters, so decode

	71 // the run without additional checks.

70 size_t numberOfSequences = runLength / sequenceSize;	72 size_t numberOfSequences = runLength / sequenceSize;

71 StringBuilder builder;	73 StringBuilder builder;

72 builder.reserveCapacity(numberOfSequences);	74 builder.reserveCapacity(numberOfSequences);

73 while (numberOfSequences--) {	75 while (numberOfSequences--) {

74 UChar codeUnit = (toASCIIHexValue(run[2]) << 12) |	76 UChar codeUnit = (toASCIIHexValue(run[2]) << 12) |

75 (toASCIIHexValue(run[3]) << 8) |	77 (toASCIIHexValue(run[3]) << 8) |

76 (toASCIIHexValue(run[4]) << 4) | toASCIIHexValue(run[5]);	78 (toASCIIHexValue(run[4]) << 4) | toASCIIHexValue(run[5]);

77 builder.append(codeUnit);	79 builder.append(codeUnit);

78 run += sequenceSize;	80 run += sequenceSize;

79 }	81 }

80 return builder.toString();	82 return builder.toString();

81 }	83 }

82 };	84 };

83	85

84 struct URLEscapeSequence {	86 struct URLEscapeSequence {

85 enum { sequenceSize = 3 }; // e.g. %41	87 enum { sequenceSize = 3 }; // e.g. %41

86 static size_t findInString(const String& string, size_t startPosition) {	88 static size_t findInString(const String& string, size_t startPosition) {

87 return string.find('%', startPosition);	89 return string.find('%', startPosition);

88 }	90 }

89 static size_t findEndOfRun(const String& string,	91 static size_t findEndOfRun(const String& string,

90 size_t startPosition,	92 size_t startPosition,

91 size_t endPosition) {	93 size_t endPosition) {

92 // Make the simplifying assumption that supported encodings may have up to two unescaped characters	94 // Make the simplifying assumption that supported encodings may have up to

93 // in the range 0x40 - 0x7F as the trailing bytes of their sequences which need to be passed into the	95 // two unescaped characters in the range 0x40 - 0x7F as the trailing bytes

94 // decoder as part of the run. In other words, we end the run at the first value outside of the	96 // of their sequences which need to be passed into the decoder as part of

95 // 0x40 - 0x7F range, after two values in this range, or at a %-sign that does not introduce a valid	97 // the run. In other words, we end the run at the first value outside of the

96 // escape sequence.	98 // 0x40 - 0x7F range, after two values in this range, or at a %-sign that

	99 // does not introduce a valid escape sequence.

97 size_t runEnd = startPosition;	100 size_t runEnd = startPosition;

98 int numberOfTrailingCharacters = 0;	101 int numberOfTrailingCharacters = 0;

99 while (runEnd < endPosition) {	102 while (runEnd < endPosition) {

100 if (string[runEnd] == '%') {	103 if (string[runEnd] == '%') {

101 if (endPosition - runEnd >= sequenceSize &&	104 if (endPosition - runEnd >= sequenceSize &&

102 isASCIIHexDigit(string[runEnd + 1]) &&	105 isASCIIHexDigit(string[runEnd + 1]) &&

103 isASCIIHexDigit(string[runEnd + 2])) {	106 isASCIIHexDigit(string[runEnd + 2])) {

104 runEnd += sequenceSize;	107 runEnd += sequenceSize;

105 numberOfTrailingCharacters = 0;	108 numberOfTrailingCharacters = 0;

106 } else	109 } else

107 break;	110 break;

108 } else if (string[runEnd] >= 0x40 && string[runEnd] <= 0x7F &&	111 } else if (string[runEnd] >= 0x40 && string[runEnd] <= 0x7F &&

109 numberOfTrailingCharacters < 2) {	112 numberOfTrailingCharacters < 2) {

110 runEnd += 1;	113 runEnd += 1;

111 numberOfTrailingCharacters += 1;	114 numberOfTrailingCharacters += 1;

112 } else	115 } else

113 break;	116 break;

114 }	117 }

115 return runEnd;	118 return runEnd;

116 }	119 }

117	120

118 template <typename CharType>	121 template <typename CharType>

119 static String decodeRun(const CharType* run,	122 static String decodeRun(const CharType* run,

120 size_t runLength,	123 size_t runLength,

121 const WTF::TextEncoding& encoding) {	124 const WTF::TextEncoding& encoding) {

122 // For URL escape sequences, we know that findEndOfRun() has given us a run where every %-sign introduces	125 // For URL escape sequences, we know that findEndOfRun() has given us a run

123 // a valid escape sequence, but there may be characters between the sequences.	126 // where every %-sign introduces a valid escape sequence, but there may be

	127 // characters between the sequences.

124 Vector<char, 512> buffer;	128 Vector<char, 512> buffer;

125 buffer.resize(	129 buffer.resize(

126 runLength); // Unescaping hex sequences only makes the length smaller.	130 runLength); // Unescaping hex sequences only makes the length smaller.

127 char* p = buffer.data();	131 char* p = buffer.data();

128 const CharType* runEnd = run + runLength;	132 const CharType* runEnd = run + runLength;

129 while (run < runEnd) {	133 while (run < runEnd) {

130 if (run[0] == '%') {	134 if (run[0] == '%') {

131 *p++ = (toASCIIHexValue(run[1]) << 4) | toASCIIHexValue(run[2]);	135 *p++ = (toASCIIHexValue(run[1]) << 4) | toASCIIHexValue(run[2]);

132 run += sequenceSize;	136 run += sequenceSize;

133 } else {	137 } else {

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
177 result.append(decoded);	181 result.append(decoded);

178 decodedPosition = encodedRunEnd;	182 decodedPosition = encodedRunEnd;

179 }	183 }

180 result.append(string, decodedPosition, length - decodedPosition);	184 result.append(string, decodedPosition, length - decodedPosition);

181 return result.toString();	185 return result.toString();

182 }	186 }

183	187

184 } // namespace blink	188 } // namespace blink

185	189

186 #endif // DecodeEscapeSequences_h	190 #endif // DecodeEscapeSequences_h

OLD	NEW