html/lib/src/encoding_parser.dart - Issue 1400473008: Roll Observatory packages and add a roll script

Side by Side Diff: html/lib/src/encoding_parser.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 library encoding_parser;

2

3 import 'constants.dart';

4 import 'inputstream.dart';

5

6 // TODO(jmesserly): I converted StopIteration to StateError("No more elements").

7 // Seems strange to throw this from outside of an iterator though.

/// String-like object with an associated position and various extra methods.
///
/// If the position is ever greater than or equal to the string length then a
/// [StateError] is raised.
class EncodingBytes {
  /// The data being scanned. Individual "bytes" are read with `_bytes[i]`.
  final String _bytes;

  /// Index of the current byte. Starts at -1 so that the first call to [next]
  /// yields the byte at index 0.
  int _position = -1;

  EncodingBytes(this._bytes);

  /// Number of bytes in the underlying data.
  int get length => _bytes.length;

  /// Advances the position by one and returns the byte found there.
  ///
  /// Throws a [StateError] once the position moves past the last byte.
  String next() {
    var p = _position = _position + 1;
    if (p >= length) {
      throw new StateError("No more elements");
    } else if (p < 0) {
      throw new RangeError(p);
    }
    return _bytes[p];
  }

  /// Moves the position back by one and returns the byte found there.
  ///
  /// Throws a [StateError] if the position is already past the end, and a
  /// [RangeError] if it is negative.
  String previous() {
    var p = _position;
    if (p >= length) {
      throw new StateError("No more elements");
    } else if (p < 0) {
      throw new RangeError(p);
    }
    _position = p = p - 1;
    return _bytes[p];
  }

  /// Sets the current position.
  ///
  /// The check is made against the position held *before* the assignment: once
  /// the end has been reached, any further assignment throws a [StateError].
  set position(int value) {
    if (_position >= length) {
      throw new StateError("No more elements");
    }
    _position = value;
  }

  /// The current position, reported as 0 while no byte has been read yet.
  ///
  /// Throws a [StateError] if the position is at or past the end of the data.
  int get position {
    if (_position >= length) {
      throw new StateError("No more elements");
    }
    if (_position >= 0) {
      return _position;
    } else {
      return 0;
    }
  }

  /// The byte at the current [position].
  String get currentByte => _bytes[position];

  /// Skips past characters accepted by [skipChars], which defaults to
  /// [isWhitespace].
  ///
  /// Returns the first character that is not skipped, leaving the position on
  /// it. Returns null if the data runs out first, leaving the position at
  /// [length].
  String skipChars([CharPreciate skipChars]) {
    if (skipChars == null) skipChars = isWhitespace;
    var p = position; // use property for the error-checking
    while (p < length) {
      var c = _bytes[p];
      if (!skipChars(c)) {
        _position = p;
        return c;
      }
      p += 1;
    }
    _position = p;
    return null;
  }

  /// Skips forward until a character accepted by [untilChars] is found.
  ///
  /// Returns that character, leaving the position on it. Returns null if the
  /// data runs out first, leaving the position at [length] (the same
  /// convention as [skipChars]), so that a following read of [position]
  /// throws a [StateError].
  String skipUntil(CharPreciate untilChars) {
    var p = position;
    while (p < length) {
      var c = _bytes[p];
      if (untilChars(c)) {
        _position = p;
        return c;
      }
      p += 1;
    }
    // Record that the scan consumed everything; without this the position
    // would silently stay where the scan started.
    _position = p;
    return null;
  }

  /// Looks for the sequence [bytes] at the current position.
  ///
  /// If the bytes are found, returns true and advances the position to the
  /// byte after the match. Otherwise returns false and leaves the position
  /// alone.
  bool matchBytes(String bytes) {
    var p = position;
    if (_bytes.length < p + bytes.length) {
      return false;
    }
    var data = _bytes.substring(p, p + bytes.length);
    if (data == bytes) {
      position += bytes.length;
      return true;
    }
    return false;
  }

  /// Looks for the next occurrence of [bytes] at or after the current
  /// position.
  ///
  /// If a match is found, advances the position to the last byte of the match
  /// and returns true. Throws a [StateError] if there is no match; this method
  /// never returns false.
  bool jumpTo(String bytes) {
    var newPosition = _bytes.indexOf(bytes, position);
    if (newPosition >= 0) {
      _position = newPosition + bytes.length - 1;
      return true;
    } else {
      throw new StateError("No more elements");
    }
  }

  /// Returns the bytes from index [start] up to, but not including, [end].
  ///
  /// [end] defaults to [length]; a negative [end] counts back from the end of
  /// the data.
  String slice(int start, [int end]) {
    if (end == null) end = length;
    if (end < 0) end += length;
    // String.substring takes an end index, not a length.
    return _bytes.substring(start, end);
  }
}

123

/// Mini parser for detecting character encoding from meta elements.
class EncodingParser {
  /// The lower-cased input, wrapped for positional scanning.
  final EncodingBytes data;

  /// The detected encoding; set by [handleMeta] when a codec is recognized.
  String encoding;

  /// [bytes] - the data to work on for encoding detection.
  EncodingParser(List<int> bytes)
      // Note: this is intentionally interpreting bytes as codepoints.
      : data = new EncodingBytes(new String.fromCharCodes(bytes).toLowerCase());

  /// Scans [data] for a meta element that declares an encoding.
  ///
  /// Returns [encoding], which is only assigned when [handleMeta] recognizes a
  /// codec before the data runs out.
  String getEncoding() {
    // Entries are tried in order, so longer prefixes must come before the
    // shorter prefixes they start with ("<!--" before "<!", "</" before "<").
    final methodDispatch = [
      ["<!--", handleComment],
      ["<meta", handleMeta],
      ["</", handlePossibleEndTag],
      ["<!", handleOther],
      ["<?", handleOther],
      ["<", handlePossibleStartTag]
    ];

    try {
      for (;;) {
        for (var dispatch in methodDispatch) {
          if (data.matchBytes(dispatch[0])) {
            var keepParsing = dispatch[1]();
            if (keepParsing) break;

            // We found an encoding. Stop.
            return encoding;
          }
        }
        data.position += 1;
      }
    } on StateError {
      // Running off the end of the data surfaces as a StateError; catch it
      // here to match the behavior of Python's StopIteration.
      // TODO(jmesserly): refactor to not use exceptions
    }
    return encoding;
  }

  /// Skip over comments.
  bool handleComment() => data.jumpTo("-->");

  /// Examines the attributes of a `<meta` element for an encoding.
  ///
  /// Returns false, with [encoding] set, when a `charset` or `content`
  /// attribute yields a value that [codecName] recognizes. Returns true when
  /// scanning should simply continue.
  bool handleMeta() {
    if (!isWhitespace(data.currentByte)) {
      // "<meta" is not followed by whitespace, so this is not a meta element;
      // just keep going.
      return true;
    }
    // We have a valid meta element we want to search for attributes
    while (true) {
      // Try to find the next attribute after the current position
      var attr = getAttribute();
      if (attr == null) return true;

      if (attr[0] == "charset") {
        var tentativeEncoding = attr[1];
        var codec = codecName(tentativeEncoding);
        if (codec != null) {
          encoding = codec;
          return false;
        }
      } else if (attr[0] == "content") {
        var contentParser = new ContentAttrParser(new EncodingBytes(attr[1]));
        var tentativeEncoding = contentParser.parse();
        var codec = codecName(tentativeEncoding);
        if (codec != null) {
          encoding = codec;
          return false;
        }
      }
    }
    return true; // unreachable
  }

  /// Handles a `<` that may open a start tag.
  bool handlePossibleStartTag() => handlePossibleTag(false);

  /// Handles a `</` that may open an end tag.
  bool handlePossibleEndTag() {
    data.next();
    return handlePossibleTag(true);
  }

  /// Skips over a possible tag and its attributes. Always returns true.
  bool handlePossibleTag(bool endTag) {
    if (!isLetter(data.currentByte)) {
      // If the next byte is not an ascii letter either ignore this
      // fragment (possible start tag case) or treat it according to
      // handleOther
      if (endTag) {
        data.previous();
        handleOther();
      }
      return true;
    }

    var c = data.skipUntil(isSpaceOrAngleBracket);
    if (c == "<") {
      // return to the first step in the overall "two step" algorithm
      // reprocessing the < byte
      data.previous();
    } else {
      // Read all attributes
      var attr = getAttribute();
      while (attr != null) {
        attr = getAttribute();
      }
    }
    return true;
  }

  /// Skips ahead to the next `>`.
  bool handleOther() => data.jumpTo(">");

  /// Return a name,value pair for the next attribute in the stream,
  /// if one is found, or null
  List<String> getAttribute() {
    // Step 1 (skip chars)
    var c = data.skipChars((x) => x == "/" || isWhitespace(x));
    // Step 2
    if (c == ">" || c == null) {
      return null;
    }
    // Step 3
    var attrName = [];
    var attrValue = [];
    // Step 4 attribute name
    while (true) {
      if (c == null) {
        return null;
      } else if (c == "=" && attrName.length > 0) {
        break;
      } else if (isWhitespace(c)) {
        // Step 6!
        c = data.skipChars();
        c = data.next();
        break;
      } else if (c == "/" || c == ">") {
        return [attrName.join(), ""];
      } else if (isLetter(c)) {
        attrName.add(c.toLowerCase());
      } else {
        attrName.add(c);
      }
      // Step 5
      c = data.next();
    }
    // Step 7
    if (c != "=") {
      data.previous();
      return [attrName.join(), ""];
    }
    // Step 8
    data.next();
    // Step 9
    c = data.skipChars();
    // Step 10
    if (c == "'" || c == '"') {
      // 10.1
      var quoteChar = c;
      while (true) {
        // 10.2
        c = data.next();
        if (c == quoteChar) {
          // 10.3
          data.next();
          return [attrName.join(), attrValue.join()];
        } else if (isLetter(c)) {
          // 10.4
          attrValue.add(c.toLowerCase());
        } else {
          // 10.5
          attrValue.add(c);
        }
      }
    } else if (c == ">") {
      return [attrName.join(), ""];
    } else if (c == null) {
      return null;
    } else if (isLetter(c)) {
      attrValue.add(c.toLowerCase());
    } else {
      attrValue.add(c);
    }
    // Step 11
    while (true) {
      c = data.next();
      if (isSpaceOrAngleBracket(c)) {
        return [attrName.join(), attrValue.join()];
      } else if (c == null) {
        return null;
      } else if (isLetter(c)) {
        attrValue.add(c.toLowerCase());
      } else {
        attrValue.add(c);
      }
    }
    return null; // unreachable
  }
}

320

/// Extracts the `charset` value from the text of a meta `content` attribute.
class ContentAttrParser {
  /// The attribute value being scanned.
  final EncodingBytes data;

  ContentAttrParser(this.data);

  /// Returns the value following `charset=` in [data], or null if there is
  /// none.
  ///
  /// A quoted value is returned without its quote marks. An unquoted value
  /// ends at the next whitespace character, or at the end of the data.
  String parse() {
    try {
      // Check if the attr name is charset
      // otherwise return
      data.jumpTo("charset");
      data.position += 1;
      data.skipChars();
      if (data.currentByte != "=") {
        // If there is no = sign keep looking for attrs
        return null;
      }
      data.position += 1;
      data.skipChars();
      // Look for an encoding between matching quote marks
      if (data.currentByte == '"' || data.currentByte == "'") {
        var quoteMark = data.currentByte;
        data.position += 1;
        var oldPosition = data.position;
        if (data.jumpTo(quoteMark)) {
          return data.slice(oldPosition, data.position);
        } else {
          return null;
        }
      } else {
        // Unquoted value
        var oldPosition = data.position;
        try {
          if (data.skipUntil(isWhitespace) == null) {
            // No whitespace terminates the value, so it runs to the end.
            return data.slice(oldPosition);
          }
          return data.slice(oldPosition, data.position);
        } on StateError {
          // Return the whole remaining value
          return data.slice(oldPosition);
        }
      }
    } on StateError {
      // The data ran out before a complete charset declaration was found.
      return null;
    }
  }
}

365

/// Whether [char] is `>`, `<`, or accepted by [isWhitespace].
bool isSpaceOrAngleBracket(String char) {
  return char == ">" || char == "<" || isWhitespace(char);
}

/// Signature of a test applied to a single-character string, as taken by
/// [EncodingBytes.skipChars] and [EncodingBytes.skipUntil].
typedef bool CharPreciate(String char);

OLD	NEW

« no previous file with comments | « html/lib/src/css_class_set.dart ('k') | html/lib/src/inputstream.dart » ('j') | no next file with comments »