Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(682)

Unified Diff: sky/engine/core/html/parser/create-html-entity-table

Issue 680173002: Remove HTML entity crazy (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « sky/engine/core/html/parser/HTMLEntityTable.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sky/engine/core/html/parser/create-html-entity-table
diff --git a/sky/engine/core/html/parser/create-html-entity-table b/sky/engine/core/html/parser/create-html-entity-table
deleted file mode 100755
index 9812f0b1e93561d80e0086e1983c8e07ab9735cf..0000000000000000000000000000000000000000
--- a/sky/engine/core/html/parser/create-html-entity-table
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2010 Google Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""This Python script creates the raw data that is our entity
-database. The representation is one string database containing all
-strings we could need, and then a mapping from offset+length -> entity
-data. That is compact, easy to use, and efficient."""
-
-import csv
-import os.path
-import string
-import sys
-
-ENTITY = 0
-VALUE = 1
-
-def convert_value_to_int(value):
- if not value:
- return "0";
- assert(value[0] == "U")
- assert(value[1] == "+")
- return "0x" + value[2:]
-
-
-def offset_table_entry(offset):
- return " &staticEntityTable[%s]," % offset
-
-
-program_name = os.path.basename(__file__)
-if len(sys.argv) < 4 or sys.argv[1] != "-o":
- # Python 3, change to: print("Usage: %s -o OUTPUT_FILE INPUT_FILE" % program_name, file=sys.stderr)
- sys.stderr.write("Usage: %s -o OUTPUT_FILE INPUT_FILE\n" % program_name)
- exit(1)
-
-output_path = sys.argv[2]
-input_path = sys.argv[3]
-
-with open(input_path) as html_entity_names_file:
- entries = list(csv.reader(html_entity_names_file))
-
-entries.sort(key = lambda entry: entry[ENTITY])
-entity_count = len(entries)
-
-output_file = open(output_path, "w")
-
-output_file.write("""/*
- * Copyright (C) 2010 Google, Inc. All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// THIS FILE IS GENERATED BY core/html/parser/create-html-entity-table
-// DO NOT EDIT (unless you are a ninja)!
-
-#include "config.h"
-#include "core/html/parser/HTMLEntityTable.h"
-
-namespace blink {
-
-namespace {
-""")
-
-assert len(entries) > 0, "Code assumes a non-empty entity array."
-def check_ascii(entity_string):
- for c in entity_string:
- code = ord(c)
- assert 0 <= code <= 127, (c + " is not ASCII. Need to change type " +
- "of storage from LChar to UChar to support " +
- "this entity.")
-
-output_file.write("static const LChar staticEntityStringStorage[] = {\n")
-output_file.write("'")
-all_data = ""
-entity_offset = 0
-first_output = True
-saved_by_reusing = 0
-for entry in entries:
- check_ascii(entry[ENTITY])
- # Reuse substrings from earlier entries. This saves 1-2000
- # characters, but it's O(n^2) and not very smart. The optimal
- # solution has to solve the "Shortest Common Superstring" problem
- # and that is NP-Complete or worse.
- #
- # This would be even more efficient if we didn't store the
- # semi-colon in the array but as a bit in the entry.
- entity = entry[ENTITY]
- already_existing_offset = all_data.find(entity)
- if already_existing_offset != -1:
- # Reusing space.
- this_offset = already_existing_offset
- saved_by_reusing += len(entity)
- else:
- if not first_output:
- output_file.write(",\n'")
- first_output = False
-
- # Try the end of the string and see if we can reuse that to
- # fit the start of the new entity.
- data_to_add = entity
- this_offset = entity_offset
- for truncated_len in range(len(entity) - 1, 0, -1):
- if all_data.endswith(entity[:truncated_len]):
- data_to_add = entity[truncated_len:]
- this_offset = entity_offset - truncated_len
- saved_by_reusing += truncated_len
- break
-
- output_file.write("', '".join(data_to_add))
- all_data += data_to_add
- output_file.write("'")
- entity_offset += len(data_to_add)
- assert len(entry) == 2, "We will use slot [2] in the list for the offset."
- assert this_offset < 32768 # Stored in a 16 bit short.
- entry.append(this_offset)
-
-output_file.write("};\n")
-
-index = {}
-for offset, entry in enumerate(entries):
- starting_letter = entry[ENTITY][0]
- if starting_letter not in index:
- index[starting_letter] = offset
-
-output_file.write("""
-static const HTMLEntityTableEntry staticEntityTable[%s] = {\n""" % entity_count)
-
-for entry in entries:
- values = entry[VALUE].split(' ')
- assert len(values) <= 2, values
- output_file.write(' { %s, %s, %s, %s }, // &%s\n' % (
- convert_value_to_int(values[0]),
- convert_value_to_int(values[1] if len(values) >= 2 else ""),
- entry[2],
- len(entry[ENTITY]),
- entry[ENTITY],
- ))
-
-output_file.write("""};
-
-""")
-
-output_file.write("""
-}
-""")
-
-output_file.write("static const short uppercaseOffset[] = {\n")
-for letter in string.ascii_uppercase:
- output_file.write("%d,\n" % index[letter])
-output_file.write("%d\n" % index['a'])
-output_file.write("""};
-
-static const short lowercaseOffset[] = {\n""")
-for letter in string.ascii_lowercase:
- output_file.write("%d,\n" % index[letter])
-output_file.write("%d\n" % entity_count)
-output_file.write("""};
-
-const LChar* HTMLEntityTable::entityString(const HTMLEntityTableEntry& entry)
-{
- return staticEntityStringStorage + entry.entityOffset;
-}
-
-LChar HTMLEntityTableEntry::lastCharacter() const
-{
- return HTMLEntityTable::entityString(*this)[length - 1];
-}
-
-const HTMLEntityTableEntry* HTMLEntityTable::firstEntryStartingWith(UChar c)
-{
- if (c >= 'A' && c <= 'Z')
- return &staticEntityTable[uppercaseOffset[c - 'A']];
- if (c >= 'a' && c <= 'z')
- return &staticEntityTable[lowercaseOffset[c - 'a']];
- return 0;
-}
-
-const HTMLEntityTableEntry* HTMLEntityTable::lastEntryStartingWith(UChar c)
-{
- if (c >= 'A' && c <= 'Z')
- return &staticEntityTable[uppercaseOffset[c - 'A' + 1]] - 1;
- if (c >= 'a' && c <= 'z')
- return &staticEntityTable[lowercaseOffset[c - 'a' + 1]] - 1;
- return 0;
-}
-
-const HTMLEntityTableEntry* HTMLEntityTable::firstEntry()
-{
- return &staticEntityTable[0];
-}
-
-const HTMLEntityTableEntry* HTMLEntityTable::lastEntry()
-{
- return &staticEntityTable[%s - 1];
-}
-
-}
-""" % entity_count)
« no previous file with comments | « sky/engine/core/html/parser/HTMLEntityTable.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698