| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // This command-line program converts an effective-TLD data file in UTF-8 from | 5 // This command-line program converts an effective-TLD data file in UTF-8 from |
| 6 // the format provided by Mozilla to the format expected by Chrome. Any errors | 6 // the format provided by Mozilla to the format expected by Chrome. Any errors |
| 7 // or warnings are recorded in tld_cleanup.log. | 7 // or warnings are recorded in tld_cleanup.log. |
| 8 // | 8 // |
| 9 // In particular, it | 9 // In particular, it |
| 10 // * Strips blank lines and comments, as well as notes for individual rules. | 10 // * Strips blank lines and comments, as well as notes for individual rules. |
| 11 // * Changes all line endings to LF. | 11 // * Changes all line endings to LF. |
| 12 // * Strips a single leading and/or trailing dot from each rule, if present. | 12 // * Strips a single leading and/or trailing dot from each rule, if present. |
| 13 // * Logs a warning if a rule contains '!' or '*.' other than at the beginning | 13 // * Logs a warning if a rule contains '!' or '*.' other than at the beginning |
| 14 // of the rule. (This also catches multiple ! or *. at the start of a rule.) | 14 // of the rule. (This also catches multiple ! or *. at the start of a rule.) |
| 15 // * Logs a warning if GURL reports a rule as invalid, but keeps the rule. | 15 // * Logs a warning if GURL reports a rule as invalid, but keeps the rule. |
| 16 // * Canonicalizes each rule's domain by converting it to a GURL and back. | 16 // * Canonicalizes each rule's domain by converting it to a GURL and back. |
| 17 // * Adds explicit rules for true TLDs found in any rule. | 17 // * Adds explicit rules for true TLDs found in any rule. |
| 18 | 18 |
| 19 #include <windows.h> | 19 #include <windows.h> |
| 20 #include <set> | 20 #include <set> |
| 21 #include <string> | 21 #include <string> |
| 22 | 22 |
| 23 #include "base/at_exit.h" | 23 #include "base/at_exit.h" |
| 24 #include "base/file_util.h" | 24 #include "base/file_util.h" |
| 25 #include "base/icu_util.h" | 25 #include "base/icu_util.h" |
| 26 #include "base/logging.h" | 26 #include "base/logging.h" |
| 27 #include "base/path_service.h" | 27 #include "base/path_service.h" |
| 28 #include "base/process_util.h" |
| 28 #include "base/string_util.h" | 29 #include "base/string_util.h" |
| 29 #include "googleurl/src/gurl.h" | 30 #include "googleurl/src/gurl.h" |
| 30 #include "googleurl/src/url_parse.h" | 31 #include "googleurl/src/url_parse.h" |
| 31 | 32 |
| 32 static const wchar_t* const kLogFileName = L"tld_cleanup.log"; | 33 static const wchar_t* const kLogFileName = L"tld_cleanup.log"; |
| 33 typedef std::set<std::string> StringSet; | 34 typedef std::set<std::string> StringSet; |
| 34 | 35 |
| 35 // Writes the list of domain rules contained in the 'rules' set to the | 36 // Writes the list of domain rules contained in the 'rules' set to the |
| 36 // 'outfile', with each rule terminated by a LF. The file must already have | 37 // 'outfile', with each rule terminated by a LF. The file must already have |
| 37 // been created with write access. | 38 // been created with write access. |
| (...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 198 | 199 |
| 199 if (!WriteRules(rules, outfile)) { | 200 if (!WriteRules(rules, outfile)) { |
| 200 LOG(ERROR) << "Error(s) writing " << out_filename; | 201 LOG(ERROR) << "Error(s) writing " << out_filename; |
| 201 result = kError; | 202 result = kError; |
| 202 } | 203 } |
| 203 | 204 |
| 204 return result; | 205 return result; |
| 205 } | 206 } |
| 206 | 207 |
| 207 int main(int argc, const char* argv[]) { | 208 int main(int argc, const char* argv[]) { |
| 209 process_util::EnableTerminationOnHeapCorruption(); |
| 208 if (argc != 3) { | 210 if (argc != 3) { |
| 209 fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n"); | 211 fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n"); |
| 210 fprintf(stderr, "Usage: %s <input> <output>\n", argv[0]); | 212 fprintf(stderr, "Usage: %s <input> <output>\n", argv[0]); |
| 211 return 1; | 213 return 1; |
| 212 } | 214 } |
| 213 | 215 |
| 214 // Manages the destruction of singletons. | 216 // Manages the destruction of singletons. |
| 215 base::AtExitManager exit_manager; | 217 base::AtExitManager exit_manager; |
| 216 | 218 |
| 217 // Only use OutputDebugString in debug mode. | 219 // Only use OutputDebugString in debug mode. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 237 if (result != kSuccess) { | 239 if (result != kSuccess) { |
| 238 fwprintf(stderr, L"Errors or warnings processing file. See log in %ls.", | 240 fwprintf(stderr, L"Errors or warnings processing file. See log in %ls.", |
| 239 kLogFileName); | 241 kLogFileName); |
| 240 } | 242 } |
| 241 | 243 |
| 242 if (result == kError) | 244 if (result == kError) |
| 243 return 1; | 245 return 1; |
| 244 return 0; | 246 return 0; |
| 245 } | 247 } |
| 246 | 248 |
| OLD | NEW |