OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 15 matching lines...) Expand all Loading... | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include "platform/text/TextEncodingDetector.h" | 31 #include "platform/text/TextEncodingDetector.h" |
32 | 32 |
33 #include "wtf/text/TextEncoding.h" | 33 #include "wtf/text/TextEncoding.h" |
34 #include <unicode/ucnv.h> | 34 #include <unicode/ucnv.h> |
35 #include <unicode/ucsdet.h> | 35 #include <unicode/ucsdet.h> |
36 #include <unicode/utf8.h> | |
36 | 37 |
37 namespace blink { | 38 namespace blink { |
38 | 39 |
39 bool detectTextEncoding(const char* data, size_t length, | 40 bool detectTextEncoding(const char* data, size_t length, |
40 const char* hintEncodingName, WTF::TextEncoding* detectedEncoding) | 41 const char* hintEncodingName, WTF::TextEncoding* detectedEncoding) |
41 { | 42 { |
42 *detectedEncoding = WTF::TextEncoding(); | 43 *detectedEncoding = WTF::TextEncoding(); |
43 int matchesCount = 0; | 44 int matchesCount = 0; |
44 UErrorCode status = U_ZERO_ERROR; | 45 UErrorCode status = U_ZERO_ERROR; |
45 UCharsetDetector* detector = ucsdet_open(&status); | 46 UCharsetDetector* detector = ucsdet_open(&status); |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
105 encoding = ucsdet_getName(matches[0], &status); | 106 encoding = ucsdet_getName(matches[0], &status); |
106 if (U_SUCCESS(status)) { | 107 if (U_SUCCESS(status)) { |
107 *detectedEncoding = WTF::TextEncoding(encoding); | 108 *detectedEncoding = WTF::TextEncoding(encoding); |
108 ucsdet_close(detector); | 109 ucsdet_close(detector); |
109 return true; | 110 return true; |
110 } | 111 } |
111 ucsdet_close(detector); | 112 ucsdet_close(detector); |
112 return false; | 113 return false; |
113 } | 114 } |
114 | 115 |
116 bool isUTF8Encoded(const char* data, size_t length) | |
117 { | |
118 int32_t srcLen = static_cast<int32_t>(length); | |
119 int32_t charIndex = 0; | |
120 bool markDetected = false; | |
121 | |
122 while (charIndex < srcLen) { | |
123 int32_t codePoint; | |
124 if ((uint8_t)(data[charIndex]) >= 0x80) | |
125 markDetected = true; | |
126 U8_NEXT(data, charIndex, srcLen, codePoint); | |
127 if (!U_IS_UNICODE_CHAR(codePoint)) | |
aelias_OOO_until_Jul13
2016/02/24 04:37:54
According to http://icu-project.org/apiref/icu4c/u
Jinsuk Kim
2016/02/24 06:54:54
Thanks for looking into the detail. Ran the unitte
| |
128 return false; | |
129 } | |
130 return markDetected; | |
131 } | |
132 | |
115 } // namespace blink | 133 } // namespace blink |
OLD | NEW |