OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2013 Google Inc. All rights reserved. | 2 * Copyright (C) 2013 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 24 matching lines...) Expand all Loading... |
35 #include "wtf/text/TextEncoding.h" | 35 #include "wtf/text/TextEncoding.h" |
36 #include "wtf/text/WTFString.h" | 36 #include "wtf/text/WTFString.h" |
37 | 37 |
38 namespace WTF { | 38 namespace WTF { |
39 | 39 |
40 // This class lets you get UTF-8 data out of a String without mallocing a | 40 // This class lets you get UTF-8 data out of a String without mallocing a |
41 // separate buffer to hold the data if the String happens to be 8 bit and | 41 // separate buffer to hold the data if the String happens to be 8 bit and |
42 // contain only ASCII characters. | 42 // contain only ASCII characters. |
43 class StringUTF8Adaptor { | 43 class StringUTF8Adaptor { |
44 public: | 44 public: |
45 enum ShouldNormalize { | 45 explicit StringUTF8Adaptor(const String& string) |
46 DoNotNormalize, | |
47 Normalize | |
48 }; | |
49 | |
50 explicit StringUTF8Adaptor(const String& string, ShouldNormalize normalize =
DoNotNormalize, UnencodableHandling handling = EntitiesForUnencodables) | |
51 : m_data(0) | 46 : m_data(0) |
52 , m_length(0) | 47 , m_length(0) |
53 { | 48 { |
54 if (string.isEmpty()) | 49 if (string.isEmpty()) |
55 return; | 50 return; |
56 // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses
UTF-8 | 51 // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses
UTF-8 |
57 // when processing 8 bit strings. If |relative| is entirely ASCII, we lu
ck out | 52 // when processing 8 bit strings. If |relative| is entirely ASCII, we lu
ck out |
58 // and can avoid mallocing a new buffer to hold the UTF-8 data because U
TF-8 | 53 // and can avoid mallocing a new buffer to hold the UTF-8 data because U
TF-8 |
59 // and Latin-1 use the same code units for ASCII code points. | 54 // and Latin-1 use the same code units for ASCII code points. |
60 if (string.is8Bit() && string.containsOnlyASCII()) { | 55 if (string.is8Bit() && string.containsOnlyASCII()) { |
61 m_data = reinterpret_cast<const char*>(string.characters8()); | 56 m_data = reinterpret_cast<const char*>(string.characters8()); |
62 m_length = string.length(); | 57 m_length = string.length(); |
63 } else { | 58 } else { |
64 if (normalize == Normalize) | 59 m_utf8Buffer = string.utf8(); |
65 m_utf8Buffer = UTF8Encoding().normalizeAndEncode(string, handlin
g); | |
66 else | |
67 m_utf8Buffer = string.utf8(); | |
68 m_data = m_utf8Buffer.data(); | 60 m_data = m_utf8Buffer.data(); |
69 m_length = m_utf8Buffer.length(); | 61 m_length = m_utf8Buffer.length(); |
70 } | 62 } |
71 } | 63 } |
72 | 64 |
73 const char* data() const { return m_data; } | 65 const char* data() const { return m_data; } |
74 size_t length() const { return m_length; } | 66 size_t length() const { return m_length; } |
75 | 67 |
76 private: | 68 private: |
77 CString m_utf8Buffer; | 69 CString m_utf8Buffer; |
78 const char* m_data; | 70 const char* m_data; |
79 size_t m_length; | 71 size_t m_length; |
80 }; | 72 }; |
81 | 73 |
82 } // namespace WTF | 74 } // namespace WTF |
83 | 75 |
84 using WTF::StringUTF8Adaptor; | 76 using WTF::StringUTF8Adaptor; |
85 | 77 |
86 #endif // StringUTF8Adaptor_h | 78 #endif // StringUTF8Adaptor_h |
OLD | NEW |