OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ | 5 #ifndef BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ |
6 #define BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ | 6 #define BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include <string> | 9 #include <string> |
10 #include <vector> | 10 #include <vector> |
11 | 11 |
12 #include "base/base_api.h" | 12 #include "base/base_api.h" |
13 #include "base/string16.h" | 13 #include "base/string16.h" |
14 | 14 |
15 namespace base { | 15 namespace base { |
16 class StringPiece; | 16 class StringPiece; |
17 } | 17 } |
18 | 18 |
19 // Like the conversions in utf_string_conversions.h, but also takes one or more | 19 // Like the conversions in utf_string_conversions.h, but also takes one or more |
20 // offsets (|offset[s]_for_adjustment|) into the source strings, each offset | 20 // offsets (|offset[s]_for_adjustment|) into the source strings, each offset |
21 // will be adjusted to point at the same logical place in the result strings. | 21 // will be adjusted to point at the same logical place in the result strings. |
22 // If this isn't possible because an offset points past the end of the source | 22 // If this isn't possible because an offset points past the end of the source |
23 // strings or into the middle of a multibyte sequence, the offending offset will | 23 // strings or into the middle of a multibyte sequence, the offending offset will |
24 // be set to std::wstring::npos. |offset[s]_for_adjustment| may be NULL. | 24 // be set to string16::npos. |offset[s]_for_adjustment| may be NULL. |
// Review note (diff annotation): the NEW column renames these raw-buffer
// overloads from UTF8ToWide...() to UTF8ToUTF16...() and changes |output| from
// std::wstring* to string16*. |src|, |src_len| and the offset parameters keep
// the same types in both columns. This header does not state what the bool
// result means; presumably it reports conversion success (TODO confirm against
// the implementation file).
//
// Single-offset form: takes one offset through |offset_for_adjustment|.
25 BASE_API bool UTF8ToWideAndAdjustOffset(const char* src, | 25 BASE_API bool UTF8ToUTF16AndAdjustOffset(const char* src, |
26 size_t src_len, | 26 size_t src_len, |
27 std::wstring* output, | 27 string16* output, |
28 size_t* offset_for_adjustment); | 28 size_t* offset_for_adjustment); |
// Multi-offset form: takes a vector of offsets through
// |offsets_for_adjustment| instead of a single size_t*.
29 BASE_API bool UTF8ToWideAndAdjustOffsets( | 29 BASE_API bool UTF8ToUTF16AndAdjustOffsets( |
30 const char* src, | 30 const char* src, |
31 size_t src_len, | 31 size_t src_len, |
32 std::wstring* output, | 32 string16* output, |
33 std::vector<size_t>* offsets_for_adjustment); | 33 std::vector<size_t>* offsets_for_adjustment); |
34 | 34 |
// Review note (diff annotation): convenience overloads that take the UTF-8
// input as a base::StringPiece and return the converted string by value rather
// than through an output pointer. The NEW column renames them from
// UTF8ToWide...() to UTF8ToUTF16...() and changes the return type from
// std::wstring to string16. No success/failure result is exposed by these
// signatures.
35 BASE_API std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, | 35 BASE_API string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8, |
36 size_t* offset_for_adjustment); | 36 size_t* offset_for_adjustment); |
37 BASE_API std::wstring UTF8ToWideAndAdjustOffsets( | 37 BASE_API string16 UTF8ToUTF16AndAdjustOffsets( |
38 const base::StringPiece& utf8, | 38 const base::StringPiece& utf8, |
39 std::vector<size_t>* offsets_for_adjustment); | 39 std::vector<size_t>* offsets_for_adjustment); |
40 | 40 |
// Review note (diff annotation): these raw-buffer UTF-16 -> wide declarations
// appear only in the OLD column; the NEW column is empty on these rows, i.e.
// the NEW revision of the header no longer declares them. Any remaining
// callers would need to be updated (not visible from this header).
41 BASE_API bool UTF16ToWideAndAdjustOffset(const char16* src, | |
42 size_t src_len, | |
43 std::wstring* output, | |
44 size_t* offset_for_adjustment); | |
45 BASE_API bool UTF16ToWideAndAdjustOffsets( | |
46 const char16* src, | |
47 size_t src_len, | |
48 std::wstring* output, | |
49 std::vector<size_t>* offsets_for_adjustment); | |
50 | |
// Review note (diff annotation): string16-taking UTF-16 -> wide overloads that
// return std::wstring by value. They appear only in the OLD column; the NEW
// revision of the header does not declare them.
51 BASE_API std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, | |
52 size_t* offset_for_adjustment); | |
53 BASE_API std::wstring UTF16ToWideAndAdjustOffsets( | |
54 const string16& utf16, | |
55 std::vector<size_t>* offsets_for_adjustment); | |
56 | |
57 // Limiting function callable by std::for_each which will replace any value | 41 // Limiting function callable by std::for_each which will replace any value |
58 // which is equal to or greater than |limit| with npos. | 42 // which is equal to or greater than |limit| with npos. |
// Review note (diff annotation): this struct is identical in the OLD and NEW
// columns apart from its line numbers. |T| must expose a static member named
// npos (the functor assigns T::npos). operator() takes the offset by non-const
// reference and overwrites it in place. |limit_| is a public data member
// because this is a struct with no access specifiers.
59 template <typename T> | 43 template <typename T> |
60 struct LimitOffset { | 44 struct LimitOffset { |
61 explicit LimitOffset(size_t limit) | 45 explicit LimitOffset(size_t limit) |
62 : limit_(limit) {} | 46 : limit_(limit) {} |
63 | 47 |
// The comparison is inclusive: an offset exactly equal to |limit_| is also
// replaced with T::npos.
64 void operator()(size_t& offset) { | 48 void operator()(size_t& offset) { |
65 if (offset >= limit_) | 49 if (offset >= limit_) |
66 offset = T::npos; | 50 offset = T::npos; |
67 } | 51 } |
68 | 52 |
69 size_t limit_; | 53 size_t limit_; |
70 }; | 54 }; |
71 | 55 |
// Review note (diff annotation): this hunk replaces the OLD functor
// `struct AdjustOffset` (applied per element through std::transform, per its
// comment) with the NEW `class OffsetAdjuster`, a stack object that, per its
// comment, updates the whole offsets vector when it is destroyed.
//   * Adjustment's fields are renamed: location -> original_offset,
//     old_length -> original_length, new_length -> output_length.
//   * OLD holds `const Adjustments& adjustments_`, a reference to a vector
//     owned by the caller. NEW holds its own std::vector<Adjustment> by value,
//     populated through Add(), plus a raw pointer to the caller's offsets.
//   * The public `typedef ... Adjustments` and public operator() are gone in
//     NEW; the per-offset step is now the private AdjustOffset(iterator).
// NOTE(review): NEW declares a destructor but no copy constructor or
// assignment operator, so the implicit ones apply. A copy would presumably
// apply the adjustments a second time when it is destroyed -- confirm, and
// consider disallowing copy.
// NOTE(review): |offsets_for_adjustment_| is a non-owning pointer; presumably
// the vector must outlive the OffsetAdjuster. Whether NULL is accepted is not
// visible in this header.
72 // Adjustment function called by std::transform which will adjust any offset | 56 // Stack object which, on destruction, will update a vector of offsets based on |
73 // that occurs after one or more modified substrings. To use, create any | 57 // any supplied adjustments. To use, declare one of these, providing the |
74 // number of AdjustOffset::Adjustments, drop them into a vector, then call | 58 // address of the offset vector to adjust. Then Add() any number of Adjustments |
75 // std::transform with the transform function being something similar to | 59 // (each Adjustment gives the |original_offset| of a substring and the lengths |
76 // AdjustOffset(adjustments). Each Adjustment gives the original |location| | 60 // of the substring before and after transforming). When the OffsetAdjuster |
77 // of the encoded section and the |old_length| and |new_length| of the section | 61 // goes out of scope, all the offsets in the provided vector will be updated. |
78 // before and after decoding. | 62 class BASE_API OffsetAdjuster { |
79 struct BASE_API AdjustOffset { | 63 public: |
80 // Helper structure which indicates where an encoded character occurred | |
81 // and how long that encoding was. | |
82 struct BASE_API Adjustment { | 64 struct BASE_API Adjustment { |
83 Adjustment(size_t location, size_t old_length, size_t new_length); | 65 Adjustment(size_t original_offset, |
| 66 size_t original_length, |
| 67 size_t output_length); |
84 | 68 |
85 size_t location; | 69 size_t original_offset; |
86 size_t old_length; | 70 size_t original_length; |
87 size_t new_length; | 71 size_t output_length; |
88 }; | 72 }; |
89 | 73 |
90 typedef std::vector<Adjustment> Adjustments; | 74 explicit OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment); |
| 75 ~OffsetAdjuster(); |
91 | 76 |
92 explicit AdjustOffset(const Adjustments& adjustments); | 77 void Add(const Adjustment& adjustment); |
93 void operator()(size_t& offset); | |
94 | 78 |
95 const Adjustments& adjustments_; | 79 private: |
| 80 void AdjustOffset(std::vector<size_t>::iterator offset); |
| 81 |
| 82 std::vector<size_t>* offsets_for_adjustment_; |
| 83 std::vector<Adjustment> adjustments_; |
96 }; | 84 }; |
97 | 85 |
98 #endif // BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ | 86 #endif // BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ |
OLD | NEW |