| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ | |
| 6 #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ | |
| 7 | |
| 8 #include <string> | |
| 9 #include <vector> | |
| 10 | |
| 11 #include "base/base_export.h" | |
| 12 #include "base/strings/string16.h" | |
| 13 #include "base/strings/string_piece.h" | |
| 14 | |
| 15 namespace base { | |
| 16 | |
| 17 // A helper class and associated data structures to adjust offsets into a | |
| 18 // string in response to various adjustments one might do to that string | |
| 19 // (e.g., eliminating a range). For details on offsets, see the comments by | |
| 20 // the AdjustOffsets() function below. | |
| 21 class BASE_EXPORT OffsetAdjuster { | |
| 22 public: | |
| 23 struct BASE_EXPORT Adjustment { | |
| 24 Adjustment(size_t original_offset, | |
| 25 size_t original_length, | |
| 26 size_t output_length); | |
| 27 | |
| 28 size_t original_offset; | |
| 29 size_t original_length; | |
| 30 size_t output_length; | |
| 31 }; | |
| 32 typedef std::vector<Adjustment> Adjustments; | |
| 33 | |
| 34 // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments | |
| 35 // recorded in |adjustments|. | |
| 36 // | |
| 37 // Offsets represents insertion/selection points between characters: if |src| | |
| 38 // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the | |
| 39 // end of the string. Valid input offsets range from 0 to |src_len|. On | |
| 40 // exit, each offset will have been modified to point at the same logical | |
| 41 // position in the output string. If an offset cannot be successfully | |
| 42 // adjusted (e.g., because it points into the middle of a multibyte sequence), | |
| 43 // it will be set to string16::npos. | |
| 44 static void AdjustOffsets(const Adjustments& adjustments, | |
| 45 std::vector<size_t>* offsets_for_adjustment); | |
| 46 | |
| 47 // Adjusts the single |offset| to reflect the adjustments recorded in | |
| 48 // |adjustments|. | |
| 49 static void AdjustOffset(const Adjustments& adjustments, | |
| 50 size_t* offset); | |
| 51 | |
| 52 // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse | |
| 53 // of the adjustments recorded in |adjustments|. In other words, the offsets | |
| 54 // provided represent offsets into an adjusted string and the caller wants | |
| 55 // to know the offsets they correspond to in the original string. If an | |
| 56 // offset cannot be successfully unadjusted (e.g., because it points into | |
| 57 // the middle of a multibyte sequence), it will be set to string16::npos. | |
| 58 static void UnadjustOffsets(const Adjustments& adjustments, | |
| 59 std::vector<size_t>* offsets_for_unadjustment); | |
| 60 | |
| 61 // Adjusts the single |offset| to reflect the reverse of the adjustments | |
| 62 // recorded in |adjustments|. | |
| 63 static void UnadjustOffset(const Adjustments& adjustments, | |
| 64 size_t* offset); | |
| 65 | |
| 66 // Combines two sequential sets of adjustments, storing the combined revised | |
| 67 // adjustments in |adjustments_on_adjusted_string|. That is, suppose a | |
| 68 // string was altered in some way, with the alterations recorded as | |
| 69 // adjustments in |first_adjustments|. Then suppose the resulting string is | |
| 70 // further altered, with the alterations recorded as adjustments scored in | |
| 71 // |adjustments_on_adjusted_string|, with the offsets recorded in these | |
| 72 // adjustments being with respect to the intermediate string. This function | |
| 73 // combines the two sets of adjustments into one, storing the result in | |
| 74 // |adjustments_on_adjusted_string|, whose offsets are correct with respect | |
| 75 // to the original string. | |
| 76 // | |
| 77 // Assumes both parameters are sorted by increasing offset. | |
| 78 // | |
| 79 // WARNING: Only supports |first_adjustments| that involve collapsing ranges | |
| 80 // of text, not expanding ranges. | |
| 81 static void MergeSequentialAdjustments( | |
| 82 const Adjustments& first_adjustments, | |
| 83 Adjustments* adjustments_on_adjusted_string); | |
| 84 }; | |
| 85 | |
| 86 // Like the conversions in utf_string_conversions.h, but also fills in an | |
| 87 // |adjustments| parameter that reflects the alterations done to the string. | |
| 88 // It may be NULL. | |
| 89 BASE_EXPORT bool UTF8ToUTF16WithAdjustments( | |
| 90 const char* src, | |
| 91 size_t src_len, | |
| 92 string16* output, | |
| 93 base::OffsetAdjuster::Adjustments* adjustments); | |
| 94 BASE_EXPORT string16 UTF8ToUTF16WithAdjustments( | |
| 95 const base::StringPiece& utf8, | |
| 96 base::OffsetAdjuster::Adjustments* adjustments); | |
| 97 // As above, but instead internally examines the adjustments and applies them | |
| 98 // to |offsets_for_adjustment|. See comments by AdjustOffsets(). | |
| 99 BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets( | |
| 100 const base::StringPiece& utf8, | |
| 101 std::vector<size_t>* offsets_for_adjustment); | |
| 102 | |
| 103 BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets( | |
| 104 const base::StringPiece16& utf16, | |
| 105 std::vector<size_t>* offsets_for_adjustment); | |
| 106 | |
// Limiting functor, callable by std::for_each, that replaces any value
// strictly greater than |limit| with T::npos and leaves every other value
// untouched.  Typically constructed with a string length so that offsets are
// restricted to the closed range [0, length] (as opposed to [0, length)):
// an offset equal to the length is a valid "end of string" position.
//
// T is any type exposing a static |npos| member convertible to size_t
// (for example a string class).
template <typename T>
struct LimitOffset {
  // |limit| is the largest offset that is still considered valid.
  explicit LimitOffset(size_t limit)
      : limit_(limit) {}

  // Overwrites |offset| with T::npos when it exceeds the limit.  Declared
  // const because it never modifies the functor itself; this allows the
  // functor to be invoked through a const object or const reference.
  void operator()(size_t& offset) const {
    if (offset > limit_)
      offset = T::npos;
  }

  // Largest offset accepted by operator().
  size_t limit_;
};
| 123 | |
| 124 } // namespace base | |
| 125 | |
| 126 #endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ | |
| OLD | NEW |