| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <algorithm> | |
| 6 | |
| 7 #include "base/logging.h" | |
| 8 #include "base/strings/string_piece.h" | |
| 9 #include "base/strings/utf_offset_string_conversions.h" | |
| 10 #include "testing/gtest/include/gtest/gtest.h" | |
| 11 | |
| 12 namespace base { | |
| 13 | |
| 14 namespace { | |
| 15 | |
| 16 static const size_t kNpos = string16::npos; | |
| 17 | |
| 18 } // namespace | |
| 19 | |
| 20 TEST(UTFOffsetStringConversionsTest, AdjustOffset) { | |
| 21 struct UTF8ToUTF16Case { | |
| 22 const char* utf8; | |
| 23 size_t input_offset; | |
| 24 size_t output_offset; | |
| 25 } utf8_to_utf16_cases[] = { | |
| 26 {"", 0, 0}, | |
| 27 {"", kNpos, kNpos}, | |
| 28 {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos}, | |
| 29 {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1}, | |
| 30 {"\xed\xb0\x80z", 3, 1}, | |
| 31 {"A\xF0\x90\x8C\x80z", 1, 1}, | |
| 32 {"A\xF0\x90\x8C\x80z", 2, kNpos}, | |
| 33 {"A\xF0\x90\x8C\x80z", 5, 3}, | |
| 34 {"A\xF0\x90\x8C\x80z", 6, 4}, | |
| 35 {"A\xF0\x90\x8C\x80z", kNpos, kNpos}, | |
| 36 }; | |
| 37 for (size_t i = 0; i < arraysize(utf8_to_utf16_cases); ++i) { | |
| 38 const size_t offset = utf8_to_utf16_cases[i].input_offset; | |
| 39 std::vector<size_t> offsets; | |
| 40 offsets.push_back(offset); | |
| 41 UTF8ToUTF16AndAdjustOffsets(utf8_to_utf16_cases[i].utf8, &offsets); | |
| 42 EXPECT_EQ(utf8_to_utf16_cases[i].output_offset, offsets[0]); | |
| 43 } | |
| 44 | |
| 45 struct UTF16ToUTF8Case { | |
| 46 char16 utf16[10]; | |
| 47 size_t input_offset; | |
| 48 size_t output_offset; | |
| 49 } utf16_to_utf8_cases[] = { | |
| 50 {{}, 0, 0}, | |
| 51 // Converted to 3-byte utf-8 sequences | |
| 52 {{0x5909, 0x63DB}, 3, kNpos}, | |
| 53 {{0x5909, 0x63DB}, 2, 6}, | |
| 54 {{0x5909, 0x63DB}, 1, 3}, | |
| 55 {{0x5909, 0x63DB}, 0, 0}, | |
| 56 // Converted to 2-byte utf-8 sequences | |
| 57 {{'A', 0x00bc, 0x00be, 'z'}, 1, 1}, | |
| 58 {{'A', 0x00bc, 0x00be, 'z'}, 2, 3}, | |
| 59 {{'A', 0x00bc, 0x00be, 'z'}, 3, 5}, | |
| 60 {{'A', 0x00bc, 0x00be, 'z'}, 4, 6}, | |
| 61 // Surrogate pair | |
| 62 {{'A', 0xd800, 0xdf00, 'z'}, 1, 1}, | |
| 63 {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos}, | |
| 64 {{'A', 0xd800, 0xdf00, 'z'}, 3, 5}, | |
| 65 {{'A', 0xd800, 0xdf00, 'z'}, 4, 6}, | |
| 66 }; | |
| 67 for (size_t i = 0; i < arraysize(utf16_to_utf8_cases); ++i) { | |
| 68 size_t offset = utf16_to_utf8_cases[i].input_offset; | |
| 69 std::vector<size_t> offsets; | |
| 70 offsets.push_back(offset); | |
| 71 UTF16ToUTF8AndAdjustOffsets(utf16_to_utf8_cases[i].utf16, &offsets); | |
| 72 EXPECT_EQ(utf16_to_utf8_cases[i].output_offset, offsets[0]) << i; | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 TEST(UTFOffsetStringConversionsTest, LimitOffsets) { | |
| 77 const size_t kLimit = 10; | |
| 78 const size_t kItems = 20; | |
| 79 std::vector<size_t> size_ts; | |
| 80 for (size_t t = 0; t < kItems; ++t) | |
| 81 size_ts.push_back(t); | |
| 82 std::for_each(size_ts.begin(), size_ts.end(), | |
| 83 LimitOffset<string16>(kLimit)); | |
| 84 size_t unlimited_count = 0; | |
| 85 for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end(); | |
| 86 ++ti) { | |
| 87 if (*ti != kNpos) | |
| 88 ++unlimited_count; | |
| 89 } | |
| 90 EXPECT_EQ(11U, unlimited_count); | |
| 91 | |
| 92 // Reverse the values in the vector and try again. | |
| 93 size_ts.clear(); | |
| 94 for (size_t t = kItems; t > 0; --t) | |
| 95 size_ts.push_back(t - 1); | |
| 96 std::for_each(size_ts.begin(), size_ts.end(), | |
| 97 LimitOffset<string16>(kLimit)); | |
| 98 unlimited_count = 0; | |
| 99 for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end(); | |
| 100 ++ti) { | |
| 101 if (*ti != kNpos) | |
| 102 ++unlimited_count; | |
| 103 } | |
| 104 EXPECT_EQ(11U, unlimited_count); | |
| 105 } | |
| 106 | |
| 107 TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { | |
| 108 // Imagine we have strings as shown in the following cases where the | |
| 109 // X's represent encoded characters. | |
| 110 // 1: abcXXXdef ==> abcXdef | |
| 111 { | |
| 112 std::vector<size_t> offsets; | |
| 113 for (size_t t = 0; t <= 9; ++t) | |
| 114 offsets.push_back(t); | |
| 115 OffsetAdjuster::Adjustments adjustments; | |
| 116 adjustments.push_back(OffsetAdjuster::Adjustment(3, 3, 1)); | |
| 117 OffsetAdjuster::AdjustOffsets(adjustments, &offsets); | |
| 118 size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6, 7}; | |
| 119 EXPECT_EQ(offsets.size(), arraysize(expected_1)); | |
| 120 for (size_t i = 0; i < arraysize(expected_1); ++i) | |
| 121 EXPECT_EQ(expected_1[i], offsets[i]); | |
| 122 } | |
| 123 | |
| 124 // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX | |
| 125 { | |
| 126 std::vector<size_t> offsets; | |
| 127 for (size_t t = 0; t <= 23; ++t) | |
| 128 offsets.push_back(t); | |
| 129 OffsetAdjuster::Adjustments adjustments; | |
| 130 adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 1)); | |
| 131 adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 2)); | |
| 132 adjustments.push_back(OffsetAdjuster::Adjustment(10, 7, 4)); | |
| 133 adjustments.push_back(OffsetAdjuster::Adjustment(20, 3, 1)); | |
| 134 OffsetAdjuster::AdjustOffsets(adjustments, &offsets); | |
| 135 size_t expected_2[] = { | |
| 136 0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, kNpos, kNpos, kNpos, | |
| 137 kNpos, kNpos, kNpos, 10, 11, 12, 13, kNpos, kNpos, 14 | |
| 138 }; | |
| 139 EXPECT_EQ(offsets.size(), arraysize(expected_2)); | |
| 140 for (size_t i = 0; i < arraysize(expected_2); ++i) | |
| 141 EXPECT_EQ(expected_2[i], offsets[i]); | |
| 142 } | |
| 143 | |
| 144 // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe | |
| 145 { | |
| 146 std::vector<size_t> offsets; | |
| 147 for (size_t t = 0; t <= 17; ++t) | |
| 148 offsets.push_back(t); | |
| 149 OffsetAdjuster::Adjustments adjustments; | |
| 150 adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 0)); | |
| 151 adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 4)); | |
| 152 adjustments.push_back(OffsetAdjuster::Adjustment(11, 3, 3)); | |
| 153 adjustments.push_back(OffsetAdjuster::Adjustment(15, 2, 0)); | |
| 154 OffsetAdjuster::AdjustOffsets(adjustments, &offsets); | |
| 155 size_t expected_3[] = { | |
| 156 0, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 7, 8, kNpos, kNpos, 11, | |
| 157 12, kNpos, 12 | |
| 158 }; | |
| 159 EXPECT_EQ(offsets.size(), arraysize(expected_3)); | |
| 160 for (size_t i = 0; i < arraysize(expected_3); ++i) | |
| 161 EXPECT_EQ(expected_3[i], offsets[i]); | |
| 162 } | |
| 163 } | |
| 164 | |
| 165 TEST(UTFOffsetStringConversionsTest, UnadjustOffsets) { | |
| 166 // Imagine we have strings as shown in the following cases where the | |
| 167 // X's represent encoded characters. | |
| 168 // 1: abcXXXdef ==> abcXdef | |
| 169 { | |
| 170 std::vector<size_t> offsets; | |
| 171 for (size_t t = 0; t <= 7; ++t) | |
| 172 offsets.push_back(t); | |
| 173 OffsetAdjuster::Adjustments adjustments; | |
| 174 adjustments.push_back(OffsetAdjuster::Adjustment(3, 3, 1)); | |
| 175 OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); | |
| 176 size_t expected_1[] = {0, 1, 2, 3, 6, 7, 8, 9}; | |
| 177 EXPECT_EQ(offsets.size(), arraysize(expected_1)); | |
| 178 for (size_t i = 0; i < arraysize(expected_1); ++i) | |
| 179 EXPECT_EQ(expected_1[i], offsets[i]); | |
| 180 } | |
| 181 | |
| 182 // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX | |
| 183 { | |
| 184 std::vector<size_t> offsets; | |
| 185 for (size_t t = 0; t <= 14; ++t) | |
| 186 offsets.push_back(t); | |
| 187 OffsetAdjuster::Adjustments adjustments; | |
| 188 adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 1)); | |
| 189 adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 2)); | |
| 190 adjustments.push_back(OffsetAdjuster::Adjustment(10, 7, 4)); | |
| 191 adjustments.push_back(OffsetAdjuster::Adjustment(20, 3, 1)); | |
| 192 OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); | |
| 193 size_t expected_2[] = { | |
| 194 0, 3, 4, kNpos, 8, 9, 10, kNpos, kNpos, kNpos, 17, 18, 19, 20, 23 | |
| 195 }; | |
| 196 EXPECT_EQ(offsets.size(), arraysize(expected_2)); | |
| 197 for (size_t i = 0; i < arraysize(expected_2); ++i) | |
| 198 EXPECT_EQ(expected_2[i], offsets[i]); | |
| 199 } | |
| 200 | |
| 201 // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe | |
| 202 { | |
| 203 std::vector<size_t> offsets; | |
| 204 for (size_t t = 0; t <= 12; ++t) | |
| 205 offsets.push_back(t); | |
| 206 OffsetAdjuster::Adjustments adjustments; | |
| 207 adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 0)); | |
| 208 adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 4)); | |
| 209 adjustments.push_back(OffsetAdjuster::Adjustment(11, 3, 3)); | |
| 210 adjustments.push_back(OffsetAdjuster::Adjustment(15, 2, 0)); | |
| 211 OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); | |
| 212 size_t expected_3[] = { | |
| 213 0, // this could just as easily be 3 | |
| 214 4, kNpos, kNpos, kNpos, 8, 9, 10, 11, kNpos, kNpos, 14, | |
| 215 15 // this could just as easily be 17 | |
| 216 }; | |
| 217 EXPECT_EQ(offsets.size(), arraysize(expected_3)); | |
| 218 for (size_t i = 0; i < arraysize(expected_3); ++i) | |
| 219 EXPECT_EQ(expected_3[i], offsets[i]); | |
| 220 } | |
| 221 } | |
| 222 | |
| 223 // MergeSequentialAdjustments is used by net/base/escape.{h,cc} and | |
| 224 // net/base/net_util.{h,cc}. The two tests EscapeTest.AdjustOffset and | |
| 225 // NetUtilTest.FormatUrlWithOffsets test its behavior extensively. This | |
| 226 // is simply a short, additional test. | |
| 227 TEST(UTFOffsetStringConversionsTest, MergeSequentialAdjustments) { | |
| 228 // Pretend the input string is "abcdefghijklmnopqrstuvwxyz". | |
| 229 | |
| 230 // Set up |first_adjustments| to | |
| 231 // - remove the leading "a" | |
| 232 // - combine the "bc" into one character (call it ".") | |
| 233 // - remove the "f" | |
| 234 // - remove the "tuv" | |
| 235 // The resulting string should be ".deghijklmnopqrswxyz". | |
| 236 OffsetAdjuster::Adjustments first_adjustments; | |
| 237 first_adjustments.push_back(OffsetAdjuster::Adjustment(0, 1, 0)); | |
| 238 first_adjustments.push_back(OffsetAdjuster::Adjustment(1, 2, 1)); | |
| 239 first_adjustments.push_back(OffsetAdjuster::Adjustment(5, 1, 0)); | |
| 240 first_adjustments.push_back(OffsetAdjuster::Adjustment(19, 3, 0)); | |
| 241 | |
| 242 // Set up |adjustments_on_adjusted_string| to | |
| 243 // - combine the "." character that replaced "bc" with "d" into one character | |
| 244 // (call it "?") | |
| 245 // - remove the "egh" | |
| 246 // - expand the "i" into two characters (call them "12") | |
| 247 // - combine the "jkl" into one character (call it "@") | |
| 248 // - expand the "z" into two characters (call it "34") | |
| 249 // The resulting string should be "?12@mnopqrswxy34". | |
| 250 OffsetAdjuster::Adjustments adjustments_on_adjusted_string; | |
| 251 adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment( | |
| 252 0, 2, 1)); | |
| 253 adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment( | |
| 254 2, 3, 0)); | |
| 255 adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment( | |
| 256 5, 1, 2)); | |
| 257 adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment( | |
| 258 6, 3, 1)); | |
| 259 adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment( | |
| 260 19, 1, 2)); | |
| 261 | |
| 262 // Now merge the adjustments and check the results. | |
| 263 OffsetAdjuster::MergeSequentialAdjustments(first_adjustments, | |
| 264 &adjustments_on_adjusted_string); | |
| 265 // The merged adjustments should look like | |
| 266 // - combine abcd into "?" | |
| 267 // - note: it's also reasonable for the Merge function to instead produce | |
| 268 // two adjustments instead of this, one to remove a and another to | |
| 269 // combine bcd into "?". This test verifies the current behavior. | |
| 270 // - remove efgh | |
| 271 // - expand i into "12" | |
| 272 // - combine jkl into "@" | |
| 273 // - remove tuv | |
| 274 // - expand z into "34" | |
| 275 ASSERT_EQ(6u, adjustments_on_adjusted_string.size()); | |
| 276 EXPECT_EQ(0u, adjustments_on_adjusted_string[0].original_offset); | |
| 277 EXPECT_EQ(4u, adjustments_on_adjusted_string[0].original_length); | |
| 278 EXPECT_EQ(1u, adjustments_on_adjusted_string[0].output_length); | |
| 279 EXPECT_EQ(4u, adjustments_on_adjusted_string[1].original_offset); | |
| 280 EXPECT_EQ(4u, adjustments_on_adjusted_string[1].original_length); | |
| 281 EXPECT_EQ(0u, adjustments_on_adjusted_string[1].output_length); | |
| 282 EXPECT_EQ(8u, adjustments_on_adjusted_string[2].original_offset); | |
| 283 EXPECT_EQ(1u, adjustments_on_adjusted_string[2].original_length); | |
| 284 EXPECT_EQ(2u, adjustments_on_adjusted_string[2].output_length); | |
| 285 EXPECT_EQ(9u, adjustments_on_adjusted_string[3].original_offset); | |
| 286 EXPECT_EQ(3u, adjustments_on_adjusted_string[3].original_length); | |
| 287 EXPECT_EQ(1u, adjustments_on_adjusted_string[3].output_length); | |
| 288 EXPECT_EQ(19u, adjustments_on_adjusted_string[4].original_offset); | |
| 289 EXPECT_EQ(3u, adjustments_on_adjusted_string[4].original_length); | |
| 290 EXPECT_EQ(0u, adjustments_on_adjusted_string[4].output_length); | |
| 291 EXPECT_EQ(25u, adjustments_on_adjusted_string[5].original_offset); | |
| 292 EXPECT_EQ(1u, adjustments_on_adjusted_string[5].original_length); | |
| 293 EXPECT_EQ(2u, adjustments_on_adjusted_string[5].output_length); | |
| 294 } | |
| 295 | |
| 296 } // namespace base | |
| OLD | NEW |