| OLD | NEW |
| (Empty) |
| 1 // Copyright 2015 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "core/include/fxcrt/fx_basic.h" | |
| 6 #include "public/fpdf_text.h" | |
| 7 #include "public/fpdfview.h" | |
| 8 #include "testing/embedder_test.h" | |
| 9 #include "testing/gtest/include/gtest/gtest.h" | |
| 10 #include "testing/test_support.h" | |
| 11 | |
namespace {

// Returns true when the first |length| 16-bit units of |actual| match the
// first |length| bytes of |expected|.
//
// |expected| is a NUL-terminated narrow string. |length| may be at most
// strlen(expected) + 1, in which case the terminating NUL is compared as well;
// any larger |length| returns false without reading either buffer. A null
// |expected| or |actual| also returns false.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  if (!expected || !actual)
    return false;

  // Never read past the terminating NUL of |expected|.
  if (length > strlen(expected) + 1)
    return false;

  for (size_t i = 0; i < length; ++i) {
    // Convert through unsigned char so that bytes >= 0x80 are zero-extended.
    // Where plain char is signed, casting it directly to unsigned short
    // sign-extends (0xE9 -> 0xFFE9) and a correct buffer would be rejected.
    if (actual[i] != static_cast<unsigned char>(expected[i]))
      return false;
  }
  return true;
}

}  // namespace
| 29 | |
| 30 class FPDFTextEmbeddertest : public EmbedderTest {}; | |
| 31 | |
| 32 TEST_F(FPDFTextEmbeddertest, Text) { | |
| 33 EXPECT_TRUE(OpenDocument("hello_world.pdf")); | |
| 34 FPDF_PAGE page = LoadPage(0); | |
| 35 EXPECT_NE(nullptr, page); | |
| 36 | |
| 37 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
| 38 EXPECT_NE(nullptr, textpage); | |
| 39 | |
| 40 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; | |
| 41 unsigned short fixed_buffer[128]; | |
| 42 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 43 | |
| 44 // Check includes the terminating NUL that is provided. | |
| 45 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); | |
| 46 ASSERT_GE(num_chars, 0); | |
| 47 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars)); | |
| 48 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); | |
| 49 | |
| 50 // Count does not include the terminating NUL in the string literal. | |
| 51 EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage)); | |
| 52 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { | |
| 53 EXPECT_EQ(static_cast<unsigned int>(expected[i]), | |
| 54 FPDFText_GetUnicode(textpage, i)) | |
| 55 << " at " << i; | |
| 56 } | |
| 57 | |
| 58 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); | |
| 59 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); | |
| 60 | |
| 61 double left = 0.0; | |
| 62 double right = 0.0; | |
| 63 double bottom = 0.0; | |
| 64 double top = 0.0; | |
| 65 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); | |
| 66 EXPECT_NEAR(41.071, left, 0.001); | |
| 67 EXPECT_NEAR(46.243, right, 0.001); | |
| 68 EXPECT_NEAR(49.844, bottom, 0.001); | |
| 69 EXPECT_NEAR(55.520, top, 0.001); | |
| 70 | |
| 71 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0)); | |
| 72 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0)); | |
| 73 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0)); | |
| 74 | |
| 75 // Test out of range indicies. | |
| 76 EXPECT_EQ(-1, | |
| 77 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0)); | |
| 78 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0)); | |
| 79 | |
| 80 // Count does not include the terminating NUL in the string literal. | |
| 81 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); | |
| 82 | |
| 83 left = 0.0; | |
| 84 right = 0.0; | |
| 85 bottom = 0.0; | |
| 86 top = 0.0; | |
| 87 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); | |
| 88 EXPECT_NEAR(20.847, left, 0.001); | |
| 89 EXPECT_NEAR(135.167, right, 0.001); | |
| 90 EXPECT_NEAR(96.655, bottom, 0.001); | |
| 91 EXPECT_NEAR(116.000, top, 0.001); | |
| 92 | |
| 93 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0). | |
| 94 left = -1.0; | |
| 95 right = -1.0; | |
| 96 bottom = -1.0; | |
| 97 top = -1.0; | |
| 98 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); | |
| 99 EXPECT_EQ(0.0, left); | |
| 100 EXPECT_EQ(0.0, right); | |
| 101 EXPECT_EQ(0.0, bottom); | |
| 102 EXPECT_EQ(0.0, top); | |
| 103 | |
| 104 left = -2.0; | |
| 105 right = -2.0; | |
| 106 bottom = -2.0; | |
| 107 top = -2.0; | |
| 108 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); | |
| 109 EXPECT_EQ(0.0, left); | |
| 110 EXPECT_EQ(0.0, right); | |
| 111 EXPECT_EQ(0.0, bottom); | |
| 112 EXPECT_EQ(0.0, top); | |
| 113 | |
| 114 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); | |
| 115 | |
| 116 // Extract starting at character 4 as above. | |
| 117 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 118 EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, | |
| 119 fixed_buffer, 1)); | |
| 120 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); | |
| 121 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
| 122 | |
| 123 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 124 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, | |
| 125 fixed_buffer, 9)); | |
| 126 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
| 127 EXPECT_EQ(0xbdbd, fixed_buffer[9]); | |
| 128 | |
| 129 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 130 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, | |
| 131 fixed_buffer, 128)); | |
| 132 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
| 133 EXPECT_EQ(0u, fixed_buffer[9]); | |
| 134 EXPECT_EQ(0xbdbd, fixed_buffer[10]); | |
| 135 | |
| 136 FPDFText_ClosePage(textpage); | |
| 137 UnloadPage(page); | |
| 138 } | |
| 139 | |
| 140 TEST_F(FPDFTextEmbeddertest, TextSearch) { | |
| 141 EXPECT_TRUE(OpenDocument("hello_world.pdf")); | |
| 142 FPDF_PAGE page = LoadPage(0); | |
| 143 EXPECT_NE(nullptr, page); | |
| 144 | |
| 145 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
| 146 EXPECT_NE(nullptr, textpage); | |
| 147 | |
| 148 std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope = | |
| 149 GetFPDFWideString(L"nope"); | |
| 150 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world = | |
| 151 GetFPDFWideString(L"world"); | |
| 152 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps = | |
| 153 GetFPDFWideString(L"WORLD"); | |
| 154 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr = | |
| 155 GetFPDFWideString(L"orld"); | |
| 156 | |
| 157 // No occurences of "nope" in test page. | |
| 158 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0); | |
| 159 EXPECT_NE(nullptr, search); | |
| 160 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
| 161 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
| 162 | |
| 163 // Advancing finds nothing. | |
| 164 EXPECT_FALSE(FPDFText_FindNext(search)); | |
| 165 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
| 166 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
| 167 | |
| 168 // Retreating finds nothing. | |
| 169 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
| 170 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
| 171 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
| 172 FPDFText_FindClose(search); | |
| 173 | |
| 174 // Two occurences of "world" in test page. | |
| 175 search = FPDFText_FindStart(textpage, world.get(), 0, 2); | |
| 176 EXPECT_NE(nullptr, search); | |
| 177 | |
| 178 // Remains not found until advanced. | |
| 179 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
| 180 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
| 181 | |
| 182 // First occurence of "world" in this test page. | |
| 183 EXPECT_TRUE(FPDFText_FindNext(search)); | |
| 184 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
| 185 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 186 | |
| 187 // Last occurence of "world" in this test page. | |
| 188 EXPECT_TRUE(FPDFText_FindNext(search)); | |
| 189 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
| 190 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 191 | |
| 192 // Found position unchanged when fails to advance. | |
| 193 EXPECT_FALSE(FPDFText_FindNext(search)); | |
| 194 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
| 195 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 196 | |
| 197 // Back to first occurence. | |
| 198 EXPECT_TRUE(FPDFText_FindPrev(search)); | |
| 199 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
| 200 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 201 | |
| 202 // Found position unchanged when fails to retreat. | |
| 203 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
| 204 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
| 205 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 206 FPDFText_FindClose(search); | |
| 207 | |
| 208 // Exact search unaffected by case sensitiity and whole word flags. | |
| 209 search = FPDFText_FindStart(textpage, world.get(), | |
| 210 FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); | |
| 211 EXPECT_NE(nullptr, search); | |
| 212 EXPECT_TRUE(FPDFText_FindNext(search)); | |
| 213 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
| 214 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 215 FPDFText_FindClose(search); | |
| 216 | |
| 217 // Default is case-insensitive, so matching agaist caps works. | |
| 218 search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0); | |
| 219 EXPECT_NE(nullptr, search); | |
| 220 EXPECT_TRUE(FPDFText_FindNext(search)); | |
| 221 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
| 222 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
| 223 FPDFText_FindClose(search); | |
| 224 | |
| 225 // But can be made case sensitive, in which case this fails. | |
| 226 search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0); | |
| 227 EXPECT_FALSE(FPDFText_FindNext(search)); | |
| 228 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
| 229 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
| 230 FPDFText_FindClose(search); | |
| 231 | |
| 232 // Default is match anywhere within word, so matching substirng works. | |
| 233 search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0); | |
| 234 EXPECT_TRUE(FPDFText_FindNext(search)); | |
| 235 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); | |
| 236 EXPECT_EQ(4, FPDFText_GetSchCount(search)); | |
| 237 FPDFText_FindClose(search); | |
| 238 | |
| 239 // But can be made to mach word boundaries, in which case this fails. | |
| 240 search = | |
| 241 FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0); | |
| 242 EXPECT_FALSE(FPDFText_FindNext(search)); | |
| 243 // TODO(tsepez): investigate strange index/count values in this state. | |
| 244 FPDFText_FindClose(search); | |
| 245 | |
| 246 FPDFText_ClosePage(textpage); | |
| 247 UnloadPage(page); | |
| 248 } | |
| 249 | |
| 250 // Test that the page has characters despite a bad stream length. | |
| 251 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { | |
| 252 EXPECT_TRUE(OpenDocument("bug_57.pdf")); | |
| 253 FPDF_PAGE page = LoadPage(0); | |
| 254 EXPECT_NE(nullptr, page); | |
| 255 | |
| 256 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
| 257 EXPECT_NE(nullptr, textpage); | |
| 258 EXPECT_EQ(13, FPDFText_CountChars(textpage)); | |
| 259 | |
| 260 FPDFText_ClosePage(textpage); | |
| 261 UnloadPage(page); | |
| 262 } | |
| 263 | |
| 264 TEST_F(FPDFTextEmbeddertest, WebLinks) { | |
| 265 EXPECT_TRUE(OpenDocument("weblinks.pdf")); | |
| 266 FPDF_PAGE page = LoadPage(0); | |
| 267 EXPECT_NE(nullptr, page); | |
| 268 | |
| 269 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
| 270 EXPECT_NE(nullptr, textpage); | |
| 271 | |
| 272 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); | |
| 273 EXPECT_NE(nullptr, pagelink); | |
| 274 | |
| 275 // Page contains two HTTP-style URLs. | |
| 276 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); | |
| 277 | |
| 278 // Only a terminating NUL required for bogus links. | |
| 279 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); | |
| 280 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); | |
| 281 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); | |
| 282 | |
| 283 // Query the number of characters required for each link (incl NUL). | |
| 284 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); | |
| 285 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); | |
| 286 | |
| 287 static const char expected_url[] = "http://example.com?q=foo"; | |
| 288 unsigned short fixed_buffer[128]; | |
| 289 | |
| 290 // Retrieve a link with too small a buffer. Buffer will not be | |
| 291 // NUL-terminated, but must not be modified past indicated length, | |
| 292 // so pre-fill with a pattern to check write bounds. | |
| 293 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 294 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); | |
| 295 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); | |
| 296 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
| 297 | |
| 298 // Check buffer that doesn't have space for a terminating NUL. | |
| 299 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 300 EXPECT_EQ( | |
| 301 sizeof(expected_url) - 1, | |
| 302 FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url) - 1)); | |
| 303 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, | |
| 304 sizeof(expected_url) - 1)); | |
| 305 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]); | |
| 306 | |
| 307 // Retreive link with exactly-sized buffer. | |
| 308 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 309 EXPECT_EQ(sizeof(expected_url), | |
| 310 FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url))); | |
| 311 EXPECT_TRUE( | |
| 312 check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url))); | |
| 313 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); | |
| 314 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); | |
| 315 | |
| 316 // Retreive link with ample-sized-buffer. | |
| 317 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
| 318 EXPECT_EQ(sizeof(expected_url), | |
| 319 FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128)); | |
| 320 EXPECT_TRUE( | |
| 321 check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url))); | |
| 322 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); | |
| 323 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); | |
| 324 | |
| 325 // Each link rendered in a single rect in this test page. | |
| 326 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); | |
| 327 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); | |
| 328 | |
| 329 // Each link rendered in a single rect in this test page. | |
| 330 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); | |
| 331 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); | |
| 332 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); | |
| 333 | |
| 334 // Check boundary of valid link index with valid rect index. | |
| 335 double left = 0.0; | |
| 336 double right = 0.0; | |
| 337 double top = 0.0; | |
| 338 double bottom = 0.0; | |
| 339 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); | |
| 340 EXPECT_NEAR(50.791, left, 0.001); | |
| 341 EXPECT_NEAR(187.963, right, 0.001); | |
| 342 EXPECT_NEAR(97.624, bottom, 0.001); | |
| 343 EXPECT_NEAR(108.736, top, 0.001); | |
| 344 | |
| 345 // Check that valid link with invalid rect index leaves parameters unchanged. | |
| 346 left = -1.0; | |
| 347 right = -1.0; | |
| 348 top = -1.0; | |
| 349 bottom = -1.0; | |
| 350 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); | |
| 351 EXPECT_EQ(-1.0, left); | |
| 352 EXPECT_EQ(-1.0, right); | |
| 353 EXPECT_EQ(-1.0, bottom); | |
| 354 EXPECT_EQ(-1.0, top); | |
| 355 | |
| 356 // Check that invalid link index leaves parameters unchanged. | |
| 357 left = -2.0; | |
| 358 right = -2.0; | |
| 359 top = -2.0; | |
| 360 bottom = -2.0; | |
| 361 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); | |
| 362 EXPECT_EQ(-2.0, left); | |
| 363 EXPECT_EQ(-2.0, right); | |
| 364 EXPECT_EQ(-2.0, bottom); | |
| 365 EXPECT_EQ(-2.0, top); | |
| 366 | |
| 367 FPDFLink_CloseWebLinks(pagelink); | |
| 368 FPDFText_ClosePage(textpage); | |
| 369 UnloadPage(page); | |
| 370 } | |
| 371 | |
| 372 TEST_F(FPDFTextEmbeddertest, GetFontSize) { | |
| 373 EXPECT_TRUE(OpenDocument("hello_world.pdf")); | |
| 374 FPDF_PAGE page = LoadPage(0); | |
| 375 EXPECT_NE(nullptr, page); | |
| 376 | |
| 377 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
| 378 EXPECT_NE(nullptr, textpage); | |
| 379 | |
| 380 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, | |
| 381 12, 12, 12, 1, 1, 16, 16, 16, 16, 16, | |
| 382 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; | |
| 383 | |
| 384 int count = FPDFText_CountChars(textpage); | |
| 385 ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), count); | |
| 386 for (int i = 0; i < count; ++i) | |
| 387 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i; | |
| 388 | |
| 389 FPDFText_ClosePage(textpage); | |
| 390 UnloadPage(page); | |
| 391 } | |
| OLD | NEW |