OLD | NEW |
| (Empty) |
1 // Copyright 2015 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "core/include/fxcrt/fx_basic.h" | |
6 #include "public/fpdf_text.h" | |
7 #include "public/fpdfview.h" | |
8 #include "testing/embedder_test.h" | |
9 #include "testing/gtest/include/gtest/gtest.h" | |
10 #include "testing/test_support.h" | |
11 | |
12 namespace { | |
13 | |
// Returns true when the first |length| 16-bit code units of |actual| equal
// the corresponding bytes of the NUL-terminated string |expected|.
//
// |length| may be at most strlen(expected) + 1, which lets callers include
// the terminating NUL in the comparison. Larger values return false rather
// than reading past the end of |expected|.
//
// Each byte of |expected| is widened through unsigned char, so bytes >= 0x80
// compare as 0x0080..0x00FF rather than as a sign-extended 0xFF80..0xFFFF on
// platforms where plain char is signed.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  if (length > strlen(expected) + 1) {
    return false;
  }
  for (size_t i = 0; i < length; ++i) {
    const unsigned short want = static_cast<unsigned char>(expected[i]);
    if (actual[i] != want) {
      return false;
    }
  }
  return true;
}
27 | |
28 } // namespace | |
29 | |
30 class FPDFTextEmbeddertest : public EmbedderTest {}; | |
31 | |
32 TEST_F(FPDFTextEmbeddertest, Text) { | |
33 EXPECT_TRUE(OpenDocument("hello_world.pdf")); | |
34 FPDF_PAGE page = LoadPage(0); | |
35 EXPECT_NE(nullptr, page); | |
36 | |
37 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
38 EXPECT_NE(nullptr, textpage); | |
39 | |
40 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; | |
41 unsigned short fixed_buffer[128]; | |
42 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
43 | |
44 // Check includes the terminating NUL that is provided. | |
45 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); | |
46 ASSERT_GE(num_chars, 0); | |
47 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars)); | |
48 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); | |
49 | |
50 // Count does not include the terminating NUL in the string literal. | |
51 EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage)); | |
52 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { | |
53 EXPECT_EQ(static_cast<unsigned int>(expected[i]), | |
54 FPDFText_GetUnicode(textpage, i)) | |
55 << " at " << i; | |
56 } | |
57 | |
58 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); | |
59 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); | |
60 | |
61 double left = 0.0; | |
62 double right = 0.0; | |
63 double bottom = 0.0; | |
64 double top = 0.0; | |
65 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); | |
66 EXPECT_NEAR(41.071, left, 0.001); | |
67 EXPECT_NEAR(46.243, right, 0.001); | |
68 EXPECT_NEAR(49.844, bottom, 0.001); | |
69 EXPECT_NEAR(55.520, top, 0.001); | |
70 | |
71 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0)); | |
72 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0)); | |
73 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0)); | |
74 | |
75 // Test out of range indicies. | |
76 EXPECT_EQ(-1, | |
77 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0)); | |
78 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0)); | |
79 | |
80 // Count does not include the terminating NUL in the string literal. | |
81 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); | |
82 | |
83 left = 0.0; | |
84 right = 0.0; | |
85 bottom = 0.0; | |
86 top = 0.0; | |
87 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); | |
88 EXPECT_NEAR(20.847, left, 0.001); | |
89 EXPECT_NEAR(135.167, right, 0.001); | |
90 EXPECT_NEAR(96.655, bottom, 0.001); | |
91 EXPECT_NEAR(116.000, top, 0.001); | |
92 | |
93 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0). | |
94 left = -1.0; | |
95 right = -1.0; | |
96 bottom = -1.0; | |
97 top = -1.0; | |
98 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); | |
99 EXPECT_EQ(0.0, left); | |
100 EXPECT_EQ(0.0, right); | |
101 EXPECT_EQ(0.0, bottom); | |
102 EXPECT_EQ(0.0, top); | |
103 | |
104 left = -2.0; | |
105 right = -2.0; | |
106 bottom = -2.0; | |
107 top = -2.0; | |
108 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); | |
109 EXPECT_EQ(0.0, left); | |
110 EXPECT_EQ(0.0, right); | |
111 EXPECT_EQ(0.0, bottom); | |
112 EXPECT_EQ(0.0, top); | |
113 | |
114 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); | |
115 | |
116 // Extract starting at character 4 as above. | |
117 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
118 EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, | |
119 fixed_buffer, 1)); | |
120 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); | |
121 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
122 | |
123 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
124 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, | |
125 fixed_buffer, 9)); | |
126 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
127 EXPECT_EQ(0xbdbd, fixed_buffer[9]); | |
128 | |
129 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
130 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, | |
131 fixed_buffer, 128)); | |
132 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
133 EXPECT_EQ(0u, fixed_buffer[9]); | |
134 EXPECT_EQ(0xbdbd, fixed_buffer[10]); | |
135 | |
136 FPDFText_ClosePage(textpage); | |
137 UnloadPage(page); | |
138 } | |
139 | |
140 TEST_F(FPDFTextEmbeddertest, TextSearch) { | |
141 EXPECT_TRUE(OpenDocument("hello_world.pdf")); | |
142 FPDF_PAGE page = LoadPage(0); | |
143 EXPECT_NE(nullptr, page); | |
144 | |
145 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
146 EXPECT_NE(nullptr, textpage); | |
147 | |
148 std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope = | |
149 GetFPDFWideString(L"nope"); | |
150 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world = | |
151 GetFPDFWideString(L"world"); | |
152 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps = | |
153 GetFPDFWideString(L"WORLD"); | |
154 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr = | |
155 GetFPDFWideString(L"orld"); | |
156 | |
157 // No occurences of "nope" in test page. | |
158 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0); | |
159 EXPECT_NE(nullptr, search); | |
160 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
161 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
162 | |
163 // Advancing finds nothing. | |
164 EXPECT_FALSE(FPDFText_FindNext(search)); | |
165 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
166 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
167 | |
168 // Retreating finds nothing. | |
169 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
170 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
171 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
172 FPDFText_FindClose(search); | |
173 | |
174 // Two occurences of "world" in test page. | |
175 search = FPDFText_FindStart(textpage, world.get(), 0, 2); | |
176 EXPECT_NE(nullptr, search); | |
177 | |
178 // Remains not found until advanced. | |
179 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
180 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
181 | |
182 // First occurence of "world" in this test page. | |
183 EXPECT_TRUE(FPDFText_FindNext(search)); | |
184 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
185 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
186 | |
187 // Last occurence of "world" in this test page. | |
188 EXPECT_TRUE(FPDFText_FindNext(search)); | |
189 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
190 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
191 | |
192 // Found position unchanged when fails to advance. | |
193 EXPECT_FALSE(FPDFText_FindNext(search)); | |
194 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
195 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
196 | |
197 // Back to first occurence. | |
198 EXPECT_TRUE(FPDFText_FindPrev(search)); | |
199 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
200 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
201 | |
202 // Found position unchanged when fails to retreat. | |
203 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
204 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
205 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
206 FPDFText_FindClose(search); | |
207 | |
208 // Exact search unaffected by case sensitiity and whole word flags. | |
209 search = FPDFText_FindStart(textpage, world.get(), | |
210 FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); | |
211 EXPECT_NE(nullptr, search); | |
212 EXPECT_TRUE(FPDFText_FindNext(search)); | |
213 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
214 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
215 FPDFText_FindClose(search); | |
216 | |
217 // Default is case-insensitive, so matching agaist caps works. | |
218 search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0); | |
219 EXPECT_NE(nullptr, search); | |
220 EXPECT_TRUE(FPDFText_FindNext(search)); | |
221 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
222 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
223 FPDFText_FindClose(search); | |
224 | |
225 // But can be made case sensitive, in which case this fails. | |
226 search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0); | |
227 EXPECT_FALSE(FPDFText_FindNext(search)); | |
228 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
229 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
230 FPDFText_FindClose(search); | |
231 | |
232 // Default is match anywhere within word, so matching substirng works. | |
233 search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0); | |
234 EXPECT_TRUE(FPDFText_FindNext(search)); | |
235 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); | |
236 EXPECT_EQ(4, FPDFText_GetSchCount(search)); | |
237 FPDFText_FindClose(search); | |
238 | |
239 // But can be made to mach word boundaries, in which case this fails. | |
240 search = | |
241 FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0); | |
242 EXPECT_FALSE(FPDFText_FindNext(search)); | |
243 // TODO(tsepez): investigate strange index/count values in this state. | |
244 FPDFText_FindClose(search); | |
245 | |
246 FPDFText_ClosePage(textpage); | |
247 UnloadPage(page); | |
248 } | |
249 | |
250 // Test that the page has characters despite a bad stream length. | |
251 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { | |
252 EXPECT_TRUE(OpenDocument("bug_57.pdf")); | |
253 FPDF_PAGE page = LoadPage(0); | |
254 EXPECT_NE(nullptr, page); | |
255 | |
256 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
257 EXPECT_NE(nullptr, textpage); | |
258 EXPECT_EQ(13, FPDFText_CountChars(textpage)); | |
259 | |
260 FPDFText_ClosePage(textpage); | |
261 UnloadPage(page); | |
262 } | |
263 | |
264 TEST_F(FPDFTextEmbeddertest, WebLinks) { | |
265 EXPECT_TRUE(OpenDocument("weblinks.pdf")); | |
266 FPDF_PAGE page = LoadPage(0); | |
267 EXPECT_NE(nullptr, page); | |
268 | |
269 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
270 EXPECT_NE(nullptr, textpage); | |
271 | |
272 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); | |
273 EXPECT_NE(nullptr, pagelink); | |
274 | |
275 // Page contains two HTTP-style URLs. | |
276 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); | |
277 | |
278 // Only a terminating NUL required for bogus links. | |
279 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); | |
280 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); | |
281 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); | |
282 | |
283 // Query the number of characters required for each link (incl NUL). | |
284 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); | |
285 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); | |
286 | |
287 static const char expected_url[] = "http://example.com?q=foo"; | |
288 unsigned short fixed_buffer[128]; | |
289 | |
290 // Retrieve a link with too small a buffer. Buffer will not be | |
291 // NUL-terminated, but must not be modified past indicated length, | |
292 // so pre-fill with a pattern to check write bounds. | |
293 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
294 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); | |
295 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); | |
296 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
297 | |
298 // Check buffer that doesn't have space for a terminating NUL. | |
299 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
300 EXPECT_EQ( | |
301 sizeof(expected_url) - 1, | |
302 FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url) - 1)); | |
303 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, | |
304 sizeof(expected_url) - 1)); | |
305 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]); | |
306 | |
307 // Retreive link with exactly-sized buffer. | |
308 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
309 EXPECT_EQ(sizeof(expected_url), | |
310 FPDFLink_GetURL(pagelink, 0, fixed_buffer, sizeof(expected_url))); | |
311 EXPECT_TRUE( | |
312 check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url))); | |
313 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); | |
314 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); | |
315 | |
316 // Retreive link with ample-sized-buffer. | |
317 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
318 EXPECT_EQ(sizeof(expected_url), | |
319 FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128)); | |
320 EXPECT_TRUE( | |
321 check_unsigned_shorts(expected_url, fixed_buffer, sizeof(expected_url))); | |
322 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); | |
323 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); | |
324 | |
325 // Each link rendered in a single rect in this test page. | |
326 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); | |
327 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); | |
328 | |
329 // Each link rendered in a single rect in this test page. | |
330 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); | |
331 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); | |
332 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); | |
333 | |
334 // Check boundary of valid link index with valid rect index. | |
335 double left = 0.0; | |
336 double right = 0.0; | |
337 double top = 0.0; | |
338 double bottom = 0.0; | |
339 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); | |
340 EXPECT_NEAR(50.791, left, 0.001); | |
341 EXPECT_NEAR(187.963, right, 0.001); | |
342 EXPECT_NEAR(97.624, bottom, 0.001); | |
343 EXPECT_NEAR(108.736, top, 0.001); | |
344 | |
345 // Check that valid link with invalid rect index leaves parameters unchanged. | |
346 left = -1.0; | |
347 right = -1.0; | |
348 top = -1.0; | |
349 bottom = -1.0; | |
350 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); | |
351 EXPECT_EQ(-1.0, left); | |
352 EXPECT_EQ(-1.0, right); | |
353 EXPECT_EQ(-1.0, bottom); | |
354 EXPECT_EQ(-1.0, top); | |
355 | |
356 // Check that invalid link index leaves parameters unchanged. | |
357 left = -2.0; | |
358 right = -2.0; | |
359 top = -2.0; | |
360 bottom = -2.0; | |
361 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); | |
362 EXPECT_EQ(-2.0, left); | |
363 EXPECT_EQ(-2.0, right); | |
364 EXPECT_EQ(-2.0, bottom); | |
365 EXPECT_EQ(-2.0, top); | |
366 | |
367 FPDFLink_CloseWebLinks(pagelink); | |
368 FPDFText_ClosePage(textpage); | |
369 UnloadPage(page); | |
370 } | |
371 | |
372 TEST_F(FPDFTextEmbeddertest, GetFontSize) { | |
373 EXPECT_TRUE(OpenDocument("hello_world.pdf")); | |
374 FPDF_PAGE page = LoadPage(0); | |
375 EXPECT_NE(nullptr, page); | |
376 | |
377 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
378 EXPECT_NE(nullptr, textpage); | |
379 | |
380 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, | |
381 12, 12, 12, 1, 1, 16, 16, 16, 16, 16, | |
382 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; | |
383 | |
384 int count = FPDFText_CountChars(textpage); | |
385 ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), count); | |
386 for (int i = 0; i < count; ++i) | |
387 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i; | |
388 | |
389 FPDFText_ClosePage(textpage); | |
390 UnloadPage(page); | |
391 } | |
OLD | NEW |