OLD | NEW |
---|---|
1 // Copyright 2015 PDFium Authors. All rights reserved. | 1 // Copyright 2015 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "../../testing/embedder_test.h" | 5 #include "../../testing/embedder_test.h" |
6 #include "../../fpdfsdk/include/fpdfview.h" | 6 #include "../../fpdfsdk/include/fpdfview.h" |
7 #include "../../fpdfsdk/include/fpdftext.h" | 7 #include "../../fpdfsdk/include/fpdftext.h" |
8 #include "testing/gtest/include/gtest/gtest.h" | 8 #include "testing/gtest/include/gtest/gtest.h" |
9 | 9 |
10 namespace { | |
11 | |
12 static bool check_unsigned_shorts(const char* expected, | |
13 const unsigned short* actual, | |
14 size_t length) { | |
15 if (length > strlen(expected) + 1) { | |
16 return false; | |
17 } | |
18 for (size_t i = 0; i < length; ++i) { | |
19 if (actual[i] != static_cast<unsigned short>(expected[i])) { | |
20 return false; | |
21 } | |
22 } | |
23 return true; | |
24 } | |
25 | |
26 } // namespace | |
27 | |
10 class FPDFTextEmbeddertest : public EmbedderTest { | 28 class FPDFTextEmbeddertest : public EmbedderTest { |
11 }; | 29 }; |
12 | 30 |
31 TEST_F(FPDFTextEmbeddertest, Text) { | |
32 EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf")); | |
33 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | |
34 FPDF_PAGE page = LoadPage(0, form_handle); | |
35 EXPECT_NE(nullptr, page); | |
36 | |
37 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
38 EXPECT_NE(nullptr, textpage); | |
39 | |
40 const char expected[] = "Hello, world!\r\nGoodbye, world!"; | |
41 unsigned short fixed_buffer[128]; | |
42 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
43 | |
44 // Check includes the terminating NUL that is provided. | |
45 EXPECT_EQ(sizeof(expected), FPDFText_GetText(textpage, 0, 128, fixed_buffer)); | |
46 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); | |
47 | |
48 // Count does not include the terminating NUL in the string literal. | |
49 EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage)); | |
50 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { | |
51 EXPECT_EQ(expected[i], FPDFText_GetUnicode(textpage, i)) << " at " << i; | |
52 } | |
53 | |
54 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); | |
55 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); | |
56 | |
57 double left = 0.0; | |
58 double right = 0.0; | |
59 double bottom = 0.0; | |
60 double top = 0.0; | |
61 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); | |
62 EXPECT_NEAR(41.071, left, 0.001); | |
63 EXPECT_NEAR(46.243, right, 0.001); | |
64 EXPECT_NEAR(49.844, bottom, 0.001); | |
65 EXPECT_NEAR(55.520, top, 0.001); | |
66 | |
67 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos( | |
68 textpage, 42.0, 50.0, 1.0, 1.0)); | |
69 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
70 textpage, 0.0, 0.0, 1.0, 1.0)); | |
71 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
72 textpage, 199.0, 199.0, 1.0, 1.0)); | |
73 | |
74 // Test out of range indices. | |
75 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
76 textpage, 42.0, 10000000.0, 1.0, 1.0)); | |
77 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
78 textpage, -1.0, 50.0, 1.0, 1.0)); | |
79 | |
80 // Count does not include the terminating NUL in the string literal. | |
81 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); | |
82 | |
83 left = 0.0; | |
84 right = 0.0; | |
85 bottom = 0.0; | |
86 top = 0.0; | |
87 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); | |
88 EXPECT_NEAR(20.847, left, 0.001); | |
89 EXPECT_NEAR(135.167, right, 0.001); | |
90 EXPECT_NEAR(96.655, bottom, 0.001); | |
91 EXPECT_NEAR(116.000, top, 0.001); | |
92 | |
93 // Test out of range indices. | |
94 left = 0.0; | |
Lei Zhang
2015/01/28 18:35:02
Maybe set these to some value other than 0? Ditto
Tom Sepez
2015/01/28 18:50:34
Sadly, I think these are left unchanged in face of
Lei Zhang
2015/01/28 19:27:18
Right, given the API as is, I'm just saying it wou
Tom Sepez
2015/01/28 20:18:03
Done. Fascinating thing is that this API sets the
| |
95 right = 0.0; | |
96 bottom = 0.0; | |
97 top = 0.0; | |
98 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); | |
99 EXPECT_EQ(0.0, left); | |
100 EXPECT_EQ(0.0, right); | |
101 EXPECT_EQ(0.0, bottom); | |
102 EXPECT_EQ(0.0, top); | |
103 | |
104 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); | |
105 EXPECT_EQ(0.0, left); | |
106 EXPECT_EQ(0.0, right); | |
107 EXPECT_EQ(0.0, bottom); | |
108 EXPECT_EQ(0.0, top); | |
109 | |
110 EXPECT_EQ(9, FPDFText_GetBoundedText( | |
111 textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); | |
112 | |
113 // Extract starting at character 4 as above. | |
114 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
115 EXPECT_EQ(1, FPDFText_GetBoundedText( | |
116 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 1)); | |
117 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); | |
118 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
119 | |
120 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
121 EXPECT_EQ(9, FPDFText_GetBoundedText( | |
122 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 9)); | |
123 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
124 EXPECT_EQ(0xbdbd, fixed_buffer[9]); | |
125 | |
126 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
127 EXPECT_EQ(10, FPDFText_GetBoundedText( | |
128 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 128)); | |
129 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
130 EXPECT_EQ(0u, fixed_buffer[9]); | |
131 EXPECT_EQ(0xbdbd, fixed_buffer[10]); | |
132 | |
133 FPDFText_ClosePage(textpage); | |
134 ClearFormFillEnvironment(form_handle); | |
135 } | |
136 | |
137 TEST_F(FPDFTextEmbeddertest, TextSearch) { | |
138 EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf")); | |
139 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | |
140 FPDF_PAGE page = LoadPage(0, form_handle); | |
141 EXPECT_NE(nullptr, page); | |
142 | |
143 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
144 EXPECT_NE(nullptr, textpage); | |
145 | |
146 // Avoid issues with system wchar_t width vs. FPDF_WideString. | |
147 const unsigned short nope[] = { 'n', 'o', 'p', 'e', '\0' }; | |
148 const unsigned short world[] = { 'w', 'o', 'r', 'l', 'd', '\0' }; | |
149 const unsigned short world_caps[] = { 'W', 'O', 'R', 'L', 'D', '\0' }; | |
150 const unsigned short world_substr[] = { 'o', 'r', 'l', 'd', '\0' }; | |
151 | |
152 // No occurrences of "nope" in test page. | |
153 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope, 0, 0); | |
154 EXPECT_NE(nullptr, search); | |
155 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
156 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
157 | |
158 // Advancing finds nothing. | |
159 EXPECT_FALSE(FPDFText_FindNext(search)); | |
160 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
161 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
162 | |
163 // Retreating finds nothing. | |
164 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
165 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
166 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
167 FPDFText_FindClose(search); | |
168 | |
169 // Two occurrences of "world" in test page. | |
170 search = FPDFText_FindStart(textpage, world, 0, 2); | |
171 EXPECT_NE(nullptr, search); | |
172 | |
173 // Remains not found until advanced. | |
174 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
175 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
176 | |
177 // First occurrence of "world" in this test page. | |
178 EXPECT_TRUE(FPDFText_FindNext(search)); | |
179 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
180 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
181 | |
182 // Last occurrence of "world" in this test page. | |
183 EXPECT_TRUE(FPDFText_FindNext(search)); | |
184 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
185 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
186 | |
187 // Found position unchanged when fails to advance. | |
188 EXPECT_FALSE(FPDFText_FindNext(search)); | |
189 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
190 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
191 | |
192 // Back to first occurrence. | |
193 EXPECT_TRUE(FPDFText_FindPrev(search)); | |
194 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
195 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
196 | |
197 // Found position unchanged when fails to retreat. | |
198 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
199 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
200 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
201 FPDFText_FindClose(search); | |
202 | |
203 // Exact search unaffected by case sensitivity and whole word flags. | |
204 search = FPDFText_FindStart( | |
205 textpage, world, FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); | |
206 EXPECT_NE(nullptr, search); | |
207 EXPECT_TRUE(FPDFText_FindNext(search)); | |
208 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
209 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
210 FPDFText_FindClose(search); | |
211 | |
212 // Default is case-insensitive, so matching against caps works. | |
213 search = FPDFText_FindStart(textpage, world_caps, 0, 0); | |
214 EXPECT_NE(nullptr, search); | |
215 EXPECT_TRUE(FPDFText_FindNext(search)); | |
216 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
217 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
218 FPDFText_FindClose(search); | |
219 | |
220 // But can be made case sensitive, in which case this fails. | |
221 search = FPDFText_FindStart(textpage, world_caps, FPDF_MATCHCASE, 0); | |
222 EXPECT_FALSE(FPDFText_FindNext(search)); | |
223 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
224 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
225 FPDFText_FindClose(search); | |
226 | |
227 // Default is match anywhere within word, so matching substring works. | |
228 search = FPDFText_FindStart(textpage, world_substr, 0, 0); | |
229 EXPECT_TRUE(FPDFText_FindNext(search)); | |
230 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); | |
231 EXPECT_EQ(4, FPDFText_GetSchCount(search)); | |
232 FPDFText_FindClose(search); | |
233 | |
234 // But can be made to match word boundaries, in which case this fails. | |
235 search = FPDFText_FindStart(textpage, world_substr, FPDF_MATCHWHOLEWORD, 0); | |
236 EXPECT_FALSE(FPDFText_FindNext(search)); | |
237 // TODO(tsepez): investigate strange index/count values in this state. | |
238 FPDFText_FindClose(search); | |
239 | |
240 FPDFText_ClosePage(textpage); | |
241 ClearFormFillEnvironment(form_handle); | |
242 } | |
243 | |
13 // Test that the page has characters despite a bad stream length. | 244 // Test that the page has characters despite a bad stream length. |
14 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { | 245 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { |
15 EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf")); | 246 EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf")); |
16 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | 247 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); |
17 FPDF_PAGE page = LoadPage(0, form_handle); | 248 FPDF_PAGE page = LoadPage(0, form_handle); |
18 EXPECT_NE(nullptr, page); | 249 EXPECT_NE(nullptr, page); |
250 | |
19 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | 251 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); |
20 EXPECT_NE(nullptr, textpage); | 252 EXPECT_NE(nullptr, textpage); |
21 EXPECT_EQ(13, FPDFText_CountChars(textpage)); | 253 EXPECT_EQ(13, FPDFText_CountChars(textpage)); |
254 | |
255 FPDFText_ClosePage(textpage); | |
22 ClearFormFillEnvironment(form_handle); | 256 ClearFormFillEnvironment(form_handle); |
23 } | 257 } |
258 | |
259 TEST_F(FPDFTextEmbeddertest, WebLinks) { | |
260 EXPECT_TRUE(OpenDocument("testing/resources/weblinks.pdf")); | |
261 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | |
262 FPDF_PAGE page = LoadPage(0, form_handle); | |
263 EXPECT_NE(nullptr, page); | |
264 | |
265 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
266 EXPECT_NE(nullptr, textpage); | |
267 | |
268 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); | |
269 EXPECT_NE(nullptr, pagelink); | |
270 | |
271 // Page contains two HTTP-style URLs. | |
272 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); | |
273 | |
274 // Only a terminating NUL required for bogus links. | |
275 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); | |
276 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); | |
277 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); | |
278 | |
279 // Query the number of characters required for each link (incl NUL). | |
280 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); | |
281 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); | |
282 | |
283 const char expected_url[] = "http://example.com?q=foo"; | |
284 unsigned short fixed_buffer[128]; | |
285 | |
286 // Retrieve a link with too small a buffer. Buffer will not be | |
287 // NUL-terminated, but must not be modified past indicated length, | |
288 // so pre-fill with a pattern to check write bounds. | |
289 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
290 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); | |
291 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); | |
292 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
293 | |
294 // Check buffer that doesn't have space for a terminating NUL. | |
295 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
296 EXPECT_EQ(sizeof(expected_url) - 1, FPDFLink_GetURL( | |
297 pagelink, 0, fixed_buffer, sizeof(expected_url) - 1)); | |
298 EXPECT_TRUE(check_unsigned_shorts( | |
299 expected_url, fixed_buffer, sizeof(expected_url) - 1)); | |
300 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]); | |
301 | |
302 // Retrieve link with exactly-sized buffer. | |
303 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
304 EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL( | |
305 pagelink, 0, fixed_buffer, sizeof(expected_url))); | |
306 EXPECT_TRUE(check_unsigned_shorts( | |
307 expected_url, fixed_buffer, sizeof(expected_url))); | |
308 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); | |
309 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); | |
310 | |
311 // Retrieve link with amply-sized buffer. | |
312 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
313 EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL( | |
314 pagelink, 0, fixed_buffer, 128)); | |
315 EXPECT_TRUE(check_unsigned_shorts( | |
316 expected_url, fixed_buffer, sizeof(expected_url))); | |
317 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); | |
318 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); | |
319 | |
320 // Each link rendered in a single rect in this test page. | |
321 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); | |
322 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); | |
323 | |
324 // Out-of-range link indices report zero rects. | |
325 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); | |
326 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); | |
327 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); | |
328 | |
329 // Check boundary of valid link with valid rect index. | |
330 double left = 0.0; | |
331 double right = 0.0; | |
332 double top = 0.0; | |
333 double bottom = 0.0; | |
334 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); | |
335 EXPECT_NEAR(50.791, left, 0.001); | |
336 EXPECT_NEAR(187.963, right, 0.001); | |
337 EXPECT_NEAR(97.624, bottom, 0.001); | |
338 EXPECT_NEAR(108.736, top, 0.001); | |
339 | |
340 // Check boundary of valid link with invalid rect index. | |
341 left = 0.0; | |
342 right = 0.0; | |
343 top = 0.0; | |
344 bottom = 0.0; | |
345 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); | |
346 EXPECT_EQ(0.0, left); | |
347 EXPECT_EQ(0.0, right); | |
348 EXPECT_EQ(0.0, bottom); | |
349 EXPECT_EQ(0.0, top); | |
350 | |
351 // Check boundary of invalid link. | |
352 left = 0.0; | |
353 right = 0.0; | |
354 top = 0.0; | |
355 bottom = 0.0; | |
356 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); | |
357 EXPECT_EQ(0.0, left); | |
358 EXPECT_EQ(0.0, right); | |
359 EXPECT_EQ(0.0, bottom); | |
360 EXPECT_EQ(0.0, top); | |
361 | |
362 FPDFLink_CloseWebLinks(pagelink); | |
363 FPDFText_ClosePage(textpage); | |
364 ClearFormFillEnvironment(form_handle); | |
365 } | |
OLD | NEW |