OLD | NEW |
---|---|
1 // Copyright 2015 PDFium Authors. All rights reserved. | 1 // Copyright 2015 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "../../testing/embedder_test.h" | 5 #include "../../testing/embedder_test.h" |
6 #include "../../fpdfsdk/include/fpdfview.h" | 6 #include "../../fpdfsdk/include/fpdfview.h" |
7 #include "../../fpdfsdk/include/fpdftext.h" | 7 #include "../../fpdfsdk/include/fpdftext.h" |
8 #include "testing/gtest/include/gtest/gtest.h" | 8 #include "testing/gtest/include/gtest/gtest.h" |
9 | 9 |
10 namespace { | |
11 | |
12 static bool check_unsigned_shorts(const char* chars, | |
Lei Zhang
2015/01/28 02:27:52
Maybe rename this to "expected" and rename "buffer
Tom Sepez
2015/01/28 18:12:12
Done.
| |
13 const unsigned short* buffer, | |
14 int length) { | |
Lei Zhang
2015/01/28 02:27:52
Why not make |length| a size_t and avoid the cast
Tom Sepez
2015/01/28 18:12:13
Done.
| |
15 if (length > static_cast<int>(strlen(chars)) + 1) { | |
16 return false; | |
17 } | |
18 for (int i = 0; i < length; ++i) { | |
19 if (buffer[i] != static_cast<unsigned short>(chars[i])) { | |
20 return false; | |
21 } | |
22 } | |
23 return true; | |
24 } | |
25 | |
26 static int three_places(double value) { | |
27 return static_cast<int>(1000.0 * value); | |
28 } | |
29 | |
30 } // namespace | |
31 | |
10 class FPDFTextEmbeddertest : public EmbedderTest { | 32 class FPDFTextEmbeddertest : public EmbedderTest { |
11 }; | 33 }; |
12 | 34 |
35 TEST_F(FPDFTextEmbeddertest, Text) { | |
36 EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf")); | |
37 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | |
Lei Zhang
2015/01/28 02:27:51
Maybe the FPDFTextEmbeddertest class should have S
Tom Sepez
2015/01/28 18:12:12
Problem is that creating a form_handle requires a
| |
38 FPDF_PAGE page = LoadPage(0, form_handle); | |
39 EXPECT_NE(nullptr, page); | |
40 | |
41 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
42 EXPECT_NE(nullptr, textpage); | |
43 EXPECT_EQ(30, FPDFText_CountChars(textpage)); | |
Lei Zhang
2015/01/28 02:27:52
Can we use sizeof(expected) instead of 30 and 31?
Tom Sepez
2015/01/28 18:12:13
Done.
| |
44 | |
45 const char* expected = "Hello, world!\r\nGoodbye, world!"; | |
Lei Zhang
2015/01/28 02:27:52
const char expected[] = "...";
Tom Sepez
2015/01/28 18:12:13
Ok, was trying to avoid a copy into the stack var,
| |
46 unsigned short fixed_buffer[128]; | |
47 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
48 EXPECT_EQ(31, FPDFText_GetText(textpage, 0, 128, fixed_buffer)); | |
49 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, 31)); | |
50 | |
51 for (int i = 0; i < 31; ++i) { | |
52 EXPECT_EQ(expected[i], FPDFText_GetUnicode(textpage, i)) << " at " << i; | |
53 } | |
54 | |
55 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); | |
56 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); | |
57 | |
58 double left = 0.0; | |
59 double right = 0.0; | |
60 double bottom = 0.0; | |
61 double top = 0.0; | |
62 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); | |
63 EXPECT_EQ(three_places(41.071), three_places(left)); | |
Lei Zhang
2015/01/28 02:27:52
Why not just use EXPECT_FLOAT_EQ() and drop three_
Tom Sepez
2015/01/28 18:12:13
Ah, that's almost the magic I was looking for. EXP
| |
64 EXPECT_EQ(three_places(46.243), three_places(right)); | |
65 EXPECT_EQ(three_places(49.844), three_places(bottom)); | |
66 EXPECT_EQ(three_places(55.520), three_places(top)); | |
67 | |
68 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos( | |
69 textpage, 42.0, 50.0, 1.0, 1.0)); | |
70 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
71 textpage, 0.0, 0.0, 1.0, 1.0)); | |
72 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
73 textpage, 199.0, 199.0, 1.0, 1.0)); | |
74 | |
75 // Test out of range indices. | |
76 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
77 textpage, 42.0, 10000000.0, 1.0, 1.0)); | |
78 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( | |
79 textpage, -1.0, 50.0, 1.0, 1.0)); | |
80 | |
81 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, 30)); | |
82 | |
83 left = 0.0; | |
84 right = 0.0; | |
85 bottom = 0.0; | |
86 top = 0.0; | |
87 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); | |
88 EXPECT_EQ(three_places(20.847), three_places(left)); | |
89 EXPECT_EQ(three_places(135.167), three_places(right)); | |
90 EXPECT_EQ(three_places(96.655), three_places(bottom)); | |
91 EXPECT_EQ(three_places(116.000), three_places(top)); | |
92 | |
93 // Test out of range indices. | |
94 left = 0.0; | |
95 right = 0.0; | |
96 bottom = 0.0; | |
97 top = 0.0; | |
98 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); | |
99 EXPECT_EQ(0.0, left); | |
100 EXPECT_EQ(0.0, right); | |
101 EXPECT_EQ(0.0, bottom); | |
102 EXPECT_EQ(0.0, top); | |
103 | |
104 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); | |
105 EXPECT_EQ(0.0, left); | |
106 EXPECT_EQ(0.0, right); | |
107 EXPECT_EQ(0.0, bottom); | |
108 EXPECT_EQ(0.0, top); | |
109 | |
110 EXPECT_EQ(9, FPDFText_GetBoundedText( | |
111 textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); | |
112 | |
113 // Extract starting at character 4 as above. | |
114 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
115 EXPECT_EQ(1, FPDFText_GetBoundedText( | |
116 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 1)); | |
117 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); | |
118 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
119 | |
120 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
121 EXPECT_EQ(9, FPDFText_GetBoundedText( | |
122 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 9)); | |
123 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
124 EXPECT_EQ(0xbdbd, fixed_buffer[9]); | |
125 | |
126 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
127 EXPECT_EQ(10, FPDFText_GetBoundedText( | |
128 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 128)); | |
129 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); | |
130 EXPECT_EQ(0u, fixed_buffer[9]); | |
131 EXPECT_EQ(0xbdbd, fixed_buffer[10]); | |
132 | |
133 FPDFText_ClosePage(textpage); | |
134 ClearFormFillEnvironment(form_handle); | |
135 } | |
136 | |
137 TEST_F(FPDFTextEmbeddertest, TextSearch) { | |
138 EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf")); | |
139 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | |
140 FPDF_PAGE page = LoadPage(0, form_handle); | |
141 EXPECT_NE(nullptr, page); | |
142 | |
143 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
144 EXPECT_NE(nullptr, textpage); | |
145 | |
146 // Avoid issues with system wchar_t width vs. FPDF_WideString. | |
147 const unsigned short nope[] = { 'n', 'o', 'p', 'e', '\0' }; | |
148 const unsigned short world[] = { 'w', 'o', 'r', 'l', 'd', '\0' }; | |
149 const unsigned short world_caps[] = { 'W', 'O', 'R', 'L', 'D', '\0' }; | |
150 const unsigned short world_substr[] = { 'o', 'r', 'l', 'd', '\0' }; | |
151 | |
152 // No occurrences of "nope" in test page. | |
153 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope, 0, 0); | |
154 EXPECT_NE(nullptr, search); | |
155 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
156 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
157 | |
158 // Advancing finds nothing. | |
159 EXPECT_FALSE(FPDFText_FindNext(search)); | |
160 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
161 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
162 | |
163 // Retreating finds nothing. | |
164 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
165 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
166 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
167 FPDFText_FindClose(search); | |
168 | |
169 // Two occurrences of "world" in test page. | |
Lei Zhang
2015/01/28 02:27:51
occurrences, ditto below
Tom Sepez
2015/01/28 18:12:13
Done.
| |
170 search = FPDFText_FindStart(textpage, world, 0, 2); | |
171 EXPECT_NE(nullptr, search); | |
172 | |
173 // Remains not found until advanced. | |
174 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
175 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
176 | |
177 // First occurrence of "world" in this test page. | |
178 EXPECT_TRUE(FPDFText_FindNext(search)); | |
179 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
180 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
181 | |
182 // Last occurrence of "world" in this test page. | |
183 EXPECT_TRUE(FPDFText_FindNext(search)); | |
184 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
185 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
186 | |
187 // Found position unchanged when fails to advance. | |
188 EXPECT_FALSE(FPDFText_FindNext(search)); | |
189 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); | |
190 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
191 | |
192 // Back to first occurrence. | |
193 EXPECT_TRUE(FPDFText_FindPrev(search)); | |
194 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
195 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
196 | |
197 // Found position unchanged when fails to retreat. | |
198 EXPECT_FALSE(FPDFText_FindPrev(search)); | |
199 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
200 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
201 FPDFText_FindClose(search); | |
202 | |
203 // Exact search unaffected by case sensitivity and whole word flags. | |
204 search = FPDFText_FindStart( | |
205 textpage, world, FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); | |
206 EXPECT_NE(nullptr, search); | |
207 EXPECT_TRUE(FPDFText_FindNext(search)); | |
208 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
209 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
210 FPDFText_FindClose(search); | |
211 | |
212 // Default is case-insensitive, so matching against caps works. | |
213 search = FPDFText_FindStart(textpage, world_caps, 0, 0); | |
214 EXPECT_NE(nullptr, search); | |
215 EXPECT_TRUE(FPDFText_FindNext(search)); | |
216 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); | |
217 EXPECT_EQ(5, FPDFText_GetSchCount(search)); | |
218 FPDFText_FindClose(search); | |
219 | |
220 // But can be made case sensitive, in which case this fails. | |
221 search = FPDFText_FindStart(textpage, world_caps, FPDF_MATCHCASE, 0); | |
222 EXPECT_FALSE(FPDFText_FindNext(search)); | |
223 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); | |
224 EXPECT_EQ(0, FPDFText_GetSchCount(search)); | |
225 FPDFText_FindClose(search); | |
226 | |
227 // Default is match anywhere within word, so matching substring works. | |
228 search = FPDFText_FindStart(textpage, world_substr, 0, 0); | |
229 EXPECT_TRUE(FPDFText_FindNext(search)); | |
230 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); | |
231 EXPECT_EQ(4, FPDFText_GetSchCount(search)); | |
232 FPDFText_FindClose(search); | |
233 | |
234 // But can be made to match word boundaries, in which case this fails. | |
235 search = FPDFText_FindStart(textpage, world_substr, FPDF_MATCHWHOLEWORD, 0); | |
236 EXPECT_FALSE(FPDFText_FindNext(search)); | |
237 // TODO(tsepez): investigate strange index/count values in this state. | |
238 FPDFText_FindClose(search); | |
239 | |
240 FPDFText_ClosePage(textpage); | |
241 ClearFormFillEnvironment(form_handle); | |
242 } | |
243 | |
13 // Test that the page has characters despite a bad stream length. | 244 // Test that the page has characters despite a bad stream length. |
14 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { | 245 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { |
15 EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf")); | 246 EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf")); |
16 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | 247 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); |
17 FPDF_PAGE page = LoadPage(0, form_handle); | 248 FPDF_PAGE page = LoadPage(0, form_handle); |
18 EXPECT_NE(nullptr, page); | 249 EXPECT_NE(nullptr, page); |
250 | |
19 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | 251 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); |
20 EXPECT_NE(nullptr, textpage); | 252 EXPECT_NE(nullptr, textpage); |
21 EXPECT_EQ(13, FPDFText_CountChars(textpage)); | 253 EXPECT_EQ(13, FPDFText_CountChars(textpage)); |
254 | |
255 FPDFText_ClosePage(textpage); | |
22 ClearFormFillEnvironment(form_handle); | 256 ClearFormFillEnvironment(form_handle); |
23 } | 257 } |
258 | |
259 TEST_F(FPDFTextEmbeddertest, WebLinks) { | |
260 EXPECT_TRUE(OpenDocument("testing/resources/weblinks.pdf")); | |
261 FPDF_FORMHANDLE form_handle = SetFormFillEnvironment(); | |
262 FPDF_PAGE page = LoadPage(0, form_handle); | |
263 EXPECT_NE(nullptr, page); | |
264 | |
265 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); | |
266 EXPECT_NE(nullptr, textpage); | |
267 | |
268 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); | |
269 EXPECT_NE(nullptr, pagelink); | |
270 | |
271 // Page contains two HTTP-style URLs. | |
272 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); | |
273 | |
274 // Only a terminating NUL required for bogus links. | |
275 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); | |
276 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); | |
277 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); | |
278 | |
279 // Query the number of characters required for each link (incl NUL). | |
280 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); | |
281 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); | |
282 | |
283 const char* expected_url = "http://example.com?q=foo"; | |
Lei Zhang
2015/01/28 02:27:52
also const char foo[]
Tom Sepez
2015/01/28 18:12:13
Done.
| |
284 unsigned short fixed_buffer[128]; | |
285 | |
286 // Retrieve a link with too small a buffer. Buffer will not be | |
287 // NUL-terminated, but must not be modified past indicated length, | |
288 // so pre-fill with a pattern to check write bounds. | |
289 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
290 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); | |
291 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); | |
292 EXPECT_EQ(0xbdbd, fixed_buffer[1]); | |
293 | |
294 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
295 EXPECT_EQ(24, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 24)); | |
296 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 24)); | |
297 EXPECT_EQ(0xbdbd, fixed_buffer[24]); | |
298 | |
299 // Retrieve link with ample-sized buffer. | |
300 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
301 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 25)); | |
302 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 25)); | |
303 EXPECT_EQ(0u, fixed_buffer[24]); | |
304 EXPECT_EQ(0xbdbd, fixed_buffer[25]); | |
305 | |
306 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); | |
307 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128)); | |
308 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 25)); | |
309 EXPECT_EQ(0u, fixed_buffer[24]); | |
310 EXPECT_EQ(0xbdbd, fixed_buffer[25]); | |
311 | |
312 // Each link rendered in a single rect in this test page. | |
313 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); | |
314 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); | |
315 | |
316 // Out-of-range link indices report zero rects. | |
317 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); | |
318 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); | |
319 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); | |
320 | |
321 // Check boundary of valid link with valid rect index. | |
322 double left = 0.0; | |
323 double right = 0.0; | |
324 double top = 0.0; | |
325 double bottom = 0.0; | |
326 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); | |
327 EXPECT_EQ(three_places(50.791), three_places(left)); | |
328 EXPECT_EQ(three_places(187.963), three_places(right)); | |
329 EXPECT_EQ(three_places(97.624), three_places(bottom)); | |
330 EXPECT_EQ(three_places(108.736), three_places(top)); | |
331 | |
332 // Check boundary of valid link with invalid rect index. | |
333 left = 0.0; | |
334 right = 0.0; | |
335 top = 0.0; | |
336 bottom = 0.0; | |
337 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); | |
338 EXPECT_EQ(0.0, left); | |
339 EXPECT_EQ(0.0, right); | |
340 EXPECT_EQ(0.0, bottom); | |
341 EXPECT_EQ(0.0, top); | |
342 | |
343 // Check boundary of invalid link. | |
344 left = 0.0; | |
345 right = 0.0; | |
346 top = 0.0; | |
347 bottom = 0.0; | |
348 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); | |
349 EXPECT_EQ(0.0, left); | |
350 EXPECT_EQ(0.0, right); | |
351 EXPECT_EQ(0.0, bottom); | |
352 EXPECT_EQ(0.0, top); | |
353 | |
354 FPDFLink_CloseWebLinks(pagelink); | |
355 FPDFText_ClosePage(textpage); | |
356 ClearFormFillEnvironment(form_handle); | |
357 } | |
OLD | NEW |