Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(432)

Side by Side Diff: core/src/fpdftext/fpdf_text_int_unittest.cpp

Issue 1530763005: Correctly extracting email addresses (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include <stddef.h>

#include "testing/gtest/include/gtest/gtest.h"

#include "core/src/fpdftext/text_int.h"
8
9 // Class to help test functions in CPDF_LinkExtract class.
10 class CPDF_TestLinkExtract : public CPDF_LinkExtract {
11 private:
12 // Add test cases as friends to access protected member functions.
13 // Access CheckMailLink.
14 FRIEND_TEST(fpdf_text_int, CheckMailLink);
15 };
16
17 TEST(fpdf_text_int, CheckMailLink) {
18 CPDF_TestLinkExtract extractor;
19 // Check cases that fail to extract valid mail link.
20 const wchar_t* invalid_strs[] = {
21 L"",
22 L"peter.pan" // '@' is required.
23 L"abc@server", // Host name needs at least one '.'.
Lei Zhang 2015/12/18 00:24:09 As I mentioned previously, we need to investigate
Wei Li 2015/12/18 01:12:21 Added in cpp file.
24 L"abc.@gmail.com", // '.' can not immediately precede '@'.
25 L"abc@xyz&q.org", // Host name should not contain '&'.
26 L"abc@.xyz.org", // Host name should not start with '.'.
27 L"fan@g..com" // Host name should not have consecutive '.'
28 };
29 for (int i = 0; i < FX_ArraySize(invalid_strs); ++i) {
30 CFX_WideString text_str(invalid_strs[i]);
31 EXPECT_EQ(FALSE, extractor.CheckMailLink(text_str));
Lei Zhang 2015/12/18 00:24:09 It should be trivial to convert CheckMailLink() to
Wei Li 2015/12/18 01:12:21 Done.
32 }
33
34 // Check cases that can extract valid mail link.
35 // An array of {input_string, expected_extracted_email_address}.
36 const wchar_t* valid_strs[][2] = {
37 {L"peter@abc.d", L"peter@abc.d"},
38 {L"red.teddy.b@abc.com", L"red.teddy.b@abc.com"},
39 {L"abc_@gmail.com", L"abc_@gmail.com"}, // '_' is ok before '@'.
40 {L"dummy-hi@gmail.com",
41 L"dummy-hi@gmail.com"}, // '-' is ok in user name.
42 {L"a..df@gmail.com", L"df@gmail.com"}, // Stop at consecutive '.'.
43 {L".john@yahoo.com", L"john@yahoo.com"}, // Remove heading '.'.
44 {L"abc@xyz.org?/", L"abc@xyz.org"}, // Trim ending invalid chars.
45 {L"fan{abc@xyz.org", L"abc@xyz.org"}, // Trim beginning invalid chars.
46 {L"fan@g.com..", L"fan@g.com"}, // Trim the ending periods.
47 {L"CAP.cap@Gmail.Com", L"CAP.cap@Gmail.Com"}, // Keep the original case.
48 };
49 for (int i = 0; i < FX_ArraySize(valid_strs); ++i) {
50 CFX_WideString text_str(valid_strs[i][0]);
51 CFX_WideString expected_str(L"mailto:");
52 expected_str += valid_strs[i][1];
53 EXPECT_EQ(TRUE, extractor.CheckMailLink(text_str));
54 EXPECT_STREQ(text_str.c_str(), expected_str.c_str());
55 }
56 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698