Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(451)

Side by Side Diff: core/src/fpdftext/fpdf_text_int.cpp

Issue 1676913004: Banish CFX_WordArray to XFA-land. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/src/fpdftext/text_int.h" 7 #include "core/src/fpdftext/text_int.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <cctype> 10 #include <cctype>
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
137 FX_BOOL CPDF_TextPage::ParseTextPage() { 137 FX_BOOL CPDF_TextPage::ParseTextPage() {
138 m_bIsParsed = false; 138 m_bIsParsed = false;
139 if (!m_pPage) 139 if (!m_pPage)
140 return FALSE; 140 return FALSE;
141 141
142 m_TextBuf.Clear(); 142 m_TextBuf.Clear();
143 m_CharList.clear(); 143 m_CharList.clear();
144 m_pPreTextObj = NULL; 144 m_pPreTextObj = NULL;
145 ProcessObject(); 145 ProcessObject();
146 m_bIsParsed = true; 146 m_bIsParsed = true;
147 m_CharIndex.RemoveAll(); 147 m_CharIndex.clear();
148 int nCount = pdfium::CollectionSize<int>(m_CharList); 148 int nCount = pdfium::CollectionSize<int>(m_CharList);
149 if (nCount) { 149 if (nCount) {
150 m_CharIndex.Add(0); 150 m_CharIndex.push_back(0);
151 } 151 }
152 for (int i = 0; i < nCount; i++) { 152 for (int i = 0; i < nCount; i++) {
153 int indexSize = m_CharIndex.GetSize(); 153 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
154 FX_BOOL bNormal = FALSE; 154 FX_BOOL bNormal = FALSE;
155 const PAGECHAR_INFO& charinfo = m_CharList[i]; 155 const PAGECHAR_INFO& charinfo = m_CharList[i];
156 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { 156 if (charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
157 bNormal = TRUE; 157 bNormal = TRUE;
158 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) { 158 } else if (charinfo.m_Unicode == 0 || IsControlChar(charinfo)) {
159 bNormal = FALSE; 159 bNormal = FALSE;
160 } else { 160 } else {
161 bNormal = TRUE; 161 bNormal = TRUE;
162 } 162 }
163 if (bNormal) { 163 if (bNormal) {
164 if (indexSize % 2) { 164 if (indexSize % 2) {
165 m_CharIndex.Add(1); 165 m_CharIndex.push_back(1);
166 } else { 166 } else {
167 if (indexSize <= 0) { 167 if (indexSize <= 0) {
168 continue; 168 continue;
169 } 169 }
170 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); 170 m_CharIndex[indexSize - 1] += 1;
171 } 171 }
172 } else { 172 } else {
173 if (indexSize % 2) { 173 if (indexSize % 2) {
174 if (indexSize <= 0) { 174 if (indexSize <= 0) {
175 continue; 175 continue;
176 } 176 }
177 m_CharIndex.SetAt(indexSize - 1, i + 1); 177 m_CharIndex[indexSize - 1] = i + 1;
178 } else { 178 } else {
179 m_CharIndex.Add(i + 1); 179 m_CharIndex.push_back(i + 1);
180 } 180 }
181 } 181 }
182 } 182 }
183 int indexSize = m_CharIndex.GetSize(); 183 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
184 if (indexSize % 2) { 184 if (indexSize % 2) {
185 m_CharIndex.RemoveAt(indexSize - 1); 185 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
186 } 186 }
187 return TRUE; 187 return TRUE;
188 } 188 }
189 int CPDF_TextPage::CountChars() const { 189 int CPDF_TextPage::CountChars() const {
190 return pdfium::CollectionSize<int>(m_CharList); 190 return pdfium::CollectionSize<int>(m_CharList);
191 } 191 }
192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const { 192 int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
193 int indexSize = m_CharIndex.GetSize(); 193 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
194 int count = 0; 194 int count = 0;
195 for (int i = 0; i < indexSize; i += 2) { 195 for (int i = 0; i < indexSize; i += 2) {
196 count += m_CharIndex.GetAt(i + 1); 196 count += m_CharIndex[i + 1];
197 if (count > TextIndex) { 197 if (count > TextIndex)
198 return TextIndex - count + m_CharIndex.GetAt(i + 1) + 198 return TextIndex - count + m_CharIndex[i + 1] + m_CharIndex[i];
199 m_CharIndex.GetAt(i);
200 }
201 } 199 }
202 return -1; 200 return -1;
203 } 201 }
204 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { 202 int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
205 int indexSize = m_CharIndex.GetSize(); 203 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
206 int count = 0; 204 int count = 0;
207 for (int i = 0; i < indexSize; i += 2) { 205 for (int i = 0; i < indexSize; i += 2) {
208 count += m_CharIndex.GetAt(i + 1); 206 count += m_CharIndex[i + 1];
209 if (m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i) > CharIndex) { 207 if (m_CharIndex[i + 1] + m_CharIndex[i] > CharIndex) {
210 if (CharIndex - m_CharIndex.GetAt(i) < 0) { 208 if (CharIndex - m_CharIndex[i] < 0)
211 return -1; 209 return -1;
212 } 210
213 return CharIndex - m_CharIndex.GetAt(i) + count - 211 return CharIndex - m_CharIndex[i] + count - m_CharIndex[i + 1];
214 m_CharIndex.GetAt(i + 1);
215 } 212 }
216 } 213 }
217 return -1; 214 return -1;
218 } 215 }
219 void CPDF_TextPage::GetRectArray(int start, 216 void CPDF_TextPage::GetRectArray(int start,
220 int nCount, 217 int nCount,
221 CFX_RectArray& rectArray) const { 218 CFX_RectArray& rectArray) const {
222 if (start < 0 || nCount == 0) { 219 if (start < 0 || nCount == 0) {
223 return; 220 return;
224 } 221 }
(...skipping 761 matching lines...) Expand 10 before | Expand all | Expand 10 after
986 info.m_Index = -1; 983 info.m_Index = -1;
987 } 984 }
988 m_CharList.push_back(info); 985 m_CharList.push_back(info);
989 } 986 }
990 void CPDF_TextPage::CloseTempLine() { 987 void CPDF_TextPage::CloseTempLine() {
991 if (m_TempCharList.empty()) { 988 if (m_TempCharList.empty()) {
992 return; 989 return;
993 } 990 }
994 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar); 991 std::unique_ptr<CFX_BidiChar> pBidiChar(new CFX_BidiChar);
995 CFX_WideString str = m_TempTextBuf.GetWideString(); 992 CFX_WideString str = m_TempTextBuf.GetWideString();
996 CFX_WordArray order; 993 std::vector<FX_WORD> order;
997 FX_BOOL bR2L = FALSE; 994 FX_BOOL bR2L = FALSE;
998 int32_t start = 0, count = 0; 995 int32_t start = 0, count = 0;
999 int nR2L = 0, nL2R = 0; 996 int nR2L = 0, nL2R = 0;
1000 FX_BOOL bPrevSpace = FALSE; 997 FX_BOOL bPrevSpace = FALSE;
1001 for (int i = 0; i < str.GetLength(); i++) { 998 for (int i = 0; i < str.GetLength(); i++) {
1002 if (str.GetAt(i) == 32) { 999 if (str.GetAt(i) == 32) {
1003 if (bPrevSpace) { 1000 if (bPrevSpace) {
1004 m_TempTextBuf.Delete(i, 1); 1001 m_TempTextBuf.Delete(i, 1);
1005 m_TempCharList.erase(m_TempCharList.begin() + i); 1002 m_TempCharList.erase(m_TempCharList.begin() + i);
1006 str.Delete(i); 1003 str.Delete(i);
1007 i--; 1004 i--;
1008 continue; 1005 continue;
1009 } 1006 }
1010 bPrevSpace = TRUE; 1007 bPrevSpace = TRUE;
1011 } else { 1008 } else {
1012 bPrevSpace = FALSE; 1009 bPrevSpace = FALSE;
1013 } 1010 }
1014 if (pBidiChar->AppendChar(str.GetAt(i))) { 1011 if (pBidiChar->AppendChar(str.GetAt(i))) {
1015 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); 1012 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1016 order.Add(start); 1013 order.push_back(start);
1017 order.Add(count); 1014 order.push_back(count);
1018 order.Add(ret); 1015 order.push_back(ret);
1019 if (!bR2L) { 1016 if (!bR2L) {
1020 if (ret == CFX_BidiChar::RIGHT) { 1017 if (ret == CFX_BidiChar::RIGHT) {
1021 nR2L++; 1018 nR2L++;
1022 } else if (ret == CFX_BidiChar::LEFT) { 1019 } else if (ret == CFX_BidiChar::LEFT) {
1023 nL2R++; 1020 nL2R++;
1024 } 1021 }
1025 } 1022 }
1026 } 1023 }
1027 } 1024 }
1028 if (pBidiChar->EndChar()) { 1025 if (pBidiChar->EndChar()) {
1029 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count); 1026 CFX_BidiChar::Direction ret = pBidiChar->GetBidiInfo(&start, &count);
1030 order.Add(start); 1027 order.push_back(start);
1031 order.Add(count); 1028 order.push_back(count);
1032 order.Add(ret); 1029 order.push_back(ret);
1033 if (!bR2L) { 1030 if (!bR2L) {
1034 if (ret == CFX_BidiChar::RIGHT) { 1031 if (ret == CFX_BidiChar::RIGHT) {
1035 nR2L++; 1032 nR2L++;
1036 } else if (ret == CFX_BidiChar::LEFT) { 1033 } else if (ret == CFX_BidiChar::LEFT) {
1037 nL2R++; 1034 nL2R++;
1038 } 1035 }
1039 } 1036 }
1040 } 1037 }
1041 if (nR2L > 0 && nR2L >= nL2R) { 1038 if (nR2L > 0 && nR2L >= nL2R) {
1042 bR2L = TRUE; 1039 bR2L = TRUE;
1043 } 1040 }
1044 if (m_parserflag == FPDFTEXT_RLTB || bR2L) { 1041 if (m_parserflag == FPDFTEXT_RLTB || bR2L) {
1045 int count = order.GetSize(); 1042 int count = order.size();
Lei Zhang 2016/02/09 00:45:21 pdfium::CollectionSize<int>(order);
1046 for (int i = count - 1; i > 0; i -= 3) { 1043 for (int i = count - 1; i > 0; i -= 3) {
1047 int ret = order.GetAt(i); 1044 int ret = order[i];
1048 int start = order.GetAt(i - 2); 1045 int count1 = order[i - 1];
1049 int count1 = order.GetAt(i - 1); 1046 int start = order[i - 2];
1050 if (ret == 2 || ret == 0) { 1047 if (ret == 2 || ret == 0) {
1051 for (int j = start + count1 - 1; j >= start; j--) { 1048 for (int j = start + count1 - 1; j >= start; j--) {
1052 AddCharInfoByRLDirection(str, j); 1049 AddCharInfoByRLDirection(str, j);
1053 } 1050 }
1054 } else { 1051 } else {
1055 int j = i; 1052 int j = i;
1056 FX_BOOL bSymbol = FALSE; 1053 FX_BOOL bSymbol = FALSE;
1057 while (j > 0 && order.GetAt(j) != 2) { 1054 while (j > 0 && order[j] != 2) {
1058 bSymbol = !order.GetAt(j); 1055 bSymbol = !order[j];
1059 j -= 3; 1056 j -= 3;
1060 } 1057 }
1061 int end = start + count1; 1058 int end = start + count1;
1062 int n = 0; 1059 int n = 0;
1063 if (bSymbol) { 1060 if (bSymbol) {
1064 n = j + 6; 1061 n = j + 6;
1065 } else { 1062 } else {
1066 n = j + 3; 1063 n = j + 3;
1067 } 1064 }
1068 if (n >= i) { 1065 if (n >= i) {
1069 for (int m = start; m < end; m++) { 1066 for (int m = start; m < end; m++) {
1070 AddCharInfoByLRDirection(str, m); 1067 AddCharInfoByLRDirection(str, m);
1071 } 1068 }
1072 } else { 1069 } else {
1073 j = i; 1070 j = i;
1074 i = n; 1071 i = n;
1075 for (; n <= j; n += 3) { 1072 for (; n <= j; n += 3) {
1076 int start = order.GetAt(n - 2); 1073 int start = order[n - 2];
1077 int count1 = order.GetAt(n - 1); 1074 int count1 = order[n - 1];
1078 int end = start + count1; 1075 int end = start + count1;
1079 for (int m = start; m < end; m++) { 1076 for (int m = start; m < end; m++) {
1080 AddCharInfoByLRDirection(str, m); 1077 AddCharInfoByLRDirection(str, m);
1081 } 1078 }
1082 } 1079 }
1083 } 1080 }
1084 } 1081 }
1085 } 1082 }
1086 } else { 1083 } else {
1087 int count = order.GetSize(); 1084 int count = order.size();
1088 FX_BOOL bL2R = FALSE; 1085 FX_BOOL bL2R = FALSE;
1089 for (int i = 0; i < count; i += 3) { 1086 for (int i = 0; i < count; i += 3) {
1090 int ret = order.GetAt(i + 2); 1087 int start = order[i];
1091 int start = order.GetAt(i); 1088 int count1 = order[i + 1];
1092 int count1 = order.GetAt(i + 1); 1089 int ret = order[i + 2];
1093 if (ret == 2 || (i == 0 && ret == 0 && !bL2R)) { 1090 if (ret == 2 || (i == 0 && ret == 0 && !bL2R)) {
1094 int j = i + 3; 1091 int j = i + 3;
1095 while (bR2L && j < count) { 1092 while (bR2L && j < count) {
1096 if (order.GetAt(j + 2) == 1) { 1093 if (order[j + 2] == 1)
1097 break; 1094 break;
1098 } else { 1095 j += 3;
1099 j += 3;
1100 }
1101 } 1096 }
1102 if (j == 3) { 1097 if (j == 3) {
1103 i = -3; 1098 i = -3;
1104 bL2R = TRUE; 1099 bL2R = TRUE;
1105 continue; 1100 continue;
1106 } 1101 }
1107 int end = pdfium::CollectionSize<int>(m_TempCharList) - 1; 1102 int end = pdfium::CollectionSize<int>(m_TempCharList) - 1;
1108 if (j < count) { 1103 if (j < count) {
1109 end = order.GetAt(j) - 1; 1104 end = order[j] - 1;
1110 } 1105 }
1111 i = j - 3; 1106 i = j - 3;
1112 for (int n = end; n >= start; n--) { 1107 for (int n = end; n >= start; n--) {
1113 AddCharInfoByRLDirection(str, n); 1108 AddCharInfoByRLDirection(str, n);
1114 } 1109 }
1115 } else { 1110 } else {
1116 int end = start + count1; 1111 int end = start + count1;
1117 for (int n = start; n < end; n++) { 1112 for (int n = start; n < end; n++) {
1118 AddCharInfoByLRDirection(str, n); 1113 AddCharInfoByLRDirection(str, n);
1119 } 1114 }
1120 } 1115 }
1121 } 1116 }
1122 } 1117 }
1123 order.RemoveAll();
1124 m_TempCharList.clear(); 1118 m_TempCharList.clear();
1125 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength()); 1119 m_TempTextBuf.Delete(0, m_TempTextBuf.GetLength());
1126 } 1120 }
1127 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj, 1121 void CPDF_TextPage::ProcessTextObject(CPDF_TextObject* pTextObj,
1128 const CFX_Matrix& formMatrix, 1122 const CFX_Matrix& formMatrix,
1129 FX_POSITION ObjPos) { 1123 FX_POSITION ObjPos) {
1130 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right, 1124 CFX_FloatRect re(pTextObj->m_Left, pTextObj->m_Bottom, pTextObj->m_Right,
1131 pTextObj->m_Top); 1125 pTextObj->m_Top);
1132 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) { 1126 if (FXSYS_fabs(pTextObj->m_Right - pTextObj->m_Left) < 0.01f) {
1133 return; 1127 return;
(...skipping 894 matching lines...) Expand 10 before | Expand all | Expand 10 after
2028 m_findNextStart(-1), 2022 m_findNextStart(-1),
2029 m_findPreStart(-1), 2023 m_findPreStart(-1),
2030 m_bMatchCase(FALSE), 2024 m_bMatchCase(FALSE),
2031 m_bMatchWholeWord(FALSE), 2025 m_bMatchWholeWord(FALSE),
2032 m_resStart(0), 2026 m_resStart(0),
2033 m_resEnd(-1), 2027 m_resEnd(-1),
2034 m_IsFind(FALSE) { 2028 m_IsFind(FALSE) {
2035 m_strText = m_pTextPage->GetPageText(); 2029 m_strText = m_pTextPage->GetPageText();
2036 int nCount = pTextPage->CountChars(); 2030 int nCount = pTextPage->CountChars();
2037 if (nCount) { 2031 if (nCount) {
2038 m_CharIndex.Add(0); 2032 m_CharIndex.push_back(0);
2039 } 2033 }
2040 for (int i = 0; i < nCount; i++) { 2034 for (int i = 0; i < nCount; i++) {
2041 FPDF_CHAR_INFO info; 2035 FPDF_CHAR_INFO info;
2042 pTextPage->GetCharInfo(i, &info); 2036 pTextPage->GetCharInfo(i, &info);
2043 int indexSize = m_CharIndex.GetSize(); 2037 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
2044 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) { 2038 if (info.m_Flag == CHAR_NORMAL || info.m_Flag == CHAR_GENERATED) {
2045 if (indexSize % 2) { 2039 if (indexSize % 2) {
2046 m_CharIndex.Add(1); 2040 m_CharIndex.push_back(1);
2047 } else { 2041 } else {
2048 if (indexSize <= 0) { 2042 if (indexSize <= 0) {
2049 continue; 2043 continue;
2050 } 2044 }
2051 m_CharIndex.SetAt(indexSize - 1, m_CharIndex.GetAt(indexSize - 1) + 1); 2045 m_CharIndex[indexSize - 1] += 1;
2052 } 2046 }
2053 } else { 2047 } else {
2054 if (indexSize % 2) { 2048 if (indexSize % 2) {
2055 if (indexSize <= 0) { 2049 if (indexSize <= 0) {
2056 continue; 2050 continue;
2057 } 2051 }
2058 m_CharIndex.SetAt(indexSize - 1, i + 1); 2052 m_CharIndex[indexSize - 1] = i + 1;
2059 } else { 2053 } else {
2060 m_CharIndex.Add(i + 1); 2054 m_CharIndex.push_back(i + 1);
2061 } 2055 }
2062 } 2056 }
2063 } 2057 }
2064 int indexSize = m_CharIndex.GetSize(); 2058 int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
2065 if (indexSize % 2) { 2059 if (indexSize % 2) {
2066 m_CharIndex.RemoveAt(indexSize - 1); 2060 m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
2067 } 2061 }
2068 } 2062 }
2069 int CPDF_TextPageFind::GetCharIndex(int index) const { 2063 int CPDF_TextPageFind::GetCharIndex(int index) const {
2070 return m_pTextPage->CharIndexFromTextIndex(index); 2064 return m_pTextPage->CharIndexFromTextIndex(index);
2071 int indexSize = m_CharIndex.GetSize();
2072 int count = 0;
2073 for (int i = 0; i < indexSize; i += 2) {
2074 count += m_CharIndex.GetAt(i + 1);
2075 if (count > index) {
2076 return index - count + m_CharIndex.GetAt(i + 1) + m_CharIndex.GetAt(i);
2077 }
2078 }
2079 return -1;
2080 } 2065 }
2081 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, 2066 FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
2082 int flags, 2067 int flags,
2083 int startPos) { 2068 int startPos) {
2084 if (!m_pTextPage) { 2069 if (!m_pTextPage) {
2085 return FALSE; 2070 return FALSE;
2086 } 2071 }
2087 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) { 2072 if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) {
2088 m_strText = m_pTextPage->GetPageText(); 2073 m_strText = m_pTextPage->GetPageText();
2089 } 2074 }
(...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after
2629 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { 2614 if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
2630 return; 2615 return;
2631 } 2616 }
2632 CPDF_LinkExt* link = NULL; 2617 CPDF_LinkExt* link = NULL;
2633 link = m_LinkList.GetAt(index); 2618 link = m_LinkList.GetAt(index);
2634 if (!link) { 2619 if (!link) {
2635 return; 2620 return;
2636 } 2621 }
2637 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); 2622 m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
2638 } 2623 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698