OLD | NEW |
| (Empty) |
1 // Copyright 2014 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "xfa/src/fee/src/fx_wordbreak/fx_wordbreak_impl.h" | |
8 | |
9 #define FX_IsOdd(a) ((a)&1) | |
10 | |
11 FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) { | |
12 FX_DWORD dwProperty = | |
13 (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1]; | |
14 return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F) | |
15 : (dwProperty >> 4)); | |
16 } | |
17 CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText) | |
18 : m_wsText(wsText), m_nIndex(0) { | |
19 FXSYS_assert(!wsText.IsEmpty()); | |
20 } | |
21 CFX_CharIter::~CFX_CharIter() {} | |
22 void CFX_CharIter::Release() { | |
23 delete this; | |
24 } | |
25 FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) { | |
26 if (bPrev) { | |
27 if (m_nIndex <= 0) { | |
28 return FALSE; | |
29 } | |
30 m_nIndex--; | |
31 } else { | |
32 if (m_nIndex + 1 >= m_wsText.GetLength()) { | |
33 return FALSE; | |
34 } | |
35 m_nIndex++; | |
36 } | |
37 return TRUE; | |
38 } | |
39 FX_WCHAR CFX_CharIter::GetChar() { | |
40 return m_wsText.GetAt(m_nIndex); | |
41 } | |
42 void CFX_CharIter::SetAt(int32_t nIndex) { | |
43 if (nIndex < 0 || nIndex >= m_wsText.GetLength()) { | |
44 return; | |
45 } | |
46 m_nIndex = nIndex; | |
47 } | |
48 int32_t CFX_CharIter::GetAt() const { | |
49 return m_nIndex; | |
50 } | |
51 FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const { | |
52 return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0); | |
53 } | |
54 IFX_CharIter* CFX_CharIter::Clone() { | |
55 CFX_CharIter* pIter = new CFX_CharIter(m_wsText); | |
56 pIter->m_nIndex = m_nIndex; | |
57 return pIter; | |
58 } | |
59 CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {} | |
60 CFX_WordBreak::~CFX_WordBreak() { | |
61 if (m_pPreIter) { | |
62 m_pPreIter->Release(); | |
63 m_pPreIter = NULL; | |
64 } | |
65 if (m_pCurIter) { | |
66 m_pCurIter->Release(); | |
67 m_pCurIter = NULL; | |
68 } | |
69 } | |
70 void CFX_WordBreak::Release() { | |
71 delete this; | |
72 } | |
73 void CFX_WordBreak::Attach(IFX_CharIter* pIter) { | |
74 FXSYS_assert(pIter); | |
75 m_pCurIter = pIter; | |
76 } | |
77 void CFX_WordBreak::Attach(const CFX_WideString& wsText) { | |
78 m_pCurIter = new CFX_CharIter(wsText); | |
79 } | |
80 FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) { | |
81 IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone(); | |
82 if (pIter->IsEOF(!bPrev)) { | |
83 return FALSE; | |
84 } | |
85 pIter->Next(bPrev); | |
86 if (!FindNextBreakPos(pIter, bPrev, TRUE)) { | |
87 pIter->Release(); | |
88 return FALSE; | |
89 } | |
90 if (bPrev) { | |
91 m_pCurIter->Release(); | |
92 m_pCurIter = m_pPreIter; | |
93 m_pCurIter->Next(TRUE); | |
94 m_pPreIter = pIter; | |
95 } else { | |
96 m_pPreIter->Release(); | |
97 m_pPreIter = m_pCurIter; | |
98 m_pPreIter->Next(); | |
99 m_pCurIter = pIter; | |
100 } | |
101 return TRUE; | |
102 } | |
103 void CFX_WordBreak::SetAt(int32_t nIndex) { | |
104 if (m_pPreIter) { | |
105 m_pPreIter->Release(); | |
106 m_pPreIter = NULL; | |
107 } | |
108 m_pCurIter->SetAt(nIndex); | |
109 FindNextBreakPos(m_pCurIter, TRUE, FALSE); | |
110 m_pPreIter = m_pCurIter; | |
111 m_pCurIter = m_pPreIter->Clone(); | |
112 FindNextBreakPos(m_pCurIter, FALSE, FALSE); | |
113 } | |
114 int32_t CFX_WordBreak::GetWordPos() const { | |
115 return m_pPreIter->GetAt(); | |
116 } | |
117 int32_t CFX_WordBreak::GetWordLength() const { | |
118 return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1; | |
119 } | |
120 void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const { | |
121 int32_t nWordLength = GetWordLength(); | |
122 if (nWordLength <= 0) { | |
123 return; | |
124 } | |
125 FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength); | |
126 IFX_CharIter* pTempIter = m_pPreIter->Clone(); | |
127 int32_t i = 0; | |
128 while (pTempIter->GetAt() <= m_pCurIter->GetAt()) { | |
129 lpBuf[i++] = pTempIter->GetChar(); | |
130 FX_BOOL bEnd = pTempIter->Next(); | |
131 if (!bEnd) { | |
132 break; | |
133 } | |
134 } | |
135 pTempIter->Release(); | |
136 wsWord.ReleaseBuffer(nWordLength); | |
137 } | |
138 FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const { | |
139 return m_pCurIter->IsEOF(bTail); | |
140 } | |
141 FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter, | |
142 FX_BOOL bPrev, | |
143 FX_BOOL bFromNext) { | |
144 FX_WordBreakProp ePreType = FX_WordBreakProp_None; | |
145 FX_WordBreakProp eCurType = FX_WordBreakProp_None; | |
146 FX_WordBreakProp eNextType = FX_WordBreakProp_None; | |
147 if (pIter->IsEOF(!bPrev)) { | |
148 return TRUE; | |
149 } | |
150 if (!(bFromNext || pIter->IsEOF(bPrev))) { | |
151 pIter->Next(!bPrev); | |
152 FX_WCHAR wcTemp = pIter->GetChar(); | |
153 ePreType = FX_GetWordBreakProperty(wcTemp); | |
154 pIter->Next(bPrev); | |
155 } | |
156 FX_WCHAR wcTemp = pIter->GetChar(); | |
157 eCurType = FX_GetWordBreakProperty(wcTemp); | |
158 FX_BOOL bFirst = TRUE; | |
159 do { | |
160 pIter->Next(bPrev); | |
161 FX_WCHAR wcTemp = pIter->GetChar(); | |
162 eNextType = FX_GetWordBreakProperty(wcTemp); | |
163 FX_WORD wBreak = | |
164 gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType)); | |
165 if (wBreak) { | |
166 if (pIter->IsEOF(!bPrev)) { | |
167 pIter->Next(!bPrev); | |
168 return TRUE; | |
169 } | |
170 if (bFirst) { | |
171 int32_t nFlags = 0; | |
172 if (eCurType == FX_WordBreakProp_MidLetter) { | |
173 if (eNextType == FX_WordBreakProp_ALetter) { | |
174 nFlags = 1; | |
175 } | |
176 } else if (eCurType == FX_WordBreakProp_MidNum) { | |
177 if (eNextType == FX_WordBreakProp_Numberic) { | |
178 nFlags = 2; | |
179 } | |
180 } else if (eCurType == FX_WordBreakProp_MidNumLet) { | |
181 if (eNextType == FX_WordBreakProp_ALetter) { | |
182 nFlags = 1; | |
183 } else if (eNextType == FX_WordBreakProp_Numberic) { | |
184 nFlags = 2; | |
185 } | |
186 } | |
187 if (nFlags > 0) { | |
188 FXSYS_assert(nFlags <= 2); | |
189 if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) || | |
190 (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) { | |
191 pIter->Next(!bPrev); | |
192 return TRUE; | |
193 } | |
194 pIter->Next(bPrev); | |
195 wBreak = FALSE; | |
196 } | |
197 bFirst = FALSE; | |
198 } | |
199 if (wBreak) { | |
200 int32_t nFlags = 0; | |
201 if (eNextType == FX_WordBreakProp_MidLetter) { | |
202 if (eCurType == FX_WordBreakProp_ALetter) { | |
203 nFlags = 1; | |
204 } | |
205 } else if (eNextType == FX_WordBreakProp_MidNum) { | |
206 if (eCurType == FX_WordBreakProp_Numberic) { | |
207 nFlags = 2; | |
208 } | |
209 } else if (eNextType == FX_WordBreakProp_MidNumLet) { | |
210 if (eCurType == FX_WordBreakProp_ALetter) { | |
211 nFlags = 1; | |
212 } else if (eCurType == FX_WordBreakProp_Numberic) { | |
213 nFlags = 2; | |
214 } | |
215 } | |
216 if (nFlags <= 0) { | |
217 pIter->Next(!bPrev); | |
218 return TRUE; | |
219 } | |
220 FXSYS_assert(nFlags <= 2); | |
221 pIter->Next(bPrev); | |
222 wcTemp = pIter->GetChar(); | |
223 eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp); | |
224 if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) || | |
225 (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) { | |
226 pIter->Next(!bPrev); | |
227 pIter->Next(!bPrev); | |
228 return TRUE; | |
229 } | |
230 } | |
231 } | |
232 ePreType = eCurType; | |
233 eCurType = eNextType; | |
234 bFirst = FALSE; | |
235 } while (!pIter->IsEOF(!bPrev)); | |
236 return TRUE; | |
237 } | |
238 IFX_WordBreak* FX_WordBreak_Create() { | |
239 return new CFX_WordBreak; | |
240 } | |
OLD | NEW |