| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #include "xfa/src/fee/src/fx_wordbreak/fx_wordbreak_impl.h" | |
| 8 | |
| 9 #define FX_IsOdd(a) ((a)&1) | |
| 10 | |
| 11 FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) { | |
| 12 FX_DWORD dwProperty = | |
| 13 (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1]; | |
| 14 return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F) | |
| 15 : (dwProperty >> 4)); | |
| 16 } | |
// Creates an iterator over |wsText| positioned at index 0.
// The text is expected to be non-empty; this is only enforced through
// FXSYS_assert.
CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText)
    : m_wsText(wsText), m_nIndex(0) {
  FXSYS_assert(!wsText.IsEmpty());
}
// Nothing to clean up explicitly; members are destroyed automatically.
CFX_CharIter::~CFX_CharIter() {}
// Destroys this iterator with `delete this`. The object must therefore have
// been allocated with `new`, and must not be touched after this call.
void CFX_CharIter::Release() {
  delete this;
}
| 25 FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) { | |
| 26 if (bPrev) { | |
| 27 if (m_nIndex <= 0) { | |
| 28 return FALSE; | |
| 29 } | |
| 30 m_nIndex--; | |
| 31 } else { | |
| 32 if (m_nIndex + 1 >= m_wsText.GetLength()) { | |
| 33 return FALSE; | |
| 34 } | |
| 35 m_nIndex++; | |
| 36 } | |
| 37 return TRUE; | |
| 38 } | |
// Returns the character of the text at the current iterator index.
FX_WCHAR CFX_CharIter::GetChar() {
  return m_wsText.GetAt(m_nIndex);
}
| 42 void CFX_CharIter::SetAt(int32_t nIndex) { | |
| 43 if (nIndex < 0 || nIndex >= m_wsText.GetLength()) { | |
| 44 return; | |
| 45 } | |
| 46 m_nIndex = nIndex; | |
| 47 } | |
// Returns the current index of the iterator within the text.
int32_t CFX_CharIter::GetAt() const {
  return m_nIndex;
}
| 51 FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const { | |
| 52 return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0); | |
| 53 } | |
| 54 IFX_CharIter* CFX_CharIter::Clone() { | |
| 55 CFX_CharIter* pIter = new CFX_CharIter(m_wsText); | |
| 56 pIter->m_nIndex = m_nIndex; | |
| 57 return pIter; | |
| 58 } | |
// Starts with no iterators; both are NULL until Attach()/SetAt() supply them.
CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {}
| 60 CFX_WordBreak::~CFX_WordBreak() { | |
| 61 if (m_pPreIter) { | |
| 62 m_pPreIter->Release(); | |
| 63 m_pPreIter = NULL; | |
| 64 } | |
| 65 if (m_pCurIter) { | |
| 66 m_pCurIter->Release(); | |
| 67 m_pCurIter = NULL; | |
| 68 } | |
| 69 } | |
// Destroys this word breaker with `delete this`. The object must have been
// allocated with `new` and must not be used after this call.
void CFX_WordBreak::Release() {
  delete this;
}
| 73 void CFX_WordBreak::Attach(IFX_CharIter* pIter) { | |
| 74 FXSYS_assert(pIter); | |
| 75 m_pCurIter = pIter; | |
| 76 } | |
| 77 void CFX_WordBreak::Attach(const CFX_WideString& wsText) { | |
| 78 m_pCurIter = new CFX_CharIter(wsText); | |
| 79 } | |
| 80 FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) { | |
| 81 IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone(); | |
| 82 if (pIter->IsEOF(!bPrev)) { | |
| 83 return FALSE; | |
| 84 } | |
| 85 pIter->Next(bPrev); | |
| 86 if (!FindNextBreakPos(pIter, bPrev, TRUE)) { | |
| 87 pIter->Release(); | |
| 88 return FALSE; | |
| 89 } | |
| 90 if (bPrev) { | |
| 91 m_pCurIter->Release(); | |
| 92 m_pCurIter = m_pPreIter; | |
| 93 m_pCurIter->Next(TRUE); | |
| 94 m_pPreIter = pIter; | |
| 95 } else { | |
| 96 m_pPreIter->Release(); | |
| 97 m_pPreIter = m_pCurIter; | |
| 98 m_pPreIter->Next(); | |
| 99 m_pCurIter = pIter; | |
| 100 } | |
| 101 return TRUE; | |
| 102 } | |
| 103 void CFX_WordBreak::SetAt(int32_t nIndex) { | |
| 104 if (m_pPreIter) { | |
| 105 m_pPreIter->Release(); | |
| 106 m_pPreIter = NULL; | |
| 107 } | |
| 108 m_pCurIter->SetAt(nIndex); | |
| 109 FindNextBreakPos(m_pCurIter, TRUE, FALSE); | |
| 110 m_pPreIter = m_pCurIter; | |
| 111 m_pCurIter = m_pPreIter->Clone(); | |
| 112 FindNextBreakPos(m_pCurIter, FALSE, FALSE); | |
| 113 } | |
// Returns the index reported by m_pPreIter, i.e. where the current word
// starts. m_pPreIter must be non-NULL (it is set by SetAt()).
int32_t CFX_WordBreak::GetWordPos() const {
  return m_pPreIter->GetAt();
}
| 117 int32_t CFX_WordBreak::GetWordLength() const { | |
| 118 return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1; | |
| 119 } | |
| 120 void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const { | |
| 121 int32_t nWordLength = GetWordLength(); | |
| 122 if (nWordLength <= 0) { | |
| 123 return; | |
| 124 } | |
| 125 FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength); | |
| 126 IFX_CharIter* pTempIter = m_pPreIter->Clone(); | |
| 127 int32_t i = 0; | |
| 128 while (pTempIter->GetAt() <= m_pCurIter->GetAt()) { | |
| 129 lpBuf[i++] = pTempIter->GetChar(); | |
| 130 FX_BOOL bEnd = pTempIter->Next(); | |
| 131 if (!bEnd) { | |
| 132 break; | |
| 133 } | |
| 134 } | |
| 135 pTempIter->Release(); | |
| 136 wsWord.ReleaseBuffer(nWordLength); | |
| 137 } | |
// Forwards to m_pCurIter->IsEOF(bTail); m_pCurIter must be non-NULL.
FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const {
  return m_pCurIter->IsEOF(bTail);
}
// Moves |pIter| in place to the last character before the next word boundary
// in the scan direction.
//   pIter     - iterator to move; on return it marks the boundary position.
//   bPrev     - scan direction passed to pIter->Next(): TRUE scans backward,
//               FALSE scans forward.
//   bFromNext - when TRUE, the character behind the start position is not
//               inspected, so ePreType starts as FX_WordBreakProp_None.
// Always returns TRUE.
//
// Three property values are tracked while scanning: ePreType (character
// behind the current one), eCurType (current) and eNextType (the character
// just stepped onto). gs_FX_WordBreak_Table[eCurType] is a bit mask indexed by
// eNextType; a set bit is treated below as a candidate boundary between the
// two characters. The MidLetter / MidNum / MidNumLet handling keeps such a
// character inside the word when it has a letter (nFlags == 1) or a number
// (nFlags == 2) on both sides.
FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter,
                                        FX_BOOL bPrev,
                                        FX_BOOL bFromNext) {
  FX_WordBreakProp ePreType = FX_WordBreakProp_None;
  FX_WordBreakProp eCurType = FX_WordBreakProp_None;
  FX_WordBreakProp eNextType = FX_WordBreakProp_None;
  // Already at the end of the text in the scan direction: nothing to scan.
  if (pIter->IsEOF(!bPrev)) {
    return TRUE;
  }
  // Seed ePreType from the character behind the start position, unless the
  // caller opted out or there is no such character.
  if (!(bFromNext || pIter->IsEOF(bPrev))) {
    pIter->Next(!bPrev);
    FX_WCHAR wcTemp = pIter->GetChar();
    ePreType = FX_GetWordBreakProperty(wcTemp);
    pIter->Next(bPrev);
  }
  FX_WCHAR wcTemp = pIter->GetChar();
  eCurType = FX_GetWordBreakProperty(wcTemp);
  // TRUE only during the first loop iteration, where ePreType still refers to
  // the look-behind character read above.
  FX_BOOL bFirst = TRUE;
  do {
    pIter->Next(bPrev);
    // Note: this declaration shadows the function-level wcTemp inside the
    // loop body.
    FX_WCHAR wcTemp = pIter->GetChar();
    eNextType = FX_GetWordBreakProperty(wcTemp);
    FX_WORD wBreak =
        gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType));
    if (wBreak) {
      // Candidate boundary found on the very last character: step back onto
      // the previous character and stop.
      if (pIter->IsEOF(!bPrev)) {
        pIter->Next(!bPrev);
        return TRUE;
      }
      if (bFirst) {
        // The scan started on a Mid* character. It only stays inside a word
        // if the character behind it (ePreType) is of the same kind as the
        // one ahead of it (eNextType).
        int32_t nFlags = 0;
        if (eCurType == FX_WordBreakProp_MidLetter) {
          if (eNextType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          }
        } else if (eCurType == FX_WordBreakProp_MidNum) {
          if (eNextType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        } else if (eCurType == FX_WordBreakProp_MidNumLet) {
          if (eNextType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          } else if (eNextType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        }
        if (nFlags > 0) {
          FXSYS_assert(nFlags <= 2);
          // Look-behind does not match: the boundary stands. Step back onto
          // the start character and stop.
          if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) ||
                (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) {
            pIter->Next(!bPrev);
            return TRUE;
          }
          // Surrounded on both sides: move one more character ahead and
          // cancel the candidate boundary.
          pIter->Next(bPrev);
          wBreak = FALSE;
        }
        bFirst = FALSE;
      }
      if (wBreak) {
        // The next character may be a Mid* character following a letter or
        // a number; if so, peek one character further before deciding.
        int32_t nFlags = 0;
        if (eNextType == FX_WordBreakProp_MidLetter) {
          if (eCurType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          }
        } else if (eNextType == FX_WordBreakProp_MidNum) {
          if (eCurType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        } else if (eNextType == FX_WordBreakProp_MidNumLet) {
          if (eCurType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          } else if (eCurType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        }
        // Ordinary boundary: step back onto the last character of the word.
        if (nFlags <= 0) {
          pIter->Next(!bPrev);
          return TRUE;
        }
        FXSYS_assert(nFlags <= 2);
        // Peek at the character after the Mid* character.
        pIter->Next(bPrev);
        wcTemp = pIter->GetChar();
        eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp);
        // It does not continue the word: undo both steps (the peek and the
        // Mid* character) and stop.
        if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) ||
              (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) {
          pIter->Next(!bPrev);
          pIter->Next(!bPrev);
          return TRUE;
        }
      }
    }
    // Slide the property window one character along the scan direction.
    ePreType = eCurType;
    eCurType = eNextType;
    bFirst = FALSE;
  } while (!pIter->IsEOF(!bPrev));
  return TRUE;
}
// Allocates a new CFX_WordBreak with `new` and returns it through the
// IFX_WordBreak interface. CFX_WordBreak::Release() deletes the object.
IFX_WordBreak* FX_WordBreak_Create() {
  return new CFX_WordBreak;
}
| OLD | NEW |