Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(396)

Side by Side Diff: xfa/fde/xml/fde_xml_imp.cpp

Issue 2067253002: Cleanup XML parser code. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Review feedback Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « xfa/fde/xml/fde_xml_imp.h ('k') | xfa/fxfa/parser/xfa_parser.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "xfa/fde/xml/fde_xml_imp.h" 7 #include "xfa/fde/xml/fde_xml_imp.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 10
(...skipping 852 matching lines...) Expand 10 before | Expand all | Expand 10 after
863 void CFDE_XMLDoc::ReleaseParser() { 863 void CFDE_XMLDoc::ReleaseParser() {
864 if (m_pXMLParser) { 864 if (m_pXMLParser) {
865 m_pXMLParser->Release(); 865 m_pXMLParser->Release();
866 m_pXMLParser = nullptr; 866 m_pXMLParser = nullptr;
867 } 867 }
868 if (m_pSyntaxParser) { 868 if (m_pSyntaxParser) {
869 m_pSyntaxParser->Release(); 869 m_pSyntaxParser->Release();
870 m_pSyntaxParser = nullptr; 870 m_pSyntaxParser = nullptr;
871 } 871 }
872 } 872 }
873 FX_BOOL CFDE_XMLDoc::LoadXML(IFX_Stream* pXMLStream,
874 int32_t iXMLPlaneSize,
875 int32_t iTextDataSize,
876 FDE_XMLREADERHANDLER* pHandler) {
877 if (!pXMLStream)
878 return FALSE;
879 873
880 Reset(TRUE);
881 iXMLPlaneSize = iXMLPlaneSize / 1024;
882 if (iXMLPlaneSize < 1) {
883 iXMLPlaneSize = 1;
884 }
885 iXMLPlaneSize *= 1024;
886 if (iXMLPlaneSize < 4096) {
887 iXMLPlaneSize = 4096;
888 }
889 iTextDataSize = iTextDataSize / 128;
890 if (iTextDataSize < 1) {
891 iTextDataSize = 1;
892 }
893 iTextDataSize *= 128;
894 if (iTextDataSize < 128) {
895 iTextDataSize = 128;
896 }
897 m_pStream = pXMLStream;
898 uint16_t wCodePage = m_pStream->GetCodePage();
899 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
900 wCodePage != FX_CODEPAGE_UTF8) {
901 m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
902 }
903 m_pSyntaxParser = new CFDE_XMLSyntaxParser;
904
905 m_pSyntaxParser->Init(m_pStream, iXMLPlaneSize, iTextDataSize);
906 if (pHandler)
907 m_pXMLParser = new CFDE_XMLSAXParser(pHandler, m_pSyntaxParser);
908 else
909 m_pXMLParser = new CFDE_XMLDOMParser(m_pRoot, m_pSyntaxParser);
910 return TRUE;
911 }
912 FX_BOOL CFDE_XMLDoc::LoadXML(CFDE_XMLParser* pXMLParser) { 874 FX_BOOL CFDE_XMLDoc::LoadXML(CFDE_XMLParser* pXMLParser) {
913 if (!pXMLParser) 875 if (!pXMLParser)
914 return FALSE; 876 return FALSE;
915 877
916 Reset(TRUE); 878 Reset(TRUE);
917 m_pXMLParser = pXMLParser; 879 m_pXMLParser = pXMLParser;
918 return !!m_pXMLParser; 880 return !!m_pXMLParser;
919 } 881 }
920 int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) { 882 int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) {
921 if (m_iStatus >= 100) 883 if (m_iStatus >= 100)
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
1053 CFDE_XMLNode* pNode = m_pRoot->m_pChild; 1015 CFDE_XMLNode* pNode = m_pRoot->m_pChild;
1054 while (pNode) { 1016 while (pNode) {
1055 SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pNode)); 1017 SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pNode));
1056 pNode = pNode->m_pNext; 1018 pNode = pNode->m_pNext;
1057 } 1019 }
1058 if (pXMLStream == m_pStream) { 1020 if (pXMLStream == m_pStream) {
1059 int32_t iPos = pXMLStream->GetPosition(); 1021 int32_t iPos = pXMLStream->GetPosition();
1060 pXMLStream->SetLength(iPos); 1022 pXMLStream->SetLength(iPos);
1061 } 1023 }
1062 } 1024 }
1063 CFDE_XMLDOMParser::CFDE_XMLDOMParser(CFDE_XMLNode* pRoot,
1064 CFDE_XMLSyntaxParser* pParser)
1065 : m_pParser(pParser),
1066 m_pParent(pRoot),
1067 m_pChild(nullptr),
1068 m_NodeStack(16),
1069 m_ws1(),
1070 m_ws2() {
1071 m_NodeStack.Push(m_pParent);
1072 }
1073 CFDE_XMLDOMParser::~CFDE_XMLDOMParser() {
1074 m_NodeStack.RemoveAll();
1075 m_ws1.clear();
1076 m_ws2.clear();
1077 }
1078
1079 int32_t CFDE_XMLDOMParser::DoParser(IFX_Pause* pPause) {
1080 FDE_XmlSyntaxResult syntaxParserResult;
1081 int32_t iCount = 0;
1082 while (TRUE) {
1083 syntaxParserResult = m_pParser->DoSyntaxParse();
1084 switch (syntaxParserResult) {
1085 case FDE_XmlSyntaxResult::InstructionOpen:
1086 break;
1087 case FDE_XmlSyntaxResult::InstructionClose:
1088 if (m_pChild->GetType() != FDE_XMLNODE_Instruction) {
1089 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1090 break;
1091 }
1092 m_pChild = m_pParent;
1093 break;
1094 case FDE_XmlSyntaxResult::ElementOpen:
1095 case FDE_XmlSyntaxResult::ElementBreak:
1096 break;
1097 case FDE_XmlSyntaxResult::ElementClose:
1098 if (m_pChild->GetType() != FDE_XMLNODE_Element) {
1099 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1100 break;
1101 }
1102 m_pParser->GetTagName(m_ws1);
1103 ((CFDE_XMLElement*)m_pChild)->GetTagName(m_ws2);
1104 if (m_ws1.GetLength() > 0 && m_ws1.Compare(m_ws2) != 0) {
1105 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1106 break;
1107 }
1108 m_NodeStack.Pop();
1109 if (m_NodeStack.GetSize() < 1) {
1110 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1111 break;
1112 }
1113 m_pParent = (CFDE_XMLNode*)*m_NodeStack.GetTopElement();
1114 m_pChild = m_pParent;
1115 iCount++;
1116 break;
1117 case FDE_XmlSyntaxResult::TargetName:
1118 m_pParser->GetTargetName(m_ws1);
1119 m_pChild = new CFDE_XMLInstruction(m_ws1);
1120 m_pParent->InsertChildNode(m_pChild);
1121 m_ws1.clear();
1122 break;
1123 case FDE_XmlSyntaxResult::TagName:
1124 m_pParser->GetTagName(m_ws1);
1125 m_pChild = new CFDE_XMLElement(m_ws1);
1126 m_pParent->InsertChildNode(m_pChild);
1127 m_NodeStack.Push(m_pChild);
1128 m_pParent = m_pChild;
1129 break;
1130 case FDE_XmlSyntaxResult::AttriName:
1131 m_pParser->GetAttributeName(m_ws1);
1132 break;
1133 case FDE_XmlSyntaxResult::AttriValue:
1134 if (!m_pChild) {
1135 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1136 break;
1137 }
1138 m_pParser->GetAttributeName(m_ws2);
1139 if (m_pChild->GetType() == FDE_XMLNODE_Element) {
1140 ((CFDE_XMLElement*)m_pChild)->SetString(m_ws1, m_ws2);
1141 } else if (m_pChild->GetType() == FDE_XMLNODE_Instruction) {
1142 ((CFDE_XMLInstruction*)m_pChild)->SetString(m_ws1, m_ws2);
1143 }
1144 m_ws1.clear();
1145 break;
1146 case FDE_XmlSyntaxResult::Text:
1147 m_pParser->GetTextData(m_ws1);
1148 m_pChild = new CFDE_XMLText(m_ws1);
1149 m_pParent->InsertChildNode(m_pChild);
1150 m_pChild = m_pParent;
1151 break;
1152 case FDE_XmlSyntaxResult::CData:
1153 m_pParser->GetTextData(m_ws1);
1154 m_pChild = new CFDE_XMLCharData(m_ws1);
1155 m_pParent->InsertChildNode(m_pChild);
1156 m_pChild = m_pParent;
1157 break;
1158 case FDE_XmlSyntaxResult::TargetData:
1159 if (!m_pChild || m_pChild->GetType() != FDE_XMLNODE_Instruction) {
1160 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1161 break;
1162 }
1163 if (!m_ws1.IsEmpty()) {
1164 ((CFDE_XMLInstruction*)m_pChild)->AppendData(m_ws1);
1165 }
1166 m_pParser->GetTargetData(m_ws1);
1167 ((CFDE_XMLInstruction*)m_pChild)->AppendData(m_ws1);
1168 m_ws1.clear();
1169 break;
1170 default:
1171 break;
1172 }
1173 if (syntaxParserResult == FDE_XmlSyntaxResult::Error ||
1174 syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
1175 break;
1176 }
1177 if (iCount > 500 && pPause && pPause->NeedToPauseNow())
1178 break;
1179 }
1180 return m_pParser->GetStatus();
1181 }
1182
1183 CFDE_XMLSAXParser::CFDE_XMLSAXParser(FDE_XMLREADERHANDLER* pHandler,
1184 CFDE_XMLSyntaxParser* pParser)
1185 : m_pHandler(pHandler),
1186 m_pParser(pParser),
1187 m_TagStack(16),
1188 m_pTagTop(nullptr),
1189 m_ws1(),
1190 m_ws2() {}
1191 CFDE_XMLSAXParser::~CFDE_XMLSAXParser() {
1192 m_TagStack.RemoveAll();
1193 m_ws1.clear();
1194 m_ws2.clear();
1195 }
1196 int32_t CFDE_XMLSAXParser::DoParser(IFX_Pause* pPause) {
1197 FDE_XmlSyntaxResult syntaxParserResult;
1198 int32_t iCount = 0;
1199 while (TRUE) {
1200 syntaxParserResult = m_pParser->DoSyntaxParse();
1201 switch (syntaxParserResult) {
1202 case FDE_XmlSyntaxResult::ElementBreak:
1203 if (!m_pTagTop) {
1204 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1205 break;
1206 }
1207 if (m_pTagTop->eType == FDE_XMLNODE_Element) {
1208 m_pHandler->OnTagBreak(m_pHandler, m_pTagTop->wsTagName);
1209 }
1210 break;
1211 case FDE_XmlSyntaxResult::ElementClose:
1212 if (!m_pTagTop || m_pTagTop->eType != FDE_XMLNODE_Element) {
1213 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1214 break;
1215 }
1216 m_pParser->GetTagName(m_ws1);
1217 if (m_ws1.GetLength() > 0 && m_ws1.Compare(m_pTagTop->wsTagName) != 0) {
1218 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1219 break;
1220 } else if (m_ws1.GetLength() == 0) {
1221 m_pHandler->OnTagBreak(m_pHandler, m_pTagTop->wsTagName);
1222 }
1223 m_pHandler->OnTagClose(m_pHandler, m_pTagTop->wsTagName);
1224 Pop();
1225 iCount++;
1226 break;
1227 case FDE_XmlSyntaxResult::TargetName: {
1228 m_pParser->GetTargetName(m_ws1);
1229 CFDE_XMLTAG xmlTag;
1230 xmlTag.wsTagName = m_ws1;
1231 xmlTag.eType = FDE_XMLNODE_Instruction;
1232 Push(xmlTag);
1233 m_pHandler->OnTagEnter(m_pHandler, FDE_XMLNODE_Instruction,
1234 m_pTagTop->wsTagName);
1235 m_ws1.clear();
1236 } break;
1237 case FDE_XmlSyntaxResult::TagName: {
1238 m_pParser->GetTargetName(m_ws1);
1239 CFDE_XMLTAG xmlTag;
1240 xmlTag.wsTagName = m_ws1;
1241 xmlTag.eType = FDE_XMLNODE_Element;
1242 Push(xmlTag);
1243 m_pHandler->OnTagEnter(m_pHandler, FDE_XMLNODE_Element,
1244 m_pTagTop->wsTagName);
1245 } break;
1246 case FDE_XmlSyntaxResult::AttriName:
1247 m_pParser->GetTargetName(m_ws1);
1248 break;
1249 case FDE_XmlSyntaxResult::AttriValue:
1250 m_pParser->GetAttributeName(m_ws2);
1251 if (!m_pTagTop) {
1252 syntaxParserResult = FDE_XmlSyntaxResult::Error;
1253 break;
1254 }
1255 if (m_pTagTop->eType == FDE_XMLNODE_Element) {
1256 m_pHandler->OnAttribute(m_pHandler, m_ws1, m_ws2);
1257 }
1258 m_ws1.clear();
1259 break;
1260 case FDE_XmlSyntaxResult::CData:
1261 m_pParser->GetTextData(m_ws1);
1262 m_pHandler->OnData(m_pHandler, FDE_XMLNODE_CharData, m_ws1);
1263 break;
1264 case FDE_XmlSyntaxResult::Text:
1265 m_pParser->GetTextData(m_ws1);
1266 m_pHandler->OnData(m_pHandler, FDE_XMLNODE_Text, m_ws1);
1267 break;
1268 case FDE_XmlSyntaxResult::TargetData:
1269 m_pParser->GetTargetData(m_ws1);
1270 m_pHandler->OnData(m_pHandler, FDE_XMLNODE_Instruction, m_ws1);
1271 m_ws1.clear();
1272 break;
1273 default:
1274 break;
1275 }
1276 if (syntaxParserResult == FDE_XmlSyntaxResult::Error ||
1277 syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
1278 break;
1279 }
1280 if (iCount > 500 && pPause && pPause->NeedToPauseNow())
1281 break;
1282 }
1283 return m_pParser->GetStatus();
1284 }
1285
1286 inline void CFDE_XMLSAXParser::Push(const CFDE_XMLTAG& xmlTag) {
1287 m_TagStack.Push(xmlTag);
1288 m_pTagTop = m_TagStack.GetTopElement();
1289 }
1290 inline void CFDE_XMLSAXParser::Pop() {
1291 m_TagStack.Pop();
1292 m_pTagTop = m_TagStack.GetTopElement();
1293 }
1294 1025
1295 CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep) 1026 CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep)
1296 : m_iDataLength(0), 1027 : m_iDataLength(0),
1297 m_iBufferSize(0), 1028 m_iBufferSize(0),
1298 m_iAllocStep(iAllocStep), 1029 m_iAllocStep(iAllocStep),
1299 m_iStartPosition(0) {} 1030 m_iStartPosition(0) {}
1300 CFDE_BlockBuffer::~CFDE_BlockBuffer() { 1031 CFDE_BlockBuffer::~CFDE_BlockBuffer() {
1301 ClearBuffer(); 1032 ClearBuffer();
1302 } 1033 }
1303 FX_WCHAR* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) { 1034 FX_WCHAR* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) {
(...skipping 760 matching lines...) Expand 10 before | Expand all | Expand 10 after
2064 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, FALSE); 1795 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, FALSE);
2065 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1796 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
2066 m_iEntityStart = -1; 1797 m_iEntityStart = -1;
2067 } else { 1798 } else {
2068 if (m_iEntityStart < 0 && ch == L'&') { 1799 if (m_iEntityStart < 0 && ch == L'&') {
2069 m_iEntityStart = m_iDataLength - 1; 1800 m_iEntityStart = m_iDataLength - 1;
2070 } 1801 }
2071 } 1802 }
2072 m_pStart++; 1803 m_pStart++;
2073 } 1804 }
OLDNEW
« no previous file with comments | « xfa/fde/xml/fde_xml_imp.h ('k') | xfa/fxfa/parser/xfa_parser.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698