| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "xfa/fde/xml/fde_xml_imp.h" | 7 #include "xfa/fde/xml/fde_xml_imp.h" |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 | 10 |
| 11 #include "core/fxcrt/include/fx_safe_types.h" | 11 #include "core/fxcrt/include/fx_safe_types.h" |
| 12 #include "xfa/fgas/crt/fgas_codepage.h" | 12 #include "xfa/fgas/crt/fgas_codepage.h" |
| 13 #include "xfa/fgas/crt/fgas_system.h" | 13 #include "xfa/fgas/crt/fgas_system.h" |
| 14 | 14 |
| 15 namespace { | 15 namespace { |
| 16 | 16 |
| 17 const uint32_t kMaxCharRange = 0x10ffff; |
| 18 |
| 17 const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, | 19 const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, |
| 18 {0x0A, 0x0A}, | 20 {0x0A, 0x0A}, |
| 19 {0x0D, 0x0D}, | 21 {0x0D, 0x0D}, |
| 20 {0x20, 0xD7FF}, | 22 {0x20, 0xD7FF}, |
| 21 {0xE000, 0xFFFD}}; | 23 {0xE000, 0xFFFD}}; |
| 22 | 24 |
| 23 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) { | 25 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) { |
| 24 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; | 26 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; |
| 25 } | 27 } |
| 26 | 28 |
| (...skipping 1804 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1831 | 1833 |
| 1832 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { | 1834 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { |
| 1833 if (!m_pStream) | 1835 if (!m_pStream) |
| 1834 return 0; | 1836 return 0; |
| 1835 | 1837 |
| 1836 int32_t nSrcLen = m_pStart - m_pBuffer; | 1838 int32_t nSrcLen = m_pStart - m_pBuffer; |
| 1837 int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); | 1839 int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); |
| 1838 return m_iParsedBytes + nDstLen; | 1840 return m_iParsedBytes + nDstLen; |
| 1839 } | 1841 } |
| 1840 | 1842 |
| 1841 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) { | 1843 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) { |
| 1842 if (m_iIndexInBlock == m_iAllocStep) { | 1844 if (m_iIndexInBlock == m_iAllocStep) { |
| 1843 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); | 1845 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); |
| 1844 if (!m_pCurrentBlock) { | 1846 if (!m_pCurrentBlock) { |
| 1845 return; | 1847 return; |
| 1846 } | 1848 } |
| 1847 } | 1849 } |
| 1848 m_pCurrentBlock[m_iIndexInBlock++] = ch; | 1850 m_pCurrentBlock[m_iIndexInBlock++] = character; |
| 1849 m_iDataLength++; | 1851 m_iDataLength++; |
| 1850 if (m_iEntityStart > -1 && ch == L';') { | 1852 if (m_iEntityStart > -1 && character == L';') { |
| 1851 CFX_WideString csEntity; | 1853 CFX_WideString csEntity; |
| 1852 m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, | 1854 m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, |
| 1853 (m_iDataLength - 1) - m_iEntityStart - 1); | 1855 (m_iDataLength - 1) - m_iEntityStart - 1); |
| 1854 int32_t iLen = csEntity.GetLength(); | 1856 int32_t iLen = csEntity.GetLength(); |
| 1855 if (iLen > 0) { | 1857 if (iLen > 0) { |
| 1856 if (csEntity[0] == L'#') { | 1858 if (csEntity[0] == L'#') { |
| 1857 ch = 0; | 1859 uint32_t ch = 0; |
| 1858 FX_WCHAR w; | 1860 FX_WCHAR w; |
| 1859 if (iLen > 1 && csEntity[1] == L'x') { | 1861 if (iLen > 1 && csEntity[1] == L'x') { |
| 1860 for (int32_t i = 2; i < iLen; i++) { | 1862 for (int32_t i = 2; i < iLen; i++) { |
| 1861 w = csEntity[i]; | 1863 w = csEntity[i]; |
| 1862 if (w >= L'0' && w <= L'9') { | 1864 if (w >= L'0' && w <= L'9') { |
| 1863 ch = (ch << 4) + w - L'0'; | 1865 ch = (ch << 4) + w - L'0'; |
| 1864 } else if (w >= L'A' && w <= L'F') { | 1866 } else if (w >= L'A' && w <= L'F') { |
| 1865 ch = (ch << 4) + w - 55; | 1867 ch = (ch << 4) + w - 55; |
| 1866 } else if (w >= L'a' && w <= L'f') { | 1868 } else if (w >= L'a' && w <= L'f') { |
| 1867 ch = (ch << 4) + w - 87; | 1869 ch = (ch << 4) + w - 87; |
| 1868 } else { | 1870 } else { |
| 1869 break; | 1871 break; |
| 1870 } | 1872 } |
| 1871 } | 1873 } |
| 1872 } else { | 1874 } else { |
| 1873 for (int32_t i = 1; i < iLen; i++) { | 1875 for (int32_t i = 1; i < iLen; i++) { |
| 1874 w = csEntity[i]; | 1876 w = csEntity[i]; |
| 1875 if (w < L'0' || w > L'9') { | 1877 if (w < L'0' || w > L'9') |
| 1876 break; | 1878 break; |
| 1877 } | |
| 1878 ch = ch * 10 + w - L'0'; | 1879 ch = ch * 10 + w - L'0'; |
| 1879 } | 1880 } |
| 1880 } | 1881 } |
| 1881 if (ch != 0) { | 1882 if (ch > kMaxCharRange) |
| 1882 m_BlockBuffer.SetTextChar(m_iEntityStart, ch); | 1883 ch = ' '; |
| 1884 |
| 1885 character = static_cast<FX_WCHAR>(ch); |
| 1886 if (character != 0) { |
| 1887 m_BlockBuffer.SetTextChar(m_iEntityStart, character); |
| 1883 m_iEntityStart++; | 1888 m_iEntityStart++; |
| 1884 } | 1889 } |
| 1885 } else { | 1890 } else { |
| 1886 if (csEntity.Compare(L"amp") == 0) { | 1891 if (csEntity.Compare(L"amp") == 0) { |
| 1887 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); | 1892 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); |
| 1888 m_iEntityStart++; | 1893 m_iEntityStart++; |
| 1889 } else if (csEntity.Compare(L"lt") == 0) { | 1894 } else if (csEntity.Compare(L"lt") == 0) { |
| 1890 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); | 1895 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); |
| 1891 m_iEntityStart++; | 1896 m_iEntityStart++; |
| 1892 } else if (csEntity.Compare(L"gt") == 0) { | 1897 } else if (csEntity.Compare(L"gt") == 0) { |
| 1893 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); | 1898 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); |
| 1894 m_iEntityStart++; | 1899 m_iEntityStart++; |
| 1895 } else if (csEntity.Compare(L"apos") == 0) { | 1900 } else if (csEntity.Compare(L"apos") == 0) { |
| 1896 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); | 1901 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); |
| 1897 m_iEntityStart++; | 1902 m_iEntityStart++; |
| 1898 } else if (csEntity.Compare(L"quot") == 0) { | 1903 } else if (csEntity.Compare(L"quot") == 0) { |
| 1899 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); | 1904 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); |
| 1900 m_iEntityStart++; | 1905 m_iEntityStart++; |
| 1901 } | 1906 } |
| 1902 } | 1907 } |
| 1903 } | 1908 } |
| 1904 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, FALSE); | 1909 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, FALSE); |
| 1905 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); | 1910 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); |
| 1906 m_iEntityStart = -1; | 1911 m_iEntityStart = -1; |
| 1907 } else { | 1912 } else { |
| 1908 if (m_iEntityStart < 0 && ch == L'&') { | 1913 if (m_iEntityStart < 0 && character == L'&') { |
| 1909 m_iEntityStart = m_iDataLength - 1; | 1914 m_iEntityStart = m_iDataLength - 1; |
| 1910 } | 1915 } |
| 1911 } | 1916 } |
| 1912 m_pStart++; | 1917 m_pStart++; |
| 1913 } | 1918 } |
| OLD | NEW |