OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
7 #include "xfa/fde/xml/fde_xml_imp.h" | 7 #include "xfa/fde/xml/fde_xml_imp.h" |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 | 10 |
11 #include "core/fxcrt/include/fx_safe_types.h" | 11 #include "core/fxcrt/include/fx_safe_types.h" |
12 #include "xfa/fgas/crt/fgas_codepage.h" | 12 #include "xfa/fgas/crt/fgas_codepage.h" |
13 #include "xfa/fgas/crt/fgas_system.h" | 13 #include "xfa/fgas/crt/fgas_system.h" |
14 | 14 |
15 namespace { | 15 namespace { |
16 | 16 |
// Review view: the left column is the OLD revision, the right column is NEW.
// NEW introduces kMaxCharRange (0x10ffff); ParseTextChar compares decoded
// numeric character references against it.
| 17 const uint32_t kMaxCharRange = 0x10ffff; |
| 18 |
// Table of inclusive {low, high} pairs of 16-bit values. Judging by the name
// these are the ranges treated as valid XML characters; the code that consults
// the table is not part of this excerpt.
17 const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, | 19 const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, |
18 {0x0A, 0x0A}, | 20 {0x0A, 0x0A}, |
19 {0x0D, 0x0D}, | 21 {0x0D, 0x0D}, |
20 {0x20, 0xD7FF}, | 22 {0x20, 0xD7FF}, |
21 {0xE000, 0xFFFD}}; | 23 {0xE000, 0xFFFD}}; |
22 | 24 |
// Returns true when |ch| is a space, line feed (0x0A), carriage return (0x0D)
// or horizontal tab (0x09). Identical in the OLD and NEW columns.
23 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) { | 25 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) { |
24 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; | 26 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; |
25 } | 27 } |
26 | 28 |
(...skipping 1804 matching lines...)
1831 | 1833 |
// Returns 0 when no stream is attached. Otherwise returns m_iParsedBytes plus
// the length reported by FX_GetUTF8EncodeLength for the span between m_pBuffer
// and m_pStart. Identical in the OLD and NEW columns.
1832 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { | 1834 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { |
1833 if (!m_pStream) | 1835 if (!m_pStream) |
1834 return 0; | 1836 return 0; |
1835 | 1837 |
// Number of buffer elements between the buffer start and the parse cursor.
1836 int32_t nSrcLen = m_pStart - m_pBuffer; | 1838 int32_t nSrcLen = m_pStart - m_pBuffer; |
// Presumably the byte count of that span once encoded as UTF-8 - confirm
// against FX_GetUTF8EncodeLength.
1837 int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); | 1839 int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); |
1838 return m_iParsedBytes + nDstLen; | 1840 return m_iParsedBytes + nDstLen; |
1839 } | 1841 } |
1840 | 1842 |
// Stores one input character in the block buffer. When that character is the
// ';' that closes a pending '&...' entity, the entity text is decoded and the
// buffer is rewritten so that it holds the decoded character instead.
//
// OLD vs NEW: NEW renames the parameter to |character| and decodes numeric
// references into a separate uint32_t |ch|, which is compared against
// kMaxCharRange before being narrowed to FX_WCHAR. OLD accumulated directly
// into the FX_WCHAR parameter and had no upper-bound check.
1841 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR ch) { | 1843 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) { |
// The write index has reached m_iAllocStep: ask the block buffer for the next
// block. If none is returned the character is silently dropped (early return,
// and m_pStart is not advanced).
1842 if (m_iIndexInBlock == m_iAllocStep) { | 1844 if (m_iIndexInBlock == m_iAllocStep) { |
1843 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); | 1845 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); |
1844 if (!m_pCurrentBlock) { | 1846 if (!m_pCurrentBlock) { |
1845 return; | 1847 return; |
1846 } | 1848 } |
1847 } | 1849 } |
1848 m_pCurrentBlock[m_iIndexInBlock++] = ch; | 1850 m_pCurrentBlock[m_iIndexInBlock++] = character; |
1849 m_iDataLength++; | 1851 m_iDataLength++; |
// m_iEntityStart > -1 means an '&' was seen earlier (it is set in the else
// branch at the bottom); a ';' now terminates that entity.
1850 if (m_iEntityStart > -1 && ch == L';') { | 1852 if (m_iEntityStart > -1 && character == L';') { |
1851 CFX_WideString csEntity; | 1853 CFX_WideString csEntity; |
// Requests the text strictly between the '&' and the ';' just stored.
1852 m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, | 1854 m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, |
1853 (m_iDataLength - 1) - m_iEntityStart - 1); | 1855 (m_iDataLength - 1) - m_iEntityStart - 1); |
1854 int32_t iLen = csEntity.GetLength(); | 1856 int32_t iLen = csEntity.GetLength(); |
1855 if (iLen > 0) { | 1857 if (iLen > 0) { |
// Numeric character reference: "#x" followed by hex digits, or "#" followed
// by decimal digits.
1856 if (csEntity[0] == L'#') { | 1858 if (csEntity[0] == L'#') { |
1857 ch = 0; | 1859 uint32_t ch = 0; |
1858 FX_WCHAR w; | 1860 FX_WCHAR w; |
1859 if (iLen > 1 && csEntity[1] == L'x') { | 1861 if (iLen > 1 && csEntity[1] == L'x') { |
// Hex digits start at index 2. Parsing stops at the first non-hex character
// and keeps whatever value was accumulated up to that point.
1860 for (int32_t i = 2; i < iLen; i++) { | 1862 for (int32_t i = 2; i < iLen; i++) { |
1861 w = csEntity[i]; | 1863 w = csEntity[i]; |
1862 if (w >= L'0' && w <= L'9') { | 1864 if (w >= L'0' && w <= L'9') { |
1863 ch = (ch << 4) + w - L'0'; | 1865 ch = (ch << 4) + w - L'0'; |
1864 } else if (w >= L'A' && w <= L'F') { | 1866 } else if (w >= L'A' && w <= L'F') { |
// 55 == 'A' - 10, so 'A'..'F' map to 10..15.
1865 ch = (ch << 4) + w - 55; | 1867 ch = (ch << 4) + w - 55; |
1866 } else if (w >= L'a' && w <= L'f') { | 1868 } else if (w >= L'a' && w <= L'f') { |
// 87 == 'a' - 10, so 'a'..'f' map to 10..15.
1867 ch = (ch << 4) + w - 87; | 1869 ch = (ch << 4) + w - 87; |
1868 } else { | 1870 } else { |
1869 break; | 1871 break; |
1870 } | 1872 } |
1871 } | 1873 } |
1872 } else { | 1874 } else { |
// Decimal digits start at index 1. Parsing stops at the first non-digit.
1873 for (int32_t i = 1; i < iLen; i++) { | 1875 for (int32_t i = 1; i < iLen; i++) { |
1874 w = csEntity[i]; | 1876 w = csEntity[i]; |
1875 if (w < L'0' || w > L'9') { | 1877 if (w < L'0' || w > L'9') |
1876 break; | 1878 break; |
1877 } | |
1878 ch = ch * 10 + w - L'0'; | 1879 ch = ch * 10 + w - L'0'; |
1879 } | 1880 } |
1880 } | 1881 } |
// NEW: a decoded value above kMaxCharRange (0x10ffff) is replaced by a space.
// NOTE(review): |ch| is uint32_t and the loops above never stop early on
// magnitude, so a reference with enough digits wraps modulo 2^32 before this
// check and may land back inside the accepted range. Consider leaving the loop
// as soon as |ch| exceeds kMaxCharRange.
// NOTE(review): the cast below narrows to FX_WCHAR; if FX_WCHAR is 16 bits
// wide on a target platform, values above 0xFFFF would be truncated - confirm.
// In both columns a decoded value of 0 is not written to the buffer.
1881 if (ch != 0) { | 1882 if (ch > kMaxCharRange) |
1882 m_BlockBuffer.SetTextChar(m_iEntityStart, ch); | 1883 ch = ' '; |
| 1884 |
| 1885 character = static_cast<FX_WCHAR>(ch); |
| 1886 if (character != 0) { |
| 1887 m_BlockBuffer.SetTextChar(m_iEntityStart, character); |
// Step past the substituted character so the cleanup below keeps it.
1883 m_iEntityStart++; | 1888 m_iEntityStart++; |
1884 } | 1889 } |
1885 } else { | 1890 } else { |
// Named entities: only amp, lt, gt, apos and quot are substituted. Any other
// name writes nothing and leaves m_iEntityStart pointing at the '&'.
1886 if (csEntity.Compare(L"amp") == 0) { | 1891 if (csEntity.Compare(L"amp") == 0) { |
1887 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); | 1892 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); |
1888 m_iEntityStart++; | 1893 m_iEntityStart++; |
1889 } else if (csEntity.Compare(L"lt") == 0) { | 1894 } else if (csEntity.Compare(L"lt") == 0) { |
1890 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); | 1895 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); |
1891 m_iEntityStart++; | 1896 m_iEntityStart++; |
1892 } else if (csEntity.Compare(L"gt") == 0) { | 1897 } else if (csEntity.Compare(L"gt") == 0) { |
1893 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); | 1898 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); |
1894 m_iEntityStart++; | 1899 m_iEntityStart++; |
1895 } else if (csEntity.Compare(L"apos") == 0) { | 1900 } else if (csEntity.Compare(L"apos") == 0) { |
1896 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); | 1901 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); |
1897 m_iEntityStart++; | 1902 m_iEntityStart++; |
1898 } else if (csEntity.Compare(L"quot") == 0) { | 1903 } else if (csEntity.Compare(L"quot") == 0) { |
1899 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); | 1904 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); |
1900 m_iEntityStart++; | 1905 m_iEntityStart++; |
1901 } | 1906 } |
1902 } | 1907 } |
1903 } | 1908 } |
// Passes the count of characters from m_iEntityStart to the end of the data.
// Presumably DeleteTextChars removes that many trailing characters, i.e. the
// rest of the entity text (or all of it when nothing was substituted) -
// confirm against the block buffer implementation.
1904 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, FALSE); | 1909 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, FALSE); |
// Re-fetch the write block after the buffer was edited. m_iDataLength is not
// adjusted here; presumably the buffer calls update the parser's counters
// through bound references - TODO confirm.
1905 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); | 1910 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); |
// No entity is pending any more.
1906 m_iEntityStart = -1; | 1911 m_iEntityStart = -1; |
1907 } else { | 1912 } else { |
// An '&' outside of a pending entity opens one; remember the index at which
// it was stored.
1908 if (m_iEntityStart < 0 && ch == L'&') { | 1913 if (m_iEntityStart < 0 && character == L'&') { |
1909 m_iEntityStart = m_iDataLength - 1; | 1914 m_iEntityStart = m_iDataLength - 1; |
1910 } | 1915 } |
1911 } | 1916 } |
// Advance the input cursor past the character just handled.
1912 m_pStart++; | 1917 m_pStart++; |
1913 } | 1918 } |
OLD | NEW |