OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 // The basic rule is that we can't unescape anything that would change parsing | 73 // The basic rule is that we can't unescape anything that would change parsing |
74 // like # or ?. We also can't unescape &, =, or + since that could be part of a | 74 // like # or ?. We also can't unescape &, =, or + since that could be part of a |
75 // query and that could change the server's parsing of the query. Nor can we | 75 // query and that could change the server's parsing of the query. Nor can we |
76 // unescape \ since googleurl will convert it to a /. | 76 // unescape \ since googleurl will convert it to a /. |
77 // | 77 // |
78 // Lastly, we can't unescape anything that doesn't have a canonical | 78 // Lastly, we can't unescape anything that doesn't have a canonical |
79 // representation in a URL. This means that unescaping will change the URL, and | 79 // representation in a URL. This means that unescaping will change the URL, and |
80 // you could get different behavior if you copy and paste the URL, or press | 80 // you could get different behavior if you copy and paste the URL, or press |
81 // enter in the URL bar. The list of characters that fall into this category | 81 // enter in the URL bar. The list of characters that fall into this category |
82 // are the ones labeled PASS (allow either escaped or unescaped) in the big | 82 // are the ones labeled PASS (allow either escaped or unescaped) in the big |
83 // lookup table at the top of googleurl/src/url_canon_path.cc | 83 // lookup table at the top of googleurl/src/url_canon_path.cc. Also, characters |
| 84 // that have CHAR_QUERY set in googleurl/src/url_canon_internal.cc but are not |
| 85 // allowed in query strings according to http://www.ietf.org/rfc/rfc3261.txt are |
| 86 // not unescaped, to avoid turning a valid URL according to spec into an |
| 87 // invalid one. |
84 const char kUrlUnescape[128] = { | 88 const char kUrlUnescape[128] = { |
85 // NULL, control chars... | 89 // NULL, control chars... |
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 91 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
88 // ' ' ! " # $ % & ' ( ) * + , - . / | 92 // ' ' ! " # $ % & ' ( ) * + , - . / |
89 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, | 93 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, |
90 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? | 94 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? |
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, | 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, |
92 // @ A B C D E F G H I J K L M N O | 96 // @ A B C D E F G H I J K L M N O |
93 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 97 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
94 // P Q R S T U V W X Y Z [ \ ] ^ _ | 98 // P Q R S T U V W X Y Z [ \ ] ^ _ |
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, | 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
96 // ` a b c d e f g h i j k l m n o | 100 // ` a b c d e f g h i j k l m n o |
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
98 // p q r s t u v w x y z { | } ~ <DEL> | 102 // p q r s t u v w x y z { | } ~ <DEL> |
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 |
100 }; | 104 }; |
101 | 105 |
102 template<typename STR> | 106 template<typename STR> |
103 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, | 107 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
104 UnescapeRule::Type rules, | 108 UnescapeRule::Type rules, |
105 std::vector<size_t>* offsets_for_adjustment) { | 109 std::vector<size_t>* offsets_for_adjustment) { |
106 if (offsets_for_adjustment) { | 110 if (offsets_for_adjustment) { |
107 std::for_each(offsets_for_adjustment->begin(), | 111 std::for_each(offsets_for_adjustment->begin(), |
108 offsets_for_adjustment->end(), | 112 offsets_for_adjustment->end(), |
109 LimitOffset<STR>(escaped_text.length())); | 113 LimitOffset<STR>(escaped_text.length())); |
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
380 return; | 384 return; |
381 } | 385 } |
382 if (offset <= (location + 2)) { | 386 if (offset <= (location + 2)) { |
383 offset = string16::npos; | 387 offset = string16::npos; |
384 return; | 388 return; |
385 } | 389 } |
386 adjusted_offset -= 2; | 390 adjusted_offset -= 2; |
387 } | 391 } |
388 offset = adjusted_offset; | 392 offset = adjusted_offset; |
389 } | 393 } |
OLD | NEW |