| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 // The basic rule is that we can't unescape anything that would change parsing | 73 // The basic rule is that we can't unescape anything that would change parsing |
| 74 // like # or ?. We also can't unescape &, =, or + since that could be part of a | 74 // like # or ?. We also can't unescape &, =, or + since that could be part of a |
| 75 // query and that could change the server's parsing of the query. Nor can we | 75 // query and that could change the server's parsing of the query. Nor can we |
| 76 // unescape \ since googleurl will convert it to a /. | 76 // unescape \ since googleurl will convert it to a /. |
| 77 // | 77 // |
| 78 // Lastly, we can't unescape anything that doesn't have a canonical | 78 // Lastly, we can't unescape anything that doesn't have a canonical |
| 79 // representation in a URL. This means that unescaping will change the URL, and | 79 // representation in a URL. This means that unescaping will change the URL, and |
| 80 // you could get different behavior if you copy and paste the URL, or press | 80 // you could get different behavior if you copy and paste the URL, or press |
| 81 // enter in the URL bar. The list of characters that fall into this category | 81 // enter in the URL bar. The list of characters that fall into this category |
| 82 // are the ones labeled PASS (allow either escaped or unescaped) in the big | 82 // are the ones labeled PASS (allow either escaped or unescaped) in the big |
| 83 // lookup table at the top of googleurl/src/url_canon_path.cc | 83 // lookup table at the top of googleurl/src/url_canon_path.cc. Also, characters |
| 84 // that have CHAR_QUERY set in googleurl/src/url_canon_internal.cc but are not |
| 85 // allowed in query strings according to http://www.ietf.org/rfc/rfc3261.txt are |
| 86 // not unescaped, to avoid turning a valid url according to spec into an |
| 87 // invalid one. |
| 84 const char kUrlUnescape[128] = { | 88 const char kUrlUnescape[128] = { |
| 85 // NUL, control chars... | 89 // NUL, control chars... |
| 86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 91 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 88 // ' ' ! " # $ % & ' ( ) * + , - . / | 92 // ' ' ! " # $ % & ' ( ) * + , - . / |
| 89 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, | 93 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, |
| 90 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? | 94 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? |
| 91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, | 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, |
| 92 // @ A B C D E F G H I J K L M N O | 96 // @ A B C D E F G H I J K L M N O |
| 93 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 97 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 94 // P Q R S T U V W X Y Z [ \ ] ^ _ | 98 // P Q R S T U V W X Y Z [ \ ] ^ _ |
| 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, | 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
| 96 // ` a b c d e f g h i j k l m n o | 100 // ` a b c d e f g h i j k l m n o |
| 97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 101 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 98 // p q r s t u v w x y z { | } ~ <DEL> | 102 // p q r s t u v w x y z { | } ~ <DEL> |
| 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 | 103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 |
| 100 }; | 104 }; |
| 101 | 105 |
| 102 template<typename STR> | 106 template<typename STR> |
| 103 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, | 107 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
| 104 UnescapeRule::Type rules, | 108 UnescapeRule::Type rules, |
| 105 std::vector<size_t>* offsets_for_adjustment) { | 109 std::vector<size_t>* offsets_for_adjustment) { |
| 106 if (offsets_for_adjustment) { | 110 if (offsets_for_adjustment) { |
| 107 std::for_each(offsets_for_adjustment->begin(), | 111 std::for_each(offsets_for_adjustment->begin(), |
| 108 offsets_for_adjustment->end(), | 112 offsets_for_adjustment->end(), |
| 109 LimitOffset<STR>(escaped_text.length())); | 113 LimitOffset<STR>(escaped_text.length())); |
| (...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 380 return; | 384 return; |
| 381 } | 385 } |
| 382 if (offset <= (location + 2)) { | 386 if (offset <= (location + 2)) { |
| 383 offset = string16::npos; | 387 offset = string16::npos; |
| 384 return; | 388 return; |
| 385 } | 389 } |
| 386 adjusted_offset -= 2; | 390 adjusted_offset -= 2; |
| 387 } | 391 } |
| 388 offset = adjusted_offset; | 392 offset = adjusted_offset; |
| 389 } | 393 } |
| OLD | NEW |