| OLD | NEW | 
|---|---|
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "content/renderer/savable_resources.h" | 5 #include "content/renderer/savable_resources.h" | 
| 6 | 6 | 
| 7 #include <set> | 7 #include <set> | 
| 8 | 8 | 
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" | 
| 10 #include "base/logging.h" | 10 #include "base/logging.h" | 
| (...skipping 17 matching lines...) |
| 28 using blink::WebLocalFrame; | 28 using blink::WebLocalFrame; | 
| 29 using blink::WebNode; | 29 using blink::WebNode; | 
| 30 using blink::WebNodeList; | 30 using blink::WebNodeList; | 
| 31 using blink::WebString; | 31 using blink::WebString; | 
| 32 using blink::WebVector; | 32 using blink::WebVector; | 
| 33 using blink::WebView; | 33 using blink::WebView; | 
| 34 | 34 | 
| 35 namespace content { | 35 namespace content { | 
| 36 namespace { | 36 namespace { | 
| 37 | 37 | 
| 38 // Structure for storage the unique set of all savable resource links for |  | 
| 39 // making sure that no duplicated resource link in final result. The consumer |  | 
| 40 // of the SavableResourcesUniqueCheck is responsible for keeping these pointers |  | 
| 41 // valid for the lifetime of the SavableResourcesUniqueCheck instance. |  | 
| 42 struct SavableResourcesUniqueCheck { |  | 
| 43   // Unique set of all sub resource links. |  | 
| 44   std::set<GURL>* resources_set; |  | 
| 45   // Unique set of all frame links. |  | 
| 46   std::set<GURL>* frames_set; |  | 
| 47   // Collection of all frames we go through when getting all savable resource |  | 
| 48   // links. |  | 
| 49   std::vector<WebFrame*>* frames; |  | 
| 50 |  | 
| 51   SavableResourcesUniqueCheck() |  | 
| 52       : resources_set(NULL), |  | 
| 53         frames_set(NULL), |  | 
| 54         frames(NULL) {} |  | 
| 55 |  | 
| 56   SavableResourcesUniqueCheck(std::set<GURL>* resources_set, |  | 
| 57       std::set<GURL>* frames_set, std::vector<WebFrame*>* frames) |  | 
| 58       : resources_set(resources_set), |  | 
| 59         frames_set(frames_set), |  | 
| 60         frames(frames) {} |  | 
| 61 }; |  | 
| 62 |  | 
| 63 // Get all savable resource links from current element. One element might | 38 // Get all savable resource links from current element. One element might | 
| 64 // have more than one resource link. It is possible to have some links | 39 // have more than one resource link. It is possible to have some links | 
| 65 // in one CSS stylesheet. | 40 // in one CSS stylesheet. | 
| 66 void GetSavableResourceLinkForElement( | 41 void GetSavableResourceLinkForElement( | 
| 67     const WebElement& element, | 42     const WebElement& element, | 
| 68     const WebDocument& current_doc, | 43     const WebDocument& current_doc, | 
| 69     SavableResourcesUniqueCheck* unique_check, |  | 
| 70     SavableResourcesResult* result) { | 44     SavableResourcesResult* result) { | 
| 71 | 45   // Skipping frame and iframe tag. | 
| 72   // Handle frame and iframe tag. | 46   if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) { | 
| 73   if (element.hasHTMLTagName("iframe") || |  | 
| 74       element.hasHTMLTagName("frame")) { |  | 
| 75     WebFrame* sub_frame = WebLocalFrame::fromFrameOwnerElement(element); |  | 
| 76     if (sub_frame) |  | 
| 77       unique_check->frames->push_back(sub_frame); |  | 
| 78     return; | 47     return; | 
| 79   } | 48   } | 
| 80 | 49 | 
| 81   // Check whether the node has sub resource URL or not. | 50   // Check whether the node has sub resource URL or not. | 
| 82   WebString value = GetSubResourceLinkFromElement(element); | 51   WebString value = GetSubResourceLinkFromElement(element); | 
| 83   if (value.isNull()) | 52   if (value.isNull()) | 
| 84     return; | 53     return; | 
| 85   // Get absolute URL. | 54   // Get absolute URL. | 
| 86   GURL u = current_doc.completeURL(value); | 55   GURL u = current_doc.completeURL(value); | 
| 87   // ignore invalid URL | 56   // ignore invalid URL | 
| 88   if (!u.is_valid()) | 57   if (!u.is_valid()) | 
| 89     return; | 58     return; | 
| 90   // Ignore those URLs which are not standard protocols. Because FTP | 59   // Ignore those URLs which are not standard protocols. Because FTP | 
| 91   // protocol does no have cache mechanism, we will skip all | 60   // protocol does no have cache mechanism, we will skip all | 
| 92   // sub-resources if they use FTP protocol. | 61   // sub-resources if they use FTP protocol. | 
| 93   if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) | 62   if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) | 
| 94     return; | 63     return; | 
| 95   // Ignore duplicated resource link. | 64   // Ignore duplicated resource link. | 
| 96   if (!unique_check->resources_set->insert(u).second) |  | 
| 97     return; |  | 
| 98   result->resources_list->push_back(u); | 65   result->resources_list->push_back(u); | 
| 99   // Insert referrer for above new resource link. | 66   // Insert referrer for above new resource link. | 
| 100   result->referrer_urls_list->push_back(GURL()); | 67   result->referrer_urls_list->push_back(GURL()); | 
| 101   result->referrer_policies_list->push_back(blink::WebReferrerPolicyDefault); | 68   result->referrer_policies_list->push_back(blink::WebReferrerPolicyDefault); | 
| 102 } | 69 } | 
| 103 | 70 | 
| 104 // Get all savable resource links from current WebFrameImpl object pointer. | 71 }  // namespace | 
| 105 void GetAllSavableResourceLinksForFrame(WebFrame* current_frame, | 72 | 
| 106     SavableResourcesUniqueCheck* unique_check, | 73 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, | 
| 107     SavableResourcesResult* result, | 74                                      SavableResourcesResult* result, | 
| 108     const char** savable_schemes) { | 75                                      const char** savable_schemes) { | 
| 109   // Get current frame's URL. | 76   // Get current frame's URL. | 
| 110   GURL current_frame_url = current_frame->document().url(); | 77   GURL current_frame_url = current_frame->document().url(); | 
| 111 | 78 | 
| 112   // If url of current frame is invalid, ignore it. | 79   // If url of current frame is invalid, ignore it. | 
| 113   if (!current_frame_url.is_valid()) | 80   if (!current_frame_url.is_valid()) | 
| 114     return; | 81     return false; | 
| 115 | 82 | 
| 116   // If url of current frame is not a savable protocol, ignore it. | 83   // If url of current frame is not a savable protocol, ignore it. | 
| 117   bool is_valid_protocol = false; | 84   bool is_valid_protocol = false; | 
| 118   for (int i = 0; savable_schemes[i] != NULL; ++i) { | 85   for (int i = 0; savable_schemes[i] != NULL; ++i) { | 
| 119     if (current_frame_url.SchemeIs(savable_schemes[i])) { | 86     if (current_frame_url.SchemeIs(savable_schemes[i])) { | 
| 120       is_valid_protocol = true; | 87       is_valid_protocol = true; | 
| 121       break; | 88       break; | 
| 122     } | 89     } | 
| 123   } | 90   } | 
| 124   if (!is_valid_protocol) | 91   if (!is_valid_protocol) | 
| 125     return; | 92     return false; | 
| 126 |  | 
| 127   // If find same frame we have recorded, ignore it. |  | 
| 128   if (!unique_check->frames_set->insert(current_frame_url).second) |  | 
| 129     return; |  | 
| 130 | 93 | 
| 131   // Get current using document. | 94   // Get current using document. | 
| 132   WebDocument current_doc = current_frame->document(); | 95   WebDocument current_doc = current_frame->document(); | 
| 133   // Go through all descent nodes. | 96   // Go through all descent nodes. | 
| 134   WebElementCollection all = current_doc.all(); | 97   WebElementCollection all = current_doc.all(); | 
| 135   // Go through all elements in this frame. | 98   // Go through all elements in this frame. | 
| 136   for (WebElement element = all.firstItem(); !element.isNull(); | 99   for (WebElement element = all.firstItem(); !element.isNull(); | 
| 137        element = all.nextItem()) { | 100        element = all.nextItem()) { | 
| 138     GetSavableResourceLinkForElement(element, | 101     GetSavableResourceLinkForElement(element, | 
| 139                                      current_doc, | 102                                      current_doc, | 
| 140                                      unique_check, |  | 
| 141                                      result); | 103                                      result); | 
| 142   } | 104   } | 
|  | 105 | 
|  | 106   return true; | 
| 143 } | 107 } | 
| 144 | 108 | 
| 145 }  // namespace |  | 
| 146 |  | 
| 147 WebString GetSubResourceLinkFromElement(const WebElement& element) { | 109 WebString GetSubResourceLinkFromElement(const WebElement& element) { | 
| 148   const char* attribute_name = NULL; | 110   const char* attribute_name = NULL; | 
| 149   if (element.hasHTMLTagName("img") || | 111   if (element.hasHTMLTagName("img") || | 
| 150       element.hasHTMLTagName("script")) { | 112       element.hasHTMLTagName("script")) { | 
| 151     attribute_name = "src"; | 113     attribute_name = "src"; | 
| 152   } else if (element.hasHTMLTagName("input")) { | 114   } else if (element.hasHTMLTagName("input")) { | 
| 153     const WebInputElement input = element.toConst<WebInputElement>(); | 115     const WebInputElement input = element.toConst<WebInputElement>(); | 
| 154     if (input.isImageButton()) { | 116     if (input.isImageButton()) { | 
| 155       attribute_name = "src"; | 117       attribute_name = "src"; | 
| 156     } | 118     } | 
| (...skipping 25 matching lines...) |
| 182   // If value has content and not start with "javascript:" then return it, | 144   // If value has content and not start with "javascript:" then return it, | 
| 183   // otherwise return NULL. | 145   // otherwise return NULL. | 
| 184   if (!value.isNull() && !value.isEmpty() && | 146   if (!value.isNull() && !value.isEmpty() && | 
| 185       !base::StartsWith(value.utf8(), "javascript:", | 147       !base::StartsWith(value.utf8(), "javascript:", | 
| 186                         base::CompareCase::INSENSITIVE_ASCII)) | 148                         base::CompareCase::INSENSITIVE_ASCII)) | 
| 187     return value; | 149     return value; | 
| 188 | 150 | 
| 189   return WebString(); | 151   return WebString(); | 
| 190 } | 152 } | 
| 191 | 153 | 
| 192 // Get all savable resource links from current webview, include main |  | 
| 193 // frame and sub-frame |  | 
| 194 bool GetAllSavableResourceLinksForCurrentPage(WebView* view, |  | 
| 195     const GURL& page_url, SavableResourcesResult* result, |  | 
| 196     const char** savable_schemes) { |  | 
| 197   WebFrame* main_frame = view->mainFrame(); |  | 
| 198   if (!main_frame) |  | 
| 199     return false; |  | 
| 200 |  | 
| 201   std::set<GURL> resources_set; |  | 
| 202   std::set<GURL> frames_set; |  | 
| 203   std::vector<WebFrame*> frames; |  | 
| 204   SavableResourcesUniqueCheck unique_check(&resources_set, |  | 
| 205                                            &frames_set, |  | 
| 206                                            &frames); |  | 
| 207 |  | 
| 208   GURL main_page_gurl(main_frame->document().url()); |  | 
| 209 |  | 
| 210   // Make sure we are saving same page between embedder and webkit. |  | 
| 211   // If page has being navigated, embedder will get three empty vector, |  | 
| 212   // which will make the saving page job ended. |  | 
| 213   if (page_url != main_page_gurl) |  | 
| 214     return true; |  | 
| 215 |  | 
| 216   // First, process main frame. |  | 
| 217   frames.push_back(main_frame); |  | 
| 218 |  | 
| 219   // Check all resource in this page, include sub-frame. |  | 
| 220   for (int i = 0; i < static_cast<int>(frames.size()); ++i) { |  | 
| 221     // Get current frame's all savable resource links. |  | 
| 222     GetAllSavableResourceLinksForFrame(frames[i], &unique_check, result, |  | 
| 223                                        savable_schemes); |  | 
| 224   } |  | 
| 225 |  | 
| 226   // Since frame's src can also point to sub-resources link, so it is possible |  | 
| 227   // that some URLs in frames_list are also in resources_list. For those |  | 
| 228   // URLs, we will remove it from frame_list, only keep them in resources_list. |  | 
| 229   for (std::set<GURL>::iterator it = frames_set.begin(); |  | 
| 230        it != frames_set.end(); ++it) { |  | 
| 231     // Append unique frame source to savable frame list. |  | 
| 232     if (resources_set.find(*it) == resources_set.end()) |  | 
| 233       result->frames_list->push_back(*it); |  | 
| 234   } |  | 
| 235 |  | 
| 236   return true; |  | 
| 237 } |  | 
| 238 |  | 
| 239 }  // namespace content | 154 }  // namespace content | 
| OLD | NEW | 
|---|