OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from collections import defaultdict | 5 from collections import defaultdict |
6 import posixpath | 6 import posixpath |
7 | 7 |
| 8 from future import Gettable, Future |
8 from path_util import SplitParent | 9 from path_util import SplitParent |
9 from special_paths import SITE_VERIFICATION_FILE | 10 from special_paths import SITE_VERIFICATION_FILE |
10 | 11 |
11 | 12 |
12 def _SimplifyFileName(file_name): | 13 def _SimplifyFileName(file_name): |
13 return (posixpath.splitext(file_name)[0] | 14 return (posixpath.splitext(file_name)[0] |
14 .lower() | 15 .lower() |
15 .replace('.', '') | 16 .replace('.', '') |
16 .replace('-', '') | 17 .replace('-', '') |
17 .replace('_', '')) | 18 .replace('_', '')) |
18 | 19 |
19 | 20 |
class PathCanonicalizer(object):
  '''Transforms paths into their canonical forms. Since the docserver has had
  many incarnations - e.g. there didn't use to be apps/ - there may be old
  paths lying around the webs. We try to redirect those to where they are now.
  '''
  def __init__(self,
               file_system,
               object_store_creator,
               strip_extensions):
    # |strip_extensions| is a list of file extensions (e.g. .html) that should
    # be stripped for a path's canonical form.
    self._cache = object_store_creator.Create(
        PathCanonicalizer, category=file_system.GetIdentity())
    self._file_system = file_system
    self._strip_extensions = strip_extensions

  def _LoadCache(self):
    '''Returns a Future whose Get() yields the tuple
    (canonical_paths, simplified_paths_map), reading them from the cache when
    both are present and otherwise rebuilding them by walking the file system.
    '''
    cached_future = self._cache.GetMulti(('canonical_paths',
                                          'simplified_paths_map'))

    def resolve():
      # |canonical_paths| is the pre-calculated set of canonical paths.
      # |simplified_paths_map| is a mapping of simplified file names to a list
      # of full paths that contain them. For example,
      #  - browseraction: [extensions/browserAction.html]
      #  - storage:       [apps/storage.html, extensions/storage.html]
      cached = cached_future.Get()
      canonical_paths = cached.get('canonical_paths')
      simplified_paths_map = cached.get('simplified_paths_map')
      if canonical_paths is not None and simplified_paths_map is not None:
        return canonical_paths, simplified_paths_map

      # Either nothing is cached yet, or only one of the two entries came back.
      # Rebuild both together so that they always describe the same walk,
      # rather than asserting on (or returning) a half-populated pair.
      canonical_paths = set()
      simplified_paths_map = defaultdict(list)
      for base, dirs, files in self._file_system.Walk(''):
        for path in dirs + files:
          path_without_ext, ext = posixpath.splitext(path)
          canonical_path = posixpath.join(base, path_without_ext)
          # Keep the extension unless it is one of |strip_extensions|; the
          # site verification file always keeps its extension.
          if (ext not in self._strip_extensions or
              path == SITE_VERIFICATION_FILE):
            canonical_path += ext
          canonical_paths.add(canonical_path)
          simplified_paths_map[_SimplifyFileName(path)].append(canonical_path)
      # Store |simplified_paths_map| sorted. Ties in length are broken by
      # taking the shortest, lexicographically smallest path.
      for path_list in simplified_paths_map.values():
        path_list.sort(key=lambda p: (len(p), p))
      self._cache.SetMulti({
        'canonical_paths': canonical_paths,
        'simplified_paths_map': simplified_paths_map,
      })
      return canonical_paths, simplified_paths_map

    # NOTE(review): presumably |resolve| only runs once Get() is called on the
    # returned Future - confirm against the future module.
    return Future(delegate=Gettable(resolve))

  def Canonicalize(self, path):
    '''Returns the canonical path for |path|. If |path| is already canonical,
    or no known file has a similar name, |path| is returned unchanged.
    '''
    canonical_paths, simplified_paths_map = self._LoadCache().Get()

    # Path may already be the canonical path.
    if path in canonical_paths:
      return path

    # Path not found. Our single heuristic: find |base| in the directory
    # structure with the longest common prefix of |path|.
    _, base = SplitParent(path)
    potential_paths = simplified_paths_map.get(_SimplifyFileName(base))
    if not potential_paths:
      # There is no file with anything close to that name.
      return path

    # The most likely canonical file is the one with the longest common prefix
    # with |path|. This is slightly weaker than it could be; |path| is
    # compared, not the simplified form of |path|, which may matter.
    # max() returns the first of several equally good candidates, so ties go
    # to the earliest entry in the (already sorted) list.
    return max(potential_paths,
               key=lambda p: len(posixpath.commonprefix((p, path))))

  def Cron(self):
    '''Returns the Future from _LoadCache().
    NOTE(review): presumably called by the cron job to warm the cache - confirm
    against the caller.
    '''
    return self._LoadCache()
OLD | NEW |