Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(254)

Side by Side Diff: chrome/common/extensions/docs/server2/path_canonicalizer.py

Issue 164193003: Docserver: Implement Cron for PathCanonicalizer. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 # Copyright 2013 The Chromium Authors. All rights reserved. 1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from collections import defaultdict 5 from collections import defaultdict
6 import posixpath 6 import posixpath
7 7
8 from future import Gettable, Future
8 from path_util import SplitParent 9 from path_util import SplitParent
9 from special_paths import SITE_VERIFICATION_FILE 10 from special_paths import SITE_VERIFICATION_FILE
10 11
11 12
12 def _SimplifyFileName(file_name): 13 def _SimplifyFileName(file_name):
13 return (posixpath.splitext(file_name)[0] 14 return (posixpath.splitext(file_name)[0]
14 .lower() 15 .lower()
15 .replace('.', '') 16 .replace('.', '')
16 .replace('-', '') 17 .replace('-', '')
17 .replace('_', '')) 18 .replace('_', ''))
18 19
19 20
20 class PathCanonicalizer(object): 21 class PathCanonicalizer(object):
21 '''Transforms paths into their canonical forms. Since the docserver has had 22 '''Transforms paths into their canonical forms. Since the docserver has had
22 many incarnations - e.g. there didn't use to be apps/ - there may be old 23 many incarnations - e.g. there didn't use to be apps/ - there may be old
23 paths lying around the webs. We try to redirect those to where they are now. 24 paths lying around the webs. We try to redirect those to where they are now.
24 ''' 25 '''
25 def __init__(self, 26 def __init__(self,
26 file_system, 27 file_system,
27 object_store_creator, 28 object_store_creator,
28 strip_extensions): 29 strip_extensions):
29 # |strip_extensions| is a list of file extensions (e.g. .html) that should 30 # |strip_extensions| is a list of file extensions (e.g. .html) that should
30 # be stripped for a path's canonical form. 31 # be stripped for a path's canonical form.
31 self._cache = object_store_creator.Create( 32 self._cache = object_store_creator.Create(
32 PathCanonicalizer, category=file_system.GetIdentity()) 33 PathCanonicalizer, category=file_system.GetIdentity())
33 self._file_system = file_system 34 self._file_system = file_system
34 self._strip_extensions = strip_extensions 35 self._strip_extensions = strip_extensions
35 36
36 def _LoadCache(self): 37 def _LoadCache(self):
37 cached = self._cache.GetMulti(('canonical_paths', 38 cached_future = self._cache.GetMulti(('canonical_paths',
38 'simplified_paths_map')).Get() 39 'simplified_paths_map'))
39 40
40 # |canonical_paths| is the pre-calculated set of canonical paths. 41 def resolve():
41 # |simplified_paths_map| is a lazily populated mapping of simplified file 42 # |canonical_paths| is the pre-calculated set of canonical paths.
42 # names to a list of full paths that contain them. For example, 43 # |simplified_paths_map| is a lazily populated mapping of simplified file
43 # - browseraction: [extensions/browserAction.html] 44 # names to a list of full paths that contain them. For example,
44 # - storage: [apps/storage.html, extensions/storage.html] 45 # - browseraction: [extensions/browserAction.html]
45 canonical_paths, simplified_paths_map = ( 46 # - storage: [apps/storage.html, extensions/storage.html]
46 cached.get('canonical_paths'), cached.get('simplified_paths_map')) 47 cached = cached_future.Get()
48 canonical_paths, simplified_paths_map = (
49 cached.get('canonical_paths'), cached.get('simplified_paths_map'))
47 50
48 if canonical_paths is None: 51 if canonical_paths is None:
49 assert simplified_paths_map is None 52 assert simplified_paths_map is None
50 canonical_paths = set() 53 canonical_paths = set()
51 simplified_paths_map = defaultdict(list) 54 simplified_paths_map = defaultdict(list)
52 for base, dirs, files in self._file_system.Walk(''): 55 for base, dirs, files in self._file_system.Walk(''):
53 for path in dirs + files: 56 for path in dirs + files:
54 path_without_ext, ext = posixpath.splitext(path) 57 path_without_ext, ext = posixpath.splitext(path)
55 canonical_path = posixpath.join(base, path_without_ext) 58 canonical_path = posixpath.join(base, path_without_ext)
56 if (ext not in self._strip_extensions or 59 if (ext not in self._strip_extensions or
57 path == SITE_VERIFICATION_FILE): 60 path == SITE_VERIFICATION_FILE):
58 canonical_path += ext 61 canonical_path += ext
59 canonical_paths.add(canonical_path) 62 canonical_paths.add(canonical_path)
60 simplified_paths_map[_SimplifyFileName(path)].append(canonical_path) 63 simplified_paths_map[_SimplifyFileName(path)].append(canonical_path)
61 # Store |simplified_paths_map| sorted by length, shortest first. Ties in 64 # Store |simplified_paths_map| sorted by length, shortest first. Ties
62 # length are broken by taking the lexicographically smallest path. 65 # in length are broken by taking the lexicographically smallest path.
63 for path_list in simplified_paths_map.itervalues(): 66 for path_list in simplified_paths_map.itervalues():
64 path_list.sort(key=lambda p: (len(p), p)) 67 path_list.sort(key=lambda p: (len(p), p))
65 self._cache.SetMulti({ 68 self._cache.SetMulti({
66 'canonical_paths': canonical_paths, 69 'canonical_paths': canonical_paths,
67 'simplified_paths_map': simplified_paths_map, 70 'simplified_paths_map': simplified_paths_map,
68 }) 71 })
69 else: 72 else:
70 assert simplified_paths_map is not None 73 assert simplified_paths_map is not None
71 74
72 return canonical_paths, simplified_paths_map 75 return canonical_paths, simplified_paths_map
76
77 return Future(delegate=Gettable(resolve))
73 78
74 def Canonicalize(self, path): 79 def Canonicalize(self, path):
75 '''Returns the canonical path for |path|. 80 '''Returns the canonical path for |path|.
76 ''' 81 '''
77 canonical_paths, simplified_paths_map = self._LoadCache() 82 canonical_paths, simplified_paths_map = self._LoadCache().Get()
78 83
79 # Path may already be the canonical path. 84 # Path may already be the canonical path.
80 if path in canonical_paths: 85 if path in canonical_paths:
81 return path 86 return path
82 87
83 # Path not found. Our single heuristic: find |base| in the directory 88 # Path not found. Our single heuristic: find |base| in the directory
84 # structure with the longest common prefix of |path|. 89 # structure with the longest common prefix of |path|.
85 _, base = SplitParent(path) 90 _, base = SplitParent(path)
86 potential_paths = simplified_paths_map.get(_SimplifyFileName(base)) 91 potential_paths = simplified_paths_map.get(_SimplifyFileName(base))
87 if not potential_paths: 92 if not potential_paths:
88 # There is no file with anything close to that name. 93 # There is no file with anything close to that name.
89 return path 94 return path
90 95
91 # The most likely canonical file is the one with the longest common prefix 96 # The most likely canonical file is the one with the longest common prefix
92 # with |path|. This is slightly weaker than it could be; |path| is 97 # with |path|. This is slightly weaker than it could be; |path| is
93 # compared, not the simplified form of |path|, which may matter. 98 # compared, not the simplified form of |path|, which may matter.
94 max_prefix = potential_paths[0] 99 max_prefix = potential_paths[0]
95 max_prefix_length = len(posixpath.commonprefix((max_prefix, path))) 100 max_prefix_length = len(posixpath.commonprefix((max_prefix, path)))
96 for path_for_file in potential_paths[1:]: 101 for path_for_file in potential_paths[1:]:
97 prefix_length = len(posixpath.commonprefix((path_for_file, path))) 102 prefix_length = len(posixpath.commonprefix((path_for_file, path)))
98 if prefix_length > max_prefix_length: 103 if prefix_length > max_prefix_length:
99 max_prefix, max_prefix_length = path_for_file, prefix_length 104 max_prefix, max_prefix_length = path_for_file, prefix_length
100 105
101 return max_prefix 106 return max_prefix
107
108 def Cron(self):
109 return self._LoadCache()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698