Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Implementation of package repository service. | |
| 6 | |
| 7 Definitions: | |
| 8 * Package: a named set of files that can be deployed to a client. Package name | |
| 9 is a path-like string, e.g. "infra/tools/cipd". | |
| 10 * Package instance: concrete incarnation of a package, identified by SHA1 of | |
| 11 the package file content. This hex SHA1 digest is referred to as | |
| 12 "instance ID". Package files are deterministically built (i.e. same inputs | |
| 13 produce exact same outputs) and thus instance IDs also depend only on the | |
| 14 contents of the package. | |
| 15 * Package instance metadata: describes when and how package instance was built | |
| 16 and uploaded. It is not a part of a package file, since metadata contains | |
| 17 non-deterministic fields like timestamp. | |
| 18 * Package instance signature: a signature of a package file. Each package | |
| 19 instance may have >=0 signatures attached. The service itself doesn't check | |
| 20 them, but clients do when they download the package. | |
|
nodir
2014/12/30 22:54:01
Will metadata be signed?
Vadim Sh.
2014/12/31 01:27:35
There's a separate thing called "manifest" that is
nodir
2015/01/02 19:13:04
Acknowledged.
| |
| 21 | |
| 22 Package instances are stored in the following way: | |
| 23 * Package file itself is stored in CAS (implemented on top of Google Storage), | |
| 24 using SHA1 content hash (i.e. instance ID) as identifier. | |
| 25 * Package metadata and list of package signatures are stored in the Datastore, | |
| 26 in append-only entity (see PackageInstance class). | |
| 27 | |
| 28 All PackageInstance that belong to the same package are stored in the same | |
| 29 entity group (with root key derived from package name, see Package entity). | |
| 30 | |
| 31 Package entity (even though it is empty) is also instantiated in the datastore | |
| 32 to make possible querying for a list of known packages. | |
| 33 """ | |
| 34 | |
| 35 import re | |
| 36 | |
| 37 from components import auth | |
| 38 from google.appengine.ext import ndb | |
| 39 | |
| 40 import cas | |
| 41 | |
| 42 | |
| 43 # Regular expression for a package name: <word>/<word>/<word>. Package names must | |
| 44 # be lower case. | |
| 45 PACKAGE_NAME_RE = re.compile(r'^([a-z0-9_\-\.]+/)*[a-z0-9_\-\.]+$') | |
| 46 | |
| 47 # Hash algorithm used to derive package instance ID from package data. | |
| 48 DIGEST_ALGO = 'SHA1' | |
|
nodir
2014/12/30 22:54:01
I assume this will never change since you use shas
Vadim Sh.
2014/12/31 01:27:36
Yes.
| |
| 49 | |
| 50 | |
| 51 def is_valid_package_name(package_name): | |
| 52 """True if string looks like a valid package name.""" | |
| 53 return bool(PACKAGE_NAME_RE.match(package_name)) | |
| 54 | |
| 55 | |
| 56 def is_valid_instance_id(instance_id): | |
| 57 """True if string looks like a valida package instance ID.""" | |
|
nodir
2014/12/30 22:54:01
typo: valid
Vadim Sh.
2014/12/31 01:27:36
Done.
| |
| 58 return cas.is_valid_hash_digest(DIGEST_ALGO, instance_id) | |
| 59 | |
| 60 | |
| 61 def get_repo_service(): | |
| 62 """Factory method that returns configured RepoService instance. | |
| 63 | |
| 64 If the service is not configured, returns None. Also acts as a mocking point | |
| 65 for unit tests. | |
| 66 """ | |
| 67 cas_service = cas.get_cas_service() | |
| 68 return RepoService(cas_service) if cas_service else None | |
| 69 | |
| 70 | |
| 71 class RepoService(object): | |
| 72 """Package repository service.""" | |
| 73 | |
| 74 def __init__(self, cas_service): | |
| 75 self.cas_service = cas_service | |
| 76 | |
| 77 def get_instance(self, package_name, instance_id): | |
| 78 """Returns PackageInstance entity if such instance is registered. | |
| 79 | |
| 80 Args: | |
| 81 package_name: name of the package, e.g. 'infra/tools/cipd'. | |
| 82 instance_id: identifier of the package instance (SHA1 of package content). | |
| 83 | |
| 84 Returns: | |
| 85 PackageInstance or None. | |
| 86 """ | |
| 87 return package_instance_key(package_name, instance_id).get() | |
| 88 | |
| 89 @ndb.transactional | |
| 90 def register_instance(self, package_name, instance_id, metadata, signatures): | |
| 91 """Makes new PackageInstance or updates signatures on existing one. | |
|
nodir
2014/12/30 22:54:01
"of an existing one"
Vadim Sh.
2014/12/31 01:27:36
Done.
| |
| 92 | |
| 93 Caller must verify that package data is already uploaded to CAS (by using | |
| 94 is_data_uploaded method). If such PackageInstance already exists, all new | |
| 95 signatures (passed via |signatures|) will be added to the package signature | |
|
nodir
2014/12/30 22:54:01
just "...all new |signatures| will ..."
Vadim Sh.
2014/12/31 01:27:36
Done.
| |
| 96 list. Metadata of existing packages is never changed. | |
|
nodir
2014/12/30 22:54:01
Why you don't just raise an exception of a package
Vadim Sh.
2014/12/31 01:27:36
Well.. okay. It is more transactions in case of a
| |
| 97 | |
| 98 Args: | |
| 99 package_name: name of the package, e.g. 'infra/tools/cipd'. | |
| 100 instance_id: identified of the package instance (SHA1 of package content). | |
|
nodir
2014/12/30 22:54:01
typo?: identity of the package instance
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 101 metadata: PackageInstanceMetadata object. | |
| 102 signatures: list of PackageInstanceSignature objects. | |
| 103 | |
| 104 Returns: | |
| 105 (PackageInstance entity, True if registered or False if existed). | |
| 106 """ | |
| 107 key = package_instance_key(package_name, instance_id) | |
| 108 inst = key.get() | |
| 109 if inst is not None: | |
| 110 if inst._add_signatures(signatures): | |
| 111 inst.put() | |
| 112 return inst, False | |
| 113 Package(key=key.parent()).put() | |
| 114 inst = PackageInstance( | |
| 115 key=key, | |
| 116 metadata=metadata, | |
| 117 signatures=signatures, | |
| 118 signature_ids=[s.identifier for s in signatures]) | |
| 119 inst.put() | |
| 120 return inst, True | |
| 121 | |
| 122 @ndb.transactional | |
| 123 def add_signatures(self, package_name, instance_id, signatures): | |
| 124 """Updates signatures on existing package instance. | |
|
nodir
2014/12/30 22:54:00
of an existing package
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 125 | |
| 126 Args: | |
| 127 package_name: name of the package, e.g. 'infra/tools/cipd'. | |
| 128 instance_id: identified of the package instance (SHA1 of package content). | |
|
nodir
2014/12/30 22:54:00
typo? identity
Vadim Sh.
2014/12/31 01:27:35
Copy-pasted typo.
| |
| 129 signatures: list of PackageInstanceSignature objects. | |
| 130 | |
| 131 Returns: | |
| 132 PackageInstance entity with updated list of signatures. | |
| 133 | |
| 134 Raises: | |
| 135 ValueError is package instance is missing. | |
|
nodir
2014/12/30 22:54:01
s/is/if
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 136 """ | |
| 137 inst = package_instance_key(package_name, instance_id).get() | |
| 138 if not inst: | |
| 139 raise ValueError('Package instance is not registered') | |
|
nodir
2014/12/30 22:54:01
Consider being more explicit, PackageNotFoundError
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 140 if inst._add_signatures(signatures): | |
| 141 inst.put() | |
| 142 return inst | |
| 143 | |
| 144 def is_data_uploaded(self, package_name, instance_id): | |
|
nodir
2014/12/30 22:54:01
what is data? Maybe rename to "is_instance_uploade
Vadim Sh.
2014/12/31 01:27:36
Done.
| |
| 145 """Returns True if package instance data is uploaded to CAS. | |
| 146 | |
| 147 Args: | |
| 148 package_name: name of the package, e.g. 'infra/tools/cipd'. | |
| 149 instance_id: identifier of the package instance (SHA1 of package content). | |
| 150 | |
| 151 Returns: | |
| 152 True or False. | |
| 153 """ | |
| 154 assert is_valid_package_name(package_name), package_name | |
| 155 assert is_valid_instance_id(instance_id), instance_id | |
| 156 return self.cas_service.is_object_present(DIGEST_ALGO, instance_id) | |
| 157 | |
| 158 def create_upload_session(self, package_name, instance_id, caller): | |
| 159 """Opens new session for data upload to CAS. | |
|
nodir
2014/12/30 22:54:01
typo: Opens a new
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 160 | |
| 161 Args: | |
| 162 package_name: name of the package, e.g. 'infra/tools/cipd'. | |
| 163 instance_id: identified of the package instance (SHA1 of package content). | |
|
nodir
2014/12/30 22:54:01
typo? identity
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 164 caller: auth.Identity of whoever is opening an upload session. | |
| 165 | |
| 166 Returns: | |
| 167 (upload URL to upload data to, upload session ID to pass to CAS API). | |
| 168 """ | |
| 169 assert is_valid_package_name(package_name), package_name | |
| 170 assert is_valid_instance_id(instance_id), instance_id | |
| 171 upload_session, upload_session_id = self.cas_service.create_upload_session( | |
| 172 DIGEST_ALGO, instance_id, caller) | |
| 173 return upload_session.upload_url, upload_session_id | |
| 174 | |
| 175 | |
| 176 ################################################################################ | |
| 177 | |
| 178 | |
| 179 class PackageInstanceMetadata(ndb.Model): | |
| 180 """Description of how the package instance was built and registered. | |
| 181 | |
| 182 Used only as a structured property inside PackageInstance entity. | |
| 183 """ | |
| 184 # Date when the package was built (as provided by the client). | |
| 185 date = ndb.StringProperty(indexed=False) | |
|
nodir
2014/12/30 22:54:01
Why string?
Vadim Sh.
2014/12/31 01:27:35
Eh.. because I'm lazy. No longer relevant.
| |
| 186 # Hostname of the machine that built the package (as provided by the client). | |
| 187 hostname = ndb.StringProperty() | |
| 188 # Unix username used to build the package (as provided by the client). | |
| 189 user = ndb.StringProperty() | |
| 190 | |
| 191 # Who registered the instance. | |
| 192 registered_by = auth.IdentityProperty() | |
| 193 # When the instance was registered. | |
| 194 registered_ts = ndb.DateTimeProperty() | |
| 195 | |
| 196 | |
| 197 class PackageInstanceSignature(ndb.Model): | |
| 198 """Single signature. Each package instance can have multiple signatures. | |
| 199 | |
| 200 Used only as a structured property inside PackageInstance entity. | |
| 201 """ | |
| 202 # Name of the hashing algorithm used to obtain the digest. | |
| 203 hash_algo = ndb.StringProperty() | |
| 204 # Blob with package digest. | |
| 205 digest = ndb.BlobProperty() | |
| 206 # Algorithm used to compute the signature. | |
| 207 signature_algo = ndb.StringProperty() | |
| 208 # Fingerprint of the public key that can be used to validate the signature. | |
| 209 signature_key = ndb.StringProperty() | |
| 210 # Blob with the signature data. | |
| 211 signature = ndb.BlobProperty() | |
| 212 | |
| 213 # Who added this signature to the list. | |
| 214 added_by = auth.IdentityProperty() | |
| 215 # When the signature was added. | |
| 216 added_ts = ndb.DateTimeProperty() | |
| 217 | |
| 218 @property | |
| 219 def identifier(self): | |
| 220 """String that identifies how the signature was obtained.""" | |
|
nodir
2014/12/30 22:54:01
I'd call this property "fingerprint" or something.
Vadim Sh.
2014/12/31 01:27:36
signature_key itself is fingerprint (hash of publi
| |
| 221 return '%s:%s:%s' % ( | |
| 222 self.hash_algo, self.signature_algo, self.signature_key) | |
| 223 | |
| 224 | |
| 225 class Package(ndb.Model): | |
| 226 """Entity root for PackageInstance entities for some particular package. | |
| 227 | |
| 228 Id is a package name. | |
| 229 """ | |
| 230 | |
| 231 | |
| 232 class PackageInstance(ndb.Model): | |
| 233 """Represents some uploaded package instance. | |
| 234 | |
| 235 ID is package instance ID (SHA1 hex digest of package body). | |
| 236 Parent entity is Package(id=package_name). | |
| 237 """ | |
| 238 # Describes how package was built and registered. Read only. | |
| 239 metadata = ndb.StructuredProperty(PackageInstanceMetadata, required=True) | |
| 240 # Append only list of package signatures. | |
| 241 signatures = ndb.LocalStructuredProperty( | |
| 242 PackageInstanceSignature, repeated=True, compressed=True) | |
| 243 # Indexed list of signature identifiers. Kept in sync with 'signatures'. | |
| 244 signature_ids = ndb.StringProperty(repeated=True) | |
|
nodir
2014/12/30 22:54:01
I'd call this signature_fingerprints. I don't know
Vadim Sh.
2014/12/31 01:27:35
Renamed to signature_keys. I plan to use it for qu
| |
| 245 | |
| 246 def _add_signatures(self, signatures): | |
| 247 """Adds new signatures to signature list (skips existing ones).""" | |
|
nodir
2014/12/30 22:54:01
document Returns
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 248 def add_one(sig): | |
| 249 for s in self.signatures: | |
| 250 if s.identifier == sig.identifier and s.signature == sig.signature: | |
| 251 return False | |
| 252 self.signatures.append(sig) | |
| 253 self.signature_ids.append(sig.identifier) | |
| 254 return True | |
| 255 modified = False | |
| 256 for s in signatures: | |
| 257 if add_one(s): | |
| 258 modified = True | |
| 259 return modified | |
| 260 | |
|
nodir
2014/12/30 22:54:01
Having a _pre_put_hook in models that checks invar
Vadim Sh.
2014/12/31 01:27:35
Done.
| |
| 261 | |
| 262 def package_key(package_name): | |
| 263 """Returns ndb.Key corresponding to particular Package entity.""" | |
| 264 assert is_valid_package_name(package_name), package_name | |
| 265 return ndb.Key(Package, package_name) | |
| 266 | |
| 267 | |
| 268 def package_instance_key(package_name, instance_id): | |
| 269 """Returns ndb.Key corresponding to particular PackageInstance.""" | |
| 270 assert is_valid_instance_id(instance_id), instance_id | |
|
nodir
2014/12/30 22:54:00
Also validate package name
Vadim Sh.
2014/12/31 01:27:35
It is validated in package_key
nodir
2015/01/02 19:13:04
Acknowledged.
| |
| 271 return ndb.Key(PackageInstance, instance_id, parent=package_key(package_name)) | |
| OLD | NEW |