Index: appengine/chrome_infra_packages/cipd/impl.py |
diff --git a/appengine/chrome_infra_packages/cipd/impl.py b/appengine/chrome_infra_packages/cipd/impl.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..eb50fc36f15d9a16a18c8dfd1a526c86b43c3dbf |
--- /dev/null |
+++ b/appengine/chrome_infra_packages/cipd/impl.py |
@@ -0,0 +1,271 @@ |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Implementation of package repository service. |
+ |
+Definitions: |
+ * Package: a named set of files that can be deployed to a client. Package name |
+ is a path-like string, e.g. "infra/tools/cipd". |
+ * Package instance: concrete incarnation of a package, identified by SHA1 of |
+ the package file content. This hex SHA1 digest is referred to as |
+ "instance ID". Package files are deterministically built (i.e. same inputs |
+ produce exact same outputs) and thus instance IDs also depend only on the |
+ contents of the package. |
+ * Package instance metadata: describes when and how package instance was built |
+ and uploaded. It is not a part of a package file, since metadata contains |
+ non-deterministic fields like timestamp. |
+ * Package instance signature: a signature of a package file. Each package |
+ instance may have >=0 signatures attached. The service itself doesn't check |
+ them, but clients do when they download the package. |
nodir
2014/12/30 22:54:01
Will metadata be signed?
Vadim Sh.
2014/12/31 01:27:35
There's a separate thing called "manifest" that is
nodir
2015/01/02 19:13:04
Acknowledged.
|
+ |
+Package instances are stored in the following way: |
+ * Package file itself is stored in CAS (implemented on top of Google Storage), |
+ using SHA1 content hash (i.e. instance ID) as identifier. |
+ * Package metadata and list of package signatures are stored in the Datastore, |
+ in append-only entity (see PackageInstance class). |
+ |
+All PackageInstance that belong to the same package are stored in the same |
+entity group (with root key derived from package name, see Package entity). |
+ |
+Package entity (even though it is empty) is also instantiated in the datastore |
+to make it possible to query for a list of known packages. |
+""" |
+ |
+import re |
+ |
+from components import auth |
+from google.appengine.ext import ndb |
+ |
+import cas |
+ |
+ |
+# Regular expression for a package name: <word>/<word>/<word>. Package names |
+# must be lower case. |
+PACKAGE_NAME_RE = re.compile(r'^([a-z0-9_\-\.]+/)*[a-z0-9_\-\.]+$') |
+ |
+# Hash algorithm used to derive package instance ID from package data. |
+DIGEST_ALGO = 'SHA1' |
nodir
2014/12/30 22:54:01
I assume this will never change since you use shas
Vadim Sh.
2014/12/31 01:27:36
Yes.
|
+ |
+ |
+def is_valid_package_name(package_name): |
+ """True if string looks like a valid package name.""" |
+ return bool(PACKAGE_NAME_RE.match(package_name)) |
+ |
+ |
+def is_valid_instance_id(instance_id): |
+  """True if string looks like a valid package instance ID.""" |
nodir
2014/12/30 22:54:01
typo: valid
Vadim Sh.
2014/12/31 01:27:36
Done.
|
+ return cas.is_valid_hash_digest(DIGEST_ALGO, instance_id) |
+ |
+ |
+def get_repo_service(): |
+ """Factory method that returns configured RepoService instance. |
+ |
+ If the service is not configured, returns None. Also acts as a mocking point |
+ for unit tests. |
+ """ |
+ cas_service = cas.get_cas_service() |
+ return RepoService(cas_service) if cas_service else None |
+ |
+ |
+class RepoService(object): |
+ """Package repository service.""" |
+ |
+ def __init__(self, cas_service): |
+ self.cas_service = cas_service |
+ |
+ def get_instance(self, package_name, instance_id): |
+ """Returns PackageInstance entity if such instance is registered. |
+ |
+ Args: |
+ package_name: name of the package, e.g. 'infra/tools/cipd'. |
+      instance_id: identifier of the package instance (SHA1 of package content). |
+ |
+ Returns: |
+ PackageInstance or None. |
+ """ |
+ return package_instance_key(package_name, instance_id).get() |
+ |
+ @ndb.transactional |
+ def register_instance(self, package_name, instance_id, metadata, signatures): |
+    """Makes new PackageInstance or updates signatures of an existing one. |
nodir
2014/12/30 22:54:01
"of an existing one"
Vadim Sh.
2014/12/31 01:27:36
Done.
|
+ |
+ Caller must verify that package data is already uploaded to CAS (by using |
+ is_data_uploaded method). If such PackageInstance already exists, all new |
+ signatures (passed via |signatures|) will be added to the package signature |
nodir
2014/12/30 22:54:01
just "...all new |signatures| will ..."
Vadim Sh.
2014/12/31 01:27:36
Done.
|
+ list. Metadata of existing packages is never changed. |
nodir
2014/12/30 22:54:01
Why you don't just raise an exception of a package
Vadim Sh.
2014/12/31 01:27:36
Well.. okay. It is more transactions in case of a
|
+ |
+ Args: |
+ package_name: name of the package, e.g. 'infra/tools/cipd'. |
+      instance_id: identifier of the package instance (SHA1 of package content). |
nodir
2014/12/30 22:54:01
typo?: identity of the package instance
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ metadata: PackageInstanceMetadata object. |
+ signatures: list of PackageInstanceSignature objects. |
+ |
+ Returns: |
+ (PackageInstance entity, True if registered or False if existed). |
+ """ |
+ key = package_instance_key(package_name, instance_id) |
+ inst = key.get() |
+ if inst is not None: |
+ if inst._add_signatures(signatures): |
+ inst.put() |
+ return inst, False |
+ Package(key=key.parent()).put() |
+ inst = PackageInstance( |
+ key=key, |
+ metadata=metadata, |
+ signatures=signatures, |
+ signature_ids=[s.identifier for s in signatures]) |
+ inst.put() |
+ return inst, True |
+ |
+ @ndb.transactional |
+ def add_signatures(self, package_name, instance_id, signatures): |
+    """Updates signatures of an existing package instance. |
nodir
2014/12/30 22:54:00
of an existing package
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ |
+ Args: |
+ package_name: name of the package, e.g. 'infra/tools/cipd'. |
+      instance_id: identifier of the package instance (SHA1 of package content). |
nodir
2014/12/30 22:54:00
typo? identity
Vadim Sh.
2014/12/31 01:27:35
Copy-pasted typo.
|
+ signatures: list of PackageInstanceSignature objects. |
+ |
+ Returns: |
+ PackageInstance entity with updated list of signatures. |
+ |
+ Raises: |
+      ValueError if package instance is missing. |
nodir
2014/12/30 22:54:01
s/is/if
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ """ |
+ inst = package_instance_key(package_name, instance_id).get() |
+ if not inst: |
+ raise ValueError('Package instance is not registered') |
nodir
2014/12/30 22:54:01
Consider being more explicit, PackageNotFoundError
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ if inst._add_signatures(signatures): |
+ inst.put() |
+ return inst |
+ |
+ def is_data_uploaded(self, package_name, instance_id): |
nodir
2014/12/30 22:54:01
what is data? Maybe rename to "is_instance_uploade
Vadim Sh.
2014/12/31 01:27:36
Done.
|
+ """Returns True if package instance data is uploaded to CAS. |
+ |
+ Args: |
+ package_name: name of the package, e.g. 'infra/tools/cipd'. |
+      instance_id: identifier of the package instance (SHA1 of package content). |
+ |
+ Returns: |
+ True or False. |
+ """ |
+ assert is_valid_package_name(package_name), package_name |
+ assert is_valid_instance_id(instance_id), instance_id |
+ return self.cas_service.is_object_present(DIGEST_ALGO, instance_id) |
+ |
+ def create_upload_session(self, package_name, instance_id, caller): |
+    """Opens a new session for data upload to CAS. |
nodir
2014/12/30 22:54:01
typo: Opens a new
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ |
+ Args: |
+ package_name: name of the package, e.g. 'infra/tools/cipd'. |
+      instance_id: identifier of the package instance (SHA1 of package content). |
nodir
2014/12/30 22:54:01
typo? identity
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ caller: auth.Identity of whoever is opening an upload session. |
+ |
+ Returns: |
+ (upload URL to upload data to, upload session ID to pass to CAS API). |
+ """ |
+ assert is_valid_package_name(package_name), package_name |
+ assert is_valid_instance_id(instance_id), instance_id |
+ upload_session, upload_session_id = self.cas_service.create_upload_session( |
+ DIGEST_ALGO, instance_id, caller) |
+ return upload_session.upload_url, upload_session_id |
+ |
+ |
+################################################################################ |
+ |
+ |
+class PackageInstanceMetadata(ndb.Model): |
+ """Description of how the package instance was built and registered. |
+ |
+ Used only as a structured property inside PackageInstance entity. |
+ """ |
+  # Date when the package was built (as provided by the client). |
+ date = ndb.StringProperty(indexed=False) |
nodir
2014/12/30 22:54:01
Why string?
Vadim Sh.
2014/12/31 01:27:35
Eh.. because I'm lazy. No longer relevant.
|
+ # Hostname of the machine that built the package (as provided by the client). |
+ hostname = ndb.StringProperty() |
+ # Unix username used to build the package (as provided by the client). |
+ user = ndb.StringProperty() |
+ |
+ # Who registered the instance. |
+ registered_by = auth.IdentityProperty() |
+ # When the instance was registered. |
+ registered_ts = ndb.DateTimeProperty() |
+ |
+ |
+class PackageInstanceSignature(ndb.Model): |
+ """Single signature. Each package instance can have multiple signatures. |
+ |
+ Used only as a structured property inside PackageInstance entity. |
+ """ |
+ # Name of the hashing algorithm used to obtain the digest. |
+ hash_algo = ndb.StringProperty() |
+ # Blob with package digest. |
+ digest = ndb.BlobProperty() |
+ # Algorithm used to compute the signature. |
+ signature_algo = ndb.StringProperty() |
+ # Fingerprint of the public key that can be used to validate the signature. |
+ signature_key = ndb.StringProperty() |
+ # Blob with the signature data. |
+ signature = ndb.BlobProperty() |
+ |
+ # Who added this signature to the list. |
+ added_by = auth.IdentityProperty() |
+ # When the signature was added. |
+ added_ts = ndb.DateTimeProperty() |
+ |
+ @property |
+ def identifier(self): |
+ """String that identifies how the signature was obtained.""" |
nodir
2014/12/30 22:54:01
I'd call this property "fingerprint" or something.
Vadim Sh.
2014/12/31 01:27:36
signature_key itself is fingerprint (hash of publi
|
+ return '%s:%s:%s' % ( |
+ self.hash_algo, self.signature_algo, self.signature_key) |
+ |
+ |
+class Package(ndb.Model): |
+ """Entity root for PackageInstance entities for some particular package. |
+ |
+ Id is a package name. |
+ """ |
+ |
+ |
+class PackageInstance(ndb.Model): |
+ """Represents some uploaded package instance. |
+ |
+ ID is package instance ID (SHA1 hex digest of package body). |
+ Parent entity is Package(id=package_name). |
+ """ |
+ # Describes how package was built and registered. Read only. |
+ metadata = ndb.StructuredProperty(PackageInstanceMetadata, required=True) |
+ # Append only list of package signatures. |
+ signatures = ndb.LocalStructuredProperty( |
+ PackageInstanceSignature, repeated=True, compressed=True) |
+  # Indexed list of signature identifiers. Kept in sync with 'signatures'. |
+ signature_ids = ndb.StringProperty(repeated=True) |
nodir
2014/12/30 22:54:01
I'd call this signature_fingerprints. I don't know
Vadim Sh.
2014/12/31 01:27:35
Renamed to signature_keys. I plan to use it for qu
|
+ |
+ def _add_signatures(self, signatures): |
+ """Adds new signatures to signature list (skips existing ones).""" |
nodir
2014/12/30 22:54:01
document Returns
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ def add_one(sig): |
+ for s in self.signatures: |
+ if s.identifier == sig.identifier and s.signature == sig.signature: |
+ return False |
+ self.signatures.append(sig) |
+ self.signature_ids.append(sig.identifier) |
+ return True |
+ modified = False |
+ for s in signatures: |
+ if add_one(s): |
+ modified = True |
+ return modified |
+ |
nodir
2014/12/30 22:54:01
Having a _pre_put_hook in models that checks invar
Vadim Sh.
2014/12/31 01:27:35
Done.
|
+ |
+def package_key(package_name): |
+ """Returns ndb.Key corresponding to particular Package entity.""" |
+ assert is_valid_package_name(package_name), package_name |
+ return ndb.Key(Package, package_name) |
+ |
+ |
+def package_instance_key(package_name, instance_id): |
+ """Returns ndb.Key corresponding to particular PackageInstance.""" |
+ assert is_valid_instance_id(instance_id), instance_id |
nodir
2014/12/30 22:54:00
Also validate package name
Vadim Sh.
2014/12/31 01:27:35
It is validated in package_key
nodir
2015/01/02 19:13:04
Acknowledged.
|
+ return ndb.Key(PackageInstance, instance_id, parent=package_key(package_name)) |