Index: extensions/common/api/media_perception_private.idl
diff --git a/extensions/common/api/media_perception_private.idl b/extensions/common/api/media_perception_private.idl
new file mode 100644
index 0000000000000000000000000000000000000000..88721d651634ac14d5fe72ba5a4e2011781338b2
--- /dev/null
+++ b/extensions/common/api/media_perception_private.idl
@@ -0,0 +1,158 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Private API for communicating with and receiving real-time media perception
+// information from a computer vision + machine intelligence binary.
+[platforms=("chromeos"),
+ implemented_in = "extensions/browser/api/media_perception_private/media_perception_private_api.h"]
tbarzic 2017/05/05 21:10:41: I don't think you need this - this seems to be the…
Luke Sorenson 2017/05/08 19:06:07: Done. In https://codereview.chromium.org/286020300…
+
+namespace mediaPerceptionPrivate {
+  enum Status {
+    // An error occurred that needs to be propagated to the frontend.
tbarzic 2017/05/05 21:10:41: I'd drop the "that needs to be propagated to the frontend"…
Luke Sorenson 2017/05/08 19:06:07: Done. In https://codereview.chromium.org/28602030…
+    ERROR,
+
+    // Unable to reach media analysis process.
+    TIMEOUT,
+
+    // Media analytics process waiting to be started.
+    UNINITIALIZED,
+
+    // Analysis process running but not receiving frames.
tbarzic 2017/05/05 21:10:41: Can you explain the difference between STARTED/RUNNING…
Luke Sorenson 2017/05/08 19:06:07: Done. In https://codereview.chromium.org/286020300…
+    STARTED,
+
+    // Analysis process running and ingesting frames.
+    RUNNING,
+
+    // Analysis process ready to be set to state RUNNING.
+    SUSPENDED
+  };
+
+  // The system and configuration state of the analysis process and v4lplugin.
+  dictionary State {
+    Status? status;
+    // Optionally add device context to the setState command for starting the
+    // media analytics process, so that the media analytics process can
+    // better select the right video device to open.
+    DOMString? deviceContext;
tbarzic 2017/05/05 21:10:41: Is this allowed only with setState(RUNNING)? I.e.…
Luke Sorenson 2017/05/08 19:06:07: Providing device context is only meaningful with setState…
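A minimal sketch of what such a call might look like from the extension side, assuming the standard chrome.mediaPerceptionPrivate binding that Chromium generates for extension IDLs (with enum values surfaced as strings); the 'usb-camera-1' device context is a hypothetical value:

  // Request the RUNNING state, passing an optional device context so the
  // analytics process can pick the right video device to open.
  chrome.mediaPerceptionPrivate.setState(
      {status: 'RUNNING', deviceContext: 'usb-camera-1'},
      function(newState) {
        // The callback reports the state after the transition, which lets
        // the caller verify that the process actually reached RUNNING.
        console.log('Status is now: ' + newState.status);
      });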
+  };
+
+  dictionary Point {
+    // x represents the horizontal distance from the top left corner of the
tbarzic 2017/05/05 21:10:41: drop "x represents" and "to the point"
Luke Sorenson 2017/05/08 19:06:07: Done.
+    // image to the point.
+    double? x;
+    // y represents the vertical distance from the top left corner of the
tbarzic 2017/05/05 21:10:41: drop "y represents" and "to the point"
Luke Sorenson 2017/05/08 19:06:06: Done.
+    // image to the point.
+    double? y;
tbarzic 2017/05/05 21:10:41: This seems like a required value - can you go through…
Luke Sorenson 2017/05/08 19:06:07: Because many of these Dictionary definitions are b…
+  };
+
+  dictionary BoundingBox {
+    // Specifies whether the points are normalized to the size of the image.
+    // If not set, the points are not normalized.
tbarzic 2017/05/05 21:10:41: "If not set, the points are not normalized" seems…
Luke Sorenson 2017/05/08 19:06:07: Done :) In https://codereview.chromium.org/2860203…
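Since every field here is optional, a consumer has to handle both normalized and pixel-space boxes. A sketch of a hypothetical denormalize helper, assuming the frame dimensions come from the enclosing FramePerception:

  // Hypothetical helper: converts a normalized BoundingBox to pixel
  // coordinates. If 'normalized' is unset or false, the box is assumed to
  // already be in pixels and is returned unchanged.
  function denormalize(box, frameWidthInPx, frameHeightInPx) {
    if (!box.normalized)
      return box;
    return {
      normalized: false,
      topLeft: {x: box.topLeft.x * frameWidthInPx,
                y: box.topLeft.y * frameHeightInPx},
      bottomRight: {x: box.bottomRight.x * frameWidthInPx,
                    y: box.bottomRight.y * frameHeightInPx}
    };
  }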
+    boolean? normalized;
+    // The two points that define the corners of a bounding box.
+    Point? topLeft;
+    Point? bottomRight;
+  };
+
+  enum EntityType {
+    UNSPECIFIED,
+    FACE,
+    PERSON
+  };
+
+  dictionary Entity {
+    // A unique id associated with the detected entity, which can be used to
+    // track the entity over time.
+    long? id;
tbarzic 2017/05/05 21:10:41: why is this optional?
Luke Sorenson 2017/05/08 19:06:07: See above explanation about decision to use only o…
tbarzic 2017/05/08 23:04:55: My idea was closer to dropping nonsensical message…
+
+    EntityType? type;
+
+    // The minimum box which captures the entire detected entity.
+    BoundingBox? boundingBox;
+
+    // A value for the quality of this detection.
+    double? confidence;
+  };
+
+  // The set of computer vision metadata for an image frame.
+  dictionary FramePerception {
+    long? frameId;
+
+    long? frameWidthInPx;
+    long? frameHeightInPx;
+    // The timestamp associated with the frame (when it is received by the
+    // analysis process).
+    double? timestamp;
+
+    // The list of entities detected in this frame.
+    Entity[]? entities;
+  };
+
+  dictionary MediaPerception {
+    // The timestamp for when this data originated from the analysis
+    // process.
tbarzic 2017/05/05 21:10:41: I'd comment on relationship to timestamps in frame…
Luke Sorenson 2017/05/08 19:06:07: Done.
+    double? timestamp;
+    // An array of framePerceptions, often just one.
+    FramePerception[]? framePerceptions;
+  };
+
+  // TODO(lasoren): Change this interface based on the compressed images coming
+  // from the media analytics process.
+  dictionary ImageFrame {
+    long? width;
+    long? height;
+    // colorspace is defined in the same way as SimpleImage::ColorSpace.
tbarzic 2017/05/05 21:10:41: I'd avoid referencing SimpleImage::ColorSpace here…
Luke Sorenson 2017/05/08 19:06:07: Acknowledged.
+    long? colorspace;
+    // By default, 1 channel means Grayscale, 2 channels means Grayscale + Alpha,
tbarzic 2017/05/05 21:10:41: So this is the number of channels? maybe rename it…
Luke Sorenson 2017/05/08 19:06:06: For now, since we're not yet sure how we'll be repres…
+    // 3 channels means RGB, and 4 channels means RGBA.
+    long? channels;
+    // TODO(lasoren): Add compression format marker.
+    // The bytes of the image frame.
+    ArrayBuffer? frame;
+  };
+
+  dictionary PerceptionSample {
+    // The video analytics FramePerception for the associated image frame
+    // data.
+    FramePerception? framePerception;
+    // The image frame data for the associated FramePerception object.
+    ImageFrame? imageFrame;
+  };
+
+  dictionary Diagnostics {
+    // A buffer of image frames and the associated video analytics to be sent
+    // for diagnostics (when a user reports a malfunction).
+    PerceptionSample[]? perceptionSamples;
+  };
+
+  callback StateCallback = void(State state);
+
+  callback DiagnosticsCallback = void(Diagnostics diagnostics);
+
+  interface Functions {
+    // Get the status of the media perception process.
+    // |callback| : The current State of the system.
+    static void getState(StateCallback callback);
+
+    // Set the desired state of the system.
+    // |state| : A dictionary with the desired new state.
+    // |callback| : The State of the system after setting it. Verifies that
+    // the state was set as desired.
+    static void setState(State state, StateCallback callback);
+
+    // Get a diagnostics buffer out of the video analytics process.
+    // |callback| : Returns a Diagnostics dictionary object which
+    // contains image frame data and associated detections to be logged.
+    static void getDiagnostics(DiagnosticsCallback callback);
+  };
+
+  interface Events {
+    // Fired when the analysis process passes the current mediaPerception
+    // information back to Chrome.
+    // |mediaPerception| : The dictionary which contains a dump of everything
+    // the analysis process has detected or determined from the incoming media
+    // streams.
+    static void onMediaPerception(MediaPerception mediaPerception);
+  };
+};
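For completeness, a sketch of the receiving side under the same assumptions as above (the standard chrome.mediaPerceptionPrivate binding, enum values surfaced as strings); since every field in this IDL is optional, all accesses are guarded:

  // Receive real-time detections from the analysis process.
  chrome.mediaPerceptionPrivate.onMediaPerception.addListener(
      function(mediaPerception) {
        (mediaPerception.framePerceptions || []).forEach(function(frame) {
          (frame.entities || []).forEach(function(entity) {
            console.log(entity.type + ' detected with confidence ' +
                        entity.confidence);
          });
        });
      });

  // Pull the diagnostics buffer, e.g. when a user reports a malfunction.
  chrome.mediaPerceptionPrivate.getDiagnostics(function(diagnostics) {
    var samples = diagnostics.perceptionSamples || [];
    console.log('Received ' + samples.length + ' perception samples.');
  });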