Initial stats proto refactoring. (#52)

Bogdan Drutu · web-flow · commit 92db6960e9cb · 2018-03-04T17:03:01.000-08:00
* Initial stats proto refactoring.

* Fix some comments.

* Remove max aggregation.
diff --git a/opencensus/proto/stats/stats.proto b/opencensus/proto/stats/stats.proto
@@ -1,4 +1,4 @@
-// Copyright 2016-17, OpenCensus Authors
+// Copyright 2016-18, OpenCensus Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,163 +16,91 @@ syntax = "proto3";
 
 package opencensus.proto.stats;
 
-import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
 
+option go_package = "statsproto";
 option java_multiple_files = true;
 option java_package = "io.opencensus.proto.stats";
 option java_outer_classname = "StatsProto";
 
-option go_package = "statsproto";
-
-// All the census protos.
-//
-// Nomenclature notes:
-//   * Capitalized names below (like View) are protos.
-//   * Protos which describe types are named with a Descriptor suffix (e.g.
-//     MesurementDescriptor).
-//
-// Census lets you define the type and description of the data being measured
-// (e.g. the latency of an RPC or the number of CPU cycles spent on an
-// operation using MeasurementDescriptor. As individual measurements (a double
-// value) for are recorded, they are aggregated together into an
-// Aggregation. There are two Aggregation types available: Distribution
-// (describes the distribution of all measurements, possibly with a histogram)
-// and IntervalStats (the count and mean of measurements across specified time
-// periods). An Aggregation is described by an AggregationDescriptor.
-//
-// You can define how your measurements (described by a MeasurementDescriptor)
-// are broken down by Tag values and which Aggregations to use through a
-// ViewDescriptor. The output (all measurements broken down by tag values into
-// specific Aggregations) is called a View.
-
+// TODO(bdrutu): Consider if this should be moved to a "tags" directory to match the API structure.
+message Tag {
+  string key = 1;
+  string value = 2;
+}
 
-// MeasurementDescriptor describes a data point (measurement) type.
-message MeasurementDescriptor {
-  // A descriptive name, e.g. rpc_latency, cpu. Must be unique.
+// Measure describes a type of data point to be recorded: its name, description, unit and type.
+message Measure {
+  // A string by which the measure will be referred to, e.g. "rpc_server_latency". Names MUST be
+  // unique within the library.
   string name = 1;
 
-  // More detailed description of the resource, used in documentation.
+  // Describes the measure, e.g. "RPC latency in seconds".
   string description = 2;
 
-  // Fundamental units of measurement supported by Census
-  // TODO(aveitch): expand this to include other S.I. units?
-  enum BasicUnit {
-    UNKNOWN = 0;    // Implementations should not use this
-    SCALAR = 1;     // Dimensionless
-    BITS = 2;       // A single bit
-    BYTES = 3;      // An 8-bit byte
-    SECONDS = 4;    // S.I. unit
-    CORES = 5;      // CPU core usage
-    MAX_UNITS = 6;  // Last defined value; implementations should only use
-                    // this for validation.
-  }
-
-  // MeasurementUnit lets you build compound units of the form
-  //   10^n * (A * B * ...) / (X * Y * ...),
-  // where the elements in the numerator and denominator are all BasicUnits.  A
-  // MeasurementUnit must have at least one BasicUnit in its numerator.
-  //
-  // To specify multiplication in the numerator or denominator, simply specify
-  // multiple numerator or denominator fields.  For example:
-  //
-  // - byte-seconds (i.e. bytes * seconds):
-  //     numerator: BYTES
-  //     numerator: SECS
-  //
-  // - events/sec^2 (i.e. rate of change of events/sec):
-  //     numerator: SCALAR
-  //     denominator: SECS
-  //     denominator: SECS
-  //
-  // To specify multiples (in power of 10) of units, specify a non-zero
-  // 'power10' value, for example:
-  //
-  // - MB/s (i.e. megabytes / s):
-  //     power10: 6
-  //     numerator: BYTES
-  //     denominator: SECS
-  //
-  // - nanoseconds
-  //     power10: -9
-  //     numerator: SECS
-  message MeasurementUnit {
-    int32 power10 = 1;
-    repeated BasicUnit numerators = 2;
-    repeated BasicUnit denominators = 3;
+  // Describes the unit used for the Measure. Follows the format described by
+  // http://unitsofmeasure.org/ucum.html.
+  string unit = 3;
+
+  enum Type {
+    // Unknown type.
+    TYPE_UNSPECIFIED = 0;
+    // Indicates an int64 Measure.
+    INT64 = 1;
+    // Indicates a double Measure.
+    DOUBLE = 2;
   }
 
-  // The units used by this type of measurement.
-  MeasurementUnit unit = 3;
+  // The type used for this Measure.
+  Type type = 4;
 }
 
-// An aggregation summarizes a series of individual measurements. There are
-// two types of aggregation (IntervalAggregation and DistributionAggregation),
-// unique types of each can be set using descriptors for each.
+message View {
+  // A string by which the View will be referred to, e.g. "rpc_latency". Names MUST be unique
+  // within the library.
+  string name = 1;
 
-// DistributionAggregation contains summary statistics for a population of
-// values and, optionally, a histogram representing the distribution of those
-// values across a specified set of histogram buckets, as defined in
-// DistributionAggregationDescriptor.bucket_bounds.
-//
-// The summary statistics are the count, mean, minimum, and the maximum of the
-// set of population of values.
-//
-// Although it is not forbidden, it is generally a bad idea to include
-// non-finite values (infinities or NaNs) in the population of values, as this
-// will render the `mean` field meaningless.
-message DistributionAggregation {
-  // The number of values in the population. Must be non-negative.
-  int64 count = 1;
+  // Describes the view, e.g. "RPC latency distribution".
+  string description = 2;
 
-  // The arithmetic mean of the values in the population. If `count` is zero
-  // then this field must be zero.
-  double mean = 2;
+  // The Measure to which this view is applied.
+  Measure measure = 3;
 
-  // The sum of the values in the population.  If `count` is zero then this
-  // field must be zero.
-  double sum = 3;
+  // An array of tag keys. The values associated with tags having these keys form the basis by
+  // which individual stats will be aggregated (one aggregation per unique tag value). If none are
+  // provided, then all data is recorded in a single aggregation.
+  repeated string columns = 4;
 
-  // Describes a range of population values.
-  message Range {
-    // The minimum of the population values.
-    double min = 1;
-    // The maximum of the population values.
-    double max = 2;
+  // The description of the aggregation used for this view which describes how data collected are
+  // aggregated.
+  oneof aggregation {
+    // Counts the number of measurements recorded.
+    CountAggregation count_aggregation = 5;
+    // Indicates that data collected and aggregated with this Aggregation will be summed up.
+    SumAggregation sum_aggregation = 6;
+    // Indicates that data collected and aggregated with this Aggregation will represent the last
+    // recorded value. This is useful to support Gauges.
+    LastValueAggregation last_value_aggregation = 7;
+    // Indicates that the desired Aggregation is a histogram distribution. A distribution
+    // Aggregation may contain a histogram of the values in the population. User should define the
+    // bucket boundaries for that histogram (see DistributionAggregation).
+    DistributionAggregation distribution_aggregation = 8;
   }
+}
 
-  // The range of the population values. If `count` is zero, this field will not
-  // be defined.
-  Range range = 4;
+message CountAggregation {}
 
-  // A Distribution may optionally contain a histogram of the values in the
-  // population. The histogram is given in `bucket_count` as counts of values
-  // that fall into one of a sequence of non-overlapping buckets, as described
-  // by `DistributionAggregationDescriptor.bucket_boundaries`. The sum of the
-  // values in `bucket_counts` must equal the value in `count`.
-  //
-  // Bucket counts are given in order under the numbering scheme described
-  // above (the underflow bucket has number 0; the finite buckets, if any,
-  // have numbers 1 through N-2; the overflow bucket has number N-1).
-  //
-  // The size of `bucket_count` must be no greater than N as defined in
-  // `bucket_boundaries`.
-  //
-  // Any suffix of trailing zero bucket_count fields may be omitted.
-  repeated int64 bucket_counts = 5;
+message SumAggregation {}
 
-  // Tags associated with this DistributionAggregation. These will be filled
-  // in based on the View specification.
-  repeated Tag tags = 6;
-}
+message LastValueAggregation {}
 
-message DistributionAggregationDescriptor {
+message DistributionAggregation {
   // A Distribution may optionally contain a histogram of the values in the
   // population. The bucket boundaries for that histogram are described by
   // `bucket_bounds`. This defines `size(bucket_bounds) + 1` (= N)
   // buckets. The boundaries for bucket index i are:
   //
-  // [-infinity, bucket_bounds[i]) for i == 0
+  // (-infinity, bucket_bounds[i]) for i == 0
   // [bucket_bounds[i-1], bucket_bounds[i]) for 0 < i < N-2
   // [bucket_bounds[i-1], +infinity) for i == N-1
   //
@@ -188,104 +116,20 @@ message DistributionAggregationDescriptor {
   repeated double bucket_bounds = 1;
 }
 
-// An IntervalAggreation records summary stats over various time
-// windows. These stats are approximate, with the degree of accuracy
-// controlled by setting the n_sub_intervals parameter in the
-// IntervalAggregationDescriptor.
-message IntervalAggregation {
-  // Summary statistic over a single time interval.
-  message Interval {
-    // The interval duration. Must be positive.
-    google.protobuf.Duration interval_size = 1;
-    // Approximate number of measurements recorded in this interval.
-    double count = 2;
-    // The cumulative sum of measurements in this interval.
-    double sum = 3;
-  }
-
-  // Full set of intervals for this aggregation.
-  repeated Interval intervals = 1;
-
-  // Tags associated with this IntervalAggregation. These will be filled in
-  // based on the View specification.
-  repeated Tag tags = 2;
-}
-
-// An IntervalAggreationDescriptor specifies time intervals for an
-// IntervalAggregation.
-message IntervalAggregationDescriptor {
-  // Number of internal sub-intervals to use when collecting stats for each
-  // interval. The max error in interval measurements will be approximately
-  // 1/n_sub_intervals (although in practice, this will only be approached in
-  // the presence of very large and bursty workload changes), and underlying
-  // memory usage will be roughly proportional to the value of this
-  // field. Must be in the range [2, 20]. A value of 5 will be used if this is
-  // unspecified.
-  int32 n_sub_intervals = 1;
-
-  // The size of each interval, as a time duration. Must have at least one
-  // element.
-  repeated google.protobuf.Duration interval_sizes = 2;
-}
-
-// A Tag: key-value pair.
-// Both strings must be printable ASCII.
-message Tag {
-  string key = 1;
-  string value = 2;
-}
-
-// A ViewDescriptor specifies an AggregationDescriptor and a set of tag
-// keys. Views instantiated from this descriptor will contain Aggregations
-// broken down by the unique set of matching tag values for each measurement.
-message ViewDescriptor {
-  // Name of view. Must be unique.
-  string name = 1;
-
-  // More detailed description, for documentation purposes.
-  string description = 2;
+// Describes a data point to be collected for a Measure.
+message Measurement {
+  repeated Tag tags = 1;
 
-  // Name of a MeasurementDescriptor to be used for this view.
-  string measurement_descriptor_name = 3;
+  // The name of the measure to which the value is applied.
+  string measure_name = 2;
 
-  // Aggregation type to associate with View.
-  oneof aggregation {
-    IntervalAggregationDescriptor interval_aggregation = 4;
-    DistributionAggregationDescriptor distribution_aggregation = 5;
+  // The recorded value. It MUST have the appropriate type to match the Measure.
+  oneof value {
+    double double_value = 3;
+    int64 int_value = 4;
   }
 
-  // Tag keys to match with a given measurement. If no keys are specified,
-  // then all stats are recorded. Keys must be unique.
-  repeated string tag_keys = 6;
-}
-
-// DistributionView contains all aggregations for a view specified using a
-// DistributionAggregationDescriptor.
-message DistributionView {
-  // Aggregations - each will have a unique set of tag values for the tag_keys
-  // associated with the corresponding View.
-  repeated DistributionAggregation aggregations = 1;
-
-  // Start and end timestamps over which aggregations was accumulated.
-  google.protobuf.Timestamp start = 2;
-  google.protobuf.Timestamp end = 3;
-}
-
-// IntervalView contains all aggregations for a view specified using a
-// IntervalAggregationDescriptor.
-message IntervalView {
-  // Aggregations - each will have a unique set of tag values for the tag_keys
-  // associated with the corresponding View.
-  repeated IntervalAggregation aggregations = 1;
-}
-
-// A View contains the aggregations based on a ViewDescriptor.
-message View {
-  // ViewDescriptor name associated with this set of View.
-  string view_name = 1;
-
-  oneof view {
-    DistributionView distribution_view = 2;
-    IntervalView interval_view = 3;
-  }
+  // The time when this measurement was recorded. If the implementation uses an async buffer to
+  // record measurements, this may be the time when the measurement was read from the buffer.
+  google.protobuf.Timestamp time = 5;
 }