Skip to content
This repository was archived by the owner on Jul 31, 2023. It is now read-only.

Commit 72dce31

Browse files
author
Ian Sturdy
authored
Add MeasureData, covering all aggregations for one measure. (#96)
1 parent 6daa43a commit 72dce31

5 files changed

Lines changed: 308 additions & 0 deletions

File tree

opencensus/stats/BUILD

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ cc_library(
5959
"internal/bucket_boundaries.cc",
6060
"internal/distribution.cc",
6161
"internal/measure.cc",
62+
"internal/measure_data.cc",
6263
"internal/measure_descriptor.cc",
6364
"internal/measure_registry.cc",
6465
"internal/measure_registry_impl.cc",
@@ -76,6 +77,7 @@ cc_library(
7677
"bucket_boundaries.h",
7778
"distribution.h",
7879
"internal/aggregation_window.h",
80+
"internal/measure_data.h",
7981
"internal/measure_registry_impl.h",
8082
"internal/set_aggregation_window.h",
8183
"internal/stats_exporter_impl.h",
@@ -151,6 +153,19 @@ cc_test(
151153
],
152154
)
153155

156+
# Unit tests for MeasureData, built from internal/measure_data_test.cc.
cc_test(
    name = "measure_data_test",
    size = "small",
    srcs = ["internal/measure_data_test.cc"],
    copts = TEST_COPTS,
    deps = [
        ":core",
        ":test_utils",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest_main",
    ],
)
168+
154169
cc_test(
155170
name = "measure_registry_test",
156171
srcs = ["internal/measure_registry_test.cc"],

opencensus/stats/distribution.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class Distribution final {
5555

5656
private:
5757
friend class ViewDataImpl; // ViewDataImpl populates data directly.
58+
friend class MeasureData;
5859
friend class testing::TestUtils;
5960

6061
// buckets must outlive the Distribution.
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright 2018, OpenCensus Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "opencensus/stats/internal/measure_data.h"
16+
17+
#include <algorithm>
18+
#include <cmath>
19+
#include <cstdint>
20+
#include <iostream>
21+
#include <vector>
22+
23+
#include "absl/base/macros.h"
24+
#include "absl/types/span.h"
25+
#include "opencensus/stats/bucket_boundaries.h"
26+
#include "opencensus/stats/distribution.h"
27+
28+
namespace opencensus {
29+
namespace stats {
30+
31+
// Creates one histogram per entry of 'boundaries', each holding
// num_buckets() counters for that entry. Only a Span over 'boundaries' is
// kept, so the underlying array has to stay alive while this object is used.
MeasureData::MeasureData(absl::Span<const BucketBoundaries> boundaries)
    : boundaries_(boundaries) {
  histograms_.reserve(boundaries_.size());
  for (auto it = boundaries_.begin(); it != boundaries_.end(); ++it) {
    histograms_.emplace_back(it->num_buckets());
  }
}
38+
39+
void MeasureData::Add(double value) {
40+
// Update using the method of provisional means.
41+
++count_;
42+
ABSL_ASSERT(count_ > 0 && "Histogram count overflow.");
43+
const double old_mean = mean_;
44+
mean_ += (value - mean_) / count_;
45+
sum_of_squared_deviation_ =
46+
sum_of_squared_deviation_ + (value - old_mean) * (value - mean_);
47+
48+
min_ = std::min(value, min_);
49+
max_ = std::max(value, max_);
50+
51+
for (int i = 0; i < boundaries_.size(); ++i) {
52+
++histograms_[i][boundaries_[i].BucketForValue(value)];
53+
}
54+
}
55+
56+
// Merges everything recorded in this MeasureData into 'distribution'.
//
// The count, mean, sum of squared deviations, min and max are combined with
// the values already held by 'distribution', and the histogram kept for
// distribution->bucket_boundaries() is added bucket by bucket. If no histogram
// was created for those boundaries, an error is written to stderr, builds with
// assertions enabled abort, and otherwise the whole count goes to bucket 0.
void MeasureData::AddToDistribution(Distribution* distribution) const {
  // With nothing recorded the summary statistics are unchanged. Skipping the
  // merge also avoids a 0/0 division (NaN mean and deviation) when
  // 'distribution' is empty as well.
  if (count_ > 0) {
    // This uses the method of provisional means generalized for multiple
    // values in both datasets. The deviation term is written in terms of the
    // difference of the two means; subtracting count * mean^2 terms instead
    // loses precision when the means are large relative to the spread.
    const double old_count = distribution->count_;
    const double new_count = distribution->count_ + count_;
    const double mean_delta = mean_ - distribution->mean_;
    distribution->sum_of_squared_deviation_ +=
        sum_of_squared_deviation_ +
        mean_delta * mean_delta * old_count * count_ / new_count;
    distribution->mean_ += mean_delta * count_ / new_count;
    // Accumulate the count directly rather than round-tripping it through a
    // double.
    distribution->count_ += count_;

    distribution->min_ = std::min(distribution->min_, min_);
    distribution->max_ = std::max(distribution->max_, max_);
  }

  const auto match = std::find(boundaries_.begin(), boundaries_.end(),
                               distribution->bucket_boundaries());
  if (match == boundaries_.end()) {
    std::cerr << "No matching BucketBoundaries in AddToDistribution\n";
    ABSL_ASSERT(false);
    // Add to the underflow bucket, to avoid downstream errors from the sum of
    // bucket counts not matching the total count.
    distribution->bucket_counts_[0] += count_;
    return;
  }

  // histograms_ has one entry per boundary, in the same order, so the offset
  // of the match within boundaries_ selects the histogram to add.
  const std::vector<int64_t>& histogram =
      *(histograms_.begin() + (match - boundaries_.begin()));
  for (std::vector<int64_t>::size_type i = 0; i < histogram.size(); ++i) {
    distribution->bucket_counts_[i] += histogram[i];
  }
}
87+
88+
} // namespace stats
89+
} // namespace opencensus
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2018, OpenCensus Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef OPENCENSUS_STATS_INTERNAL_MEASURE_DATA_H_
16+
#define OPENCENSUS_STATS_INTERNAL_MEASURE_DATA_H_
17+
18+
#include <cstdint>
#include <limits>
#include <vector>

#include "absl/types/span.h"
#include "opencensus/stats/bucket_boundaries.h"
#include "opencensus/stats/distribution.h"
24+
25+
namespace opencensus {
26+
namespace stats {
27+
28+
// MeasureData tracks all aggregations for a single measure, including
29+
// histograms for a number of different BucketBoundaries.
30+
//
31+
// MeasureData is thread-compatible.
32+
class MeasureData final {
33+
public:
34+
MeasureData(absl::Span<const BucketBoundaries> boundaries);
35+
36+
void Add(double value);
37+
38+
uint64_t count() const { return count_; }
39+
double sum() const { return count_ * mean_; }
40+
41+
// Adds this to 'distribution'. Requires that
42+
// distribution->bucket_boundaries() be in the set of boundaries passed to
43+
// this on construction.
44+
void AddToDistribution(Distribution* distribution) const;
45+
46+
private:
47+
absl::Span<const BucketBoundaries> boundaries_;
48+
49+
uint64_t count_ = 0;
50+
double mean_ = 0;
51+
double sum_of_squared_deviation_ = 0;
52+
double min_ = std::numeric_limits<double>::infinity();
53+
double max_ = -std::numeric_limits<double>::infinity();
54+
std::vector<std::vector<int64_t>> histograms_;
55+
};
56+
57+
} // namespace stats
58+
} // namespace opencensus
59+
60+
#endif // OPENCENSUS_STATS_INTERNAL_MEASURE_DATA_H_
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// Copyright 2018, OpenCensus Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "opencensus/stats/internal/measure_data.h"
16+
17+
#include <algorithm>
18+
#include <cmath>
19+
#include <numeric>
20+
#include <vector>
21+
22+
#include "absl/types/span.h"
23+
#include "gmock/gmock.h"
24+
#include "gtest/gtest.h"
25+
#include "opencensus/stats/bucket_boundaries.h"
26+
#include "opencensus/stats/distribution.h"
27+
#include "opencensus/stats/testing/test_utils.h"
28+
29+
namespace opencensus {
30+
namespace stats {
31+
namespace {
32+
33+
// Checks count() and sum() after a handful of values, with no histograms
// configured.
TEST(MeasureDataTest, SmallSequence) {
  MeasureData data({});

  for (const double value : {-6.0, 0.0, 3.0}) {
    data.Add(value);
  }

  EXPECT_EQ(data.count(), 3);
  EXPECT_DOUBLE_EQ(data.sum(), -3);
}
43+
44+
// Checks that a single MeasureData keeps a separate histogram for each set of
// boundaries and adds the matching one to a Distribution.
TEST(MeasureDataTest, MultipleHistograms) {
  std::vector<BucketBoundaries> boundaries = {
      BucketBoundaries::Explicit({0, 10}), BucketBoundaries::Explicit({}),
      BucketBoundaries::Explicit({5})};
  MeasureData data(boundaries);
  for (const double value : {-1.0, 1.0, 8.0}) {
    data.Add(value);
  }

  // Boundaries {0, 10}.
  Distribution first = testing::TestUtils::MakeDistribution(&boundaries[0]);
  data.AddToDistribution(&first);
  EXPECT_THAT(first.bucket_counts(), ::testing::ElementsAre(1, 2, 0));

  // Boundaries {5}.
  Distribution last = testing::TestUtils::MakeDistribution(&boundaries[2]);
  data.AddToDistribution(&last);
  EXPECT_THAT(last.bucket_counts(), ::testing::ElementsAre(2, 1));
}
63+
64+
// Checks the count, mean, sum of squared deviations, min and max that
// MeasureData produces against values computed directly from the samples.
TEST(MeasureDataTest, DistributionStatistics) {
  BucketBoundaries buckets = BucketBoundaries::Explicit({});
  MeasureData data(absl::MakeSpan(&buckets, 1));

  const std::vector<int> samples{91, 18, 63, 98, 87, 77, 14, 97, 10, 35,
                                 12, 5,  75, 41, 49, 38, 40, 20, 55, 83};
  const double expected_mean =
      static_cast<double>(std::accumulate(samples.begin(), samples.end(), 0)) /
      samples.size();
  double expected_sum_of_squared_deviation = 0;
  for (const auto sample : samples) {
    data.Add(sample);
    // Qualified std::pow: <cmath> is not required to declare pow in the
    // global namespace.
    expected_sum_of_squared_deviation += std::pow(sample - expected_mean, 2);
  }

  Distribution distribution = testing::TestUtils::MakeDistribution(&buckets);
  data.AddToDistribution(&distribution);
  EXPECT_EQ(distribution.count(), samples.size());
  EXPECT_DOUBLE_EQ(distribution.mean(), expected_mean);
  EXPECT_DOUBLE_EQ(distribution.sum_of_squared_deviation(),
                   expected_sum_of_squared_deviation);
  EXPECT_DOUBLE_EQ(distribution.min(),
                   *std::min_element(samples.begin(), samples.end()));
  EXPECT_DOUBLE_EQ(distribution.max(),
                   *std::max_element(samples.begin(), samples.end()));
}
90+
91+
// Tests that batching values in the MeasureData and merging them in one step
// is equivalent to adding the same values to the distribution one by one.
TEST(MeasureDataTest, BatchedAddToDistribution) {
  BucketBoundaries buckets = BucketBoundaries::Exponential(7, 2, 2);
  MeasureData data(absl::MakeSpan(&buckets, 1));

  // Seed the starting distribution with data so the merge is exercised
  // against a non-empty target.
  Distribution seeded = testing::TestUtils::MakeDistribution(&buckets);
  testing::TestUtils::AddToDistribution(&seeded, 20);
  testing::TestUtils::AddToDistribution(&seeded, 10);

  // Receives every value sequentially; the batched result must match it.
  Distribution sequential = seeded;

  const double tolerance = 1.0 / 1000000000;
  const int last_value = 100;
  for (int value = 0; value <= last_value; ++value) {
    data.Add(value);
    testing::TestUtils::AddToDistribution(&sequential, value);

    // Merge the whole batch so far into a fresh copy of the seeded state.
    Distribution batched = seeded;
    data.AddToDistribution(&batched);

    EXPECT_EQ(sequential.count(), batched.count());
    EXPECT_DOUBLE_EQ(sequential.mean(), batched.mean());
    EXPECT_NEAR(sequential.sum_of_squared_deviation(),
                batched.sum_of_squared_deviation(), tolerance);
    EXPECT_DOUBLE_EQ(sequential.min(), batched.min());
    EXPECT_DOUBLE_EQ(sequential.max(), batched.max());
    EXPECT_THAT(batched.bucket_counts(),
                ::testing::ElementsAreArray(sequential.bucket_counts()));
  }
}
124+
125+
// Adding to a Distribution whose boundaries were not given to the MeasureData
// must report the mismatch. In debug builds the call is expected to die with
// the error message; otherwise the statement simply runs and the count is
// expected in the first bucket.
TEST(MeasureDataDeathTest, AddToDistributionWithUnknownBuckets) {
  BucketBoundaries known_buckets = BucketBoundaries::Explicit({0, 10});
  MeasureData data(absl::MakeSpan(&known_buckets, 1));
  data.Add(1);

  BucketBoundaries unknown_buckets = BucketBoundaries::Explicit({0});
  Distribution target = testing::TestUtils::MakeDistribution(&unknown_buckets);
  EXPECT_DEBUG_DEATH(
      {
        data.AddToDistribution(&target);
        EXPECT_THAT(target.bucket_counts(), ::testing::ElementsAre(1, 0));
      },
      "No matching BucketBoundaries in AddToDistribution");
}
140+
141+
} // namespace
142+
} // namespace stats
143+
} // namespace opencensus

0 commit comments

Comments
 (0)