Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: adding is_monotonic flag to sum #1793

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ def as_otlp_metrics(metrics)
aggregation_temporality: as_otlp_aggregation_temporality(metrics.aggregation_temporality),
data_points: metrics.data_points.map do |ndp|
number_data_point(ndp)
end
end,
is_monotonic: metrics.is_monotonic
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,9 @@
counter = meter.create_counter('test_counter', unit: 'smidgen', description: 'a small amount of something')
counter.add(5, attributes: { 'foo' => 'bar' })

up_down_counter = meter.create_up_down_counter('test_up_down_counter', unit: 'smidgen', description: 'a small amount of something')
up_down_counter.add(5, attributes: { 'foo' => 'bar' })

histogram = meter.create_histogram('test_histogram', unit: 'smidgen', description: 'a small amount of something')
histogram.record(10, attributes: { 'oof' => 'rab' })

Expand Down Expand Up @@ -623,6 +626,27 @@
exemplars: nil
)
],
is_monotonic: true,
aggregation_temporality: Opentelemetry::Proto::Metrics::V1::AggregationTemporality::AGGREGATION_TEMPORALITY_DELTA
)
),
Opentelemetry::Proto::Metrics::V1::Metric.new(
name: 'test_up_down_counter',
description: 'a small amount of something',
unit: 'smidgen',
sum: Opentelemetry::Proto::Metrics::V1::Sum.new(
data_points: [
Opentelemetry::Proto::Metrics::V1::NumberDataPoint.new(
attributes: [
Opentelemetry::Proto::Common::V1::KeyValue.new(key: 'foo', value: Opentelemetry::Proto::Common::V1::AnyValue.new(string_value: 'bar'))
],
as_int: 5,
start_time_unix_nano: 1_699_593_427_329_946_585,
time_unix_nano: 1_699_593_427_329_946_586,
exemplars: nil
)
],
is_monotonic: false,
aggregation_temporality: Opentelemetry::Proto::Metrics::V1::AggregationTemporality::AGGREGATION_TEMPORALITY_DELTA
)
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ module Aggregation
# Contains the implementation of the Sum aggregation
# https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/sdk.md#sum-aggregation
class Sum
attr_reader :aggregation_temporality
attr_reader :aggregation_temporality, :is_monotonic
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xuan-cao-swi typically I would think of these kinds of attributes hidden behind predicates.

Would it not make sense to make the public interface of this attribute monotonic?

Copy link
Contributor

@kaylareopelle kaylareopelle Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The value in the protobuf is is_monotonic, so I think the goal was to match. I also found it jarring* with Ruby's predicates.

descriptor_data = "\n,opentelemetry/proto/metrics/v1/metrics.proto\x12\x1eopentelemetry.proto.metrics.v1\x1a*opentelemetry/proto/common/v1/common.proto\x1a.opentelemetry/proto/resource/v1/resource.proto\"X\n\x0bMetricsData\x12I\n\x10resource_metrics\x18\x01 \x03(\x0b\x32/.opentelemetry.proto.metrics.v1.ResourceMetrics\"\xaf\x01\n\x0fResourceMetrics\x12;\n\x08resource\x18\x01 \x01(\x0b\x32).opentelemetry.proto.resource.v1.Resource\x12\x43\n\rscope_metrics\x18\x02 \x03(\x0b\x32,.opentelemetry.proto.metrics.v1.ScopeMetrics\x12\x12\n\nschema_url\x18\x03 \x01(\tJ\x06\x08\xe8\x07\x10\xe9\x07\"\x9f\x01\n\x0cScopeMetrics\x12\x42\n\x05scope\x18\x01 \x01(\x0b\x32\x33.opentelemetry.proto.common.v1.InstrumentationScope\x12\x37\n\x07metrics\x18\x02 \x03(\x0b\x32&.opentelemetry.proto.metrics.v1.Metric\x12\x12\n\nschema_url\x18\x03 \x01(\t\"\x92\x03\n\x06Metric\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x0c\n\x04unit\x18\x03 \x01(\t\x12\x36\n\x05gauge\x18\x05 \x01(\x0b\x32%.opentelemetry.proto.metrics.v1.GaugeH\x00\x12\x32\n\x03sum\x18\x07 \x01(\x0b\x32#.opentelemetry.proto.metrics.v1.SumH\x00\x12>\n\thistogram\x18\t \x01(\x0b\x32).opentelemetry.proto.metrics.v1.HistogramH\x00\x12U\n\x15\x65xponential_histogram\x18\n \x01(\x0b\x32\x34.opentelemetry.proto.metrics.v1.ExponentialHistogramH\x00\x12:\n\x07summary\x18\x0b \x01(\x0b\x32\'.opentelemetry.proto.metrics.v1.SummaryH\x00\x42\x06\n\x04\x64\x61taJ\x04\x08\x04\x10\x05J\x04\x08\x06\x10\x07J\x04\x08\x08\x10\t\"M\n\x05Gauge\x12\x44\n\x0b\x64\x61ta_points\x18\x01 \x03(\x0b\x32/.opentelemetry.proto.metrics.v1.NumberDataPoint\"\xba\x01\n\x03Sum\x12\x44\n\x0b\x64\x61ta_points\x18\x01 \x03(\x0b\x32/.opentelemetry.proto.metrics.v1.NumberDataPoint\x12W\n\x17\x61ggregation_temporality\x18\x02 \x01(\x0e\x32\x36.opentelemetry.proto.metrics.v1.AggregationTemporality\x12\x14\n\x0cis_monotonic\x18\x03 \x01(\x08\"\xad\x01\n\tHistogram\x12G\n\x0b\x64\x61ta_points\x18\x01 \x03(\x0b\x32\x32.opentelemetry.proto.metrics.v1.HistogramDataPoint\x12W\n\x17\x61ggregation_temporality\x18\x02 \x01(\x0e\x32\x36.opentelemetry.proto.metrics.v1.AggregationTemporality\"\xc3\x01\n\x14\x45xponentialHistogram\x12R\n\x0b\x64\x61ta_points\x18\x01 \x03(\x0b\x32=.opentelemetry.proto.metrics.v1.ExponentialHistogramDataPoint\x12W\n\x17\x61ggregation_temporality\x18\x02 \x01(\x0e\x32\x36.opentelemetry.proto.metrics.v1.AggregationTemporality\"P\n\x07Summary\x12\x45\n\x0b\x64\x61ta_points\x18\x01 \x03(\x0b\x32\x30.opentelemetry.proto.metrics.v1.SummaryDataPoint\"\x86\x02\n\x0fNumberDataPoint\x12;\n\nattributes\x18\x07 \x03(\x0b\x32\'.opentelemetry.proto.common.v1.KeyValue\x12\x1c\n\x14start_time_unix_nano\x18\x02 \x01(\x06\x12\x16\n\x0etime_unix_nano\x18\x03 \x01(\x06\x12\x13\n\tas_double\x18\x04 \x01(\x01H\x00\x12\x10\n\x06\x61s_int\x18\x06 \x01(\x10H\x00\x12;\n\texemplars\x18\x05 \x03(\x0b\x32(.opentelemetry.proto.metrics.v1.Exemplar\x12\r\n\x05\x66lags\x18\x08 \x01(\rB\x07\n\x05valueJ\x04\x08\x01\x10\x02\"\xe6\x02\n\x12HistogramDataPoint\x12;\n\nattributes\x18\t \x03(\x0b\x32\'.opentelemetry.proto.common.v1.KeyValue\x12\x1c\n\x14start_time_unix_nano\x18\x02 \x01(\x06\x12\x16\n\x0etime_unix_nano\x18\x03 \x01(\x06\x12\r\n\x05\x63ount\x18\x04 \x01(\x06\x12\x10\n\x03sum\x18\x05 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\rbucket_counts\x18\x06 \x03(\x06\x12\x17\n\x0f\x65xplicit_bounds\x18\x07 \x03(\x01\x12;\n\texemplars\x18\x08 \x03(\x0b\x32(.opentelemetry.proto.metrics.v1.Exemplar\x12\r\n\x05\x66lags\x18\n \x01(\r\x12\x10\n\x03min\x18\x0b \x01(\x01H\x01\x88\x01\x01\x12\x10\n\x03max\x18\x0c \x01(\x01H\x02\x88\x01\x01\x42\x06\n\x04_sumB\x06\n\x04_minB\x06\n\x04_maxJ\x04\x08\x01\x10\x02\"\xda\x04\n\x1d\x45xponentialHistogramDataPoint\x12;\n\nattributes\x18\x01 \x03(\x0b\x32\'.opentelemetry.proto.common.v1.KeyValue\x12\x1c\n\x14start_time_unix_nano\x18\x02 \x01(\x06\x12\x16\n\x0etime_unix_nano\x18\x03 \x01(\x06\x12\r\n\x05\x63ount\x18\x04 \x01(\x06\x12\x10\n\x03sum\x18\x05 \x01(\x01H\x00\x88\x01\x01\x12\r\n\x05scale\x18\x06 \x01(\x11\x12\x12\n\nzero_count\x18\x07 \x01(\x06\x12W\n\x08positive\x18\x08 \x01(\x0b\x32\x45.opentelemetry.proto.metrics.v1.ExponentialHistogramDataPoint.Buckets\x12W\n\x08negative\x18\t \x01(\x0b\x32\x45.opentelemetry.proto.metrics.v1.ExponentialHistogramDataPoint.Buckets\x12\r\n\x05\x66lags\x18\n \x01(\r\x12;\n\texemplars\x18\x0b \x03(\x0b\x32(.opentelemetry.proto.metrics.v1.Exemplar\x12\x10\n\x03min\x18\x0c \x01(\x01H\x01\x88\x01\x01\x12\x10\n\x03max\x18\r \x01(\x01H\x02\x88\x01\x01\x12\x16\n\x0ezero_threshold\x18\x0e \x01(\x01\x1a\x30\n\x07\x42uckets\x12\x0e\n\x06offset\x18\x01 \x01(\x11\x12\x15\n\rbucket_counts\x18\x02 \x03(\x04\x42\x06\n\x04_sumB\x06\n\x04_minB\x06\n\x04_max\"\xc5\x02\n\x10SummaryDataPoint\x12;\n\nattributes\x18\x07 \x03(\x0b\x32\'.opentelemetry.proto.common.v1.KeyValue\x12\x1c\n\x14start_time_unix_nano\x18\x02 \x01(\x06\x12\x16\n\x0etime_unix_nano\x18\x03 \x01(\x06\x12\r\n\x05\x63ount\x18\x04 \x01(\x06\x12\x0b\n\x03sum\x18\x05 \x01(\x01\x12Y\n\x0fquantile_values\x18\x06 \x03(\x0b\x32@.opentelemetry.proto.metrics.v1.SummaryDataPoint.ValueAtQuantile\x12\r\n\x05\x66lags\x18\x08 \x01(\r\x1a\x32\n\x0fValueAtQuantile\x12\x10\n\x08quantile\x18\x01 \x01(\x01\x12\r\n\x05value\x18\x02 \x01(\x01J\x04\x08\x01\x10\x02\"\xc1\x01\n\x08\x45xemplar\x12\x44\n\x13\x66iltered_attributes\x18\x07 \x03(\x0b\x32\'.opentelemetry.proto.common.v1.KeyValue\x12\x16\n\x0etime_unix_nano\x18\x02 \x01(\x06\x12\x13\n\tas_double\x18\x03 \x01(\x01H\x00\x12\x10\n\x06\x61s_int\x18\x06 \x01(\x10H\x00\x12\x0f\n\x07span_id\x18\x04 \x01(\x0c\x12\x10\n\x08trace_id\x18\x05 \x01(\x0c\x42\x07\n\x05valueJ\x04\x08\x01\x10\x02*\x8c\x01\n\x16\x41ggregationTemporality\x12\'\n#AGGREGATION_TEMPORALITY_UNSPECIFIED\x10\x00\x12!\n\x1d\x41GGREGATION_TEMPORALITY_DELTA\x10\x01\x12&\n\"AGGREGATION_TEMPORALITY_CUMULATIVE\x10\x02*^\n\x0e\x44\x61taPointFlags\x12\x1f\n\x1b\x44\x41TA_POINT_FLAGS_DO_NOT_USE\x10\x00\x12+\n\'DATA_POINT_FLAGS_NO_RECORDED_VALUE_MASK\x10\x01\x42\x7f\n!io.opentelemetry.proto.metrics.v1B\x0cMetricsProtoP\x01Z)go.opentelemetry.io/proto/otlp/metrics/v1\xaa\x02\x1eOpenTelemetry.Proto.Metrics.V1b\x06proto3"

But, I believe some agents (Js?) just reduce this to monotonic in the SDK, so there's precedent to go the other way.


def initialize(aggregation_temporality: ENV.fetch('OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE', :delta))
def initialize(aggregation_temporality: ENV.fetch('OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE', :delta), is_monotonic: false)
# TODO: the default should be :cumulative, see issue #1555
@aggregation_temporality = aggregation_temporality.to_sym
@is_monotonic = is_monotonic
end

def collect(start_time, end_time, data_points)
Expand Down Expand Up @@ -47,6 +48,8 @@ def update(increment, attributes, data_points)
nil
)

return if is_monotonic && increment < 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this suppose to be instance variable @is_monotonic?

monotonic also need to check with aggregation_temporality

For delta monotonic sums, this means the reader SHOULD expect non-negative values.
For cumulative monotonic sums, this means the reader SHOULD expect values that are not less than the previous value.

We also need to move this logic before ndp = data_points[attributes] || data_points[attributes] = NumberDataPoint.new; because update doesn't return anything; it will update direct on data_points hash.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think checking if increment is negative fulfills the specs for cumulative sums. for delta sums it might be the case that we need to check if the sum (value + increment) is negative? only checking if increment is negative ensures the sum value is never negative but might reject some valid increments. is my logic correct?

Copy link
Contributor

@xuan-cao-swi xuan-cao-swi Jan 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think checking if increment is negative fulfills the specs for cumulative sums.

Yes, I think your logic is correct (also checked other implementation as your referenced).

The specification is somewhat confusing, but In this case of metrics, this means the sum is nominally increasing should reduce confusion. (Now I think the specification is actually wrong regarding "for delta" and "for cum," and they should be swapped.)

for delta sums it might be the case that we need to check if the sum (value + increment) is negative?

For delta sum, it's kind tricky because the sum will be zeroed depends on when the instrument's value got exported. I see other implementations don't care about it. I would suggest follow their approach.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think my current implementation is very similar to the js one. Do you think there is something else that needs to be implemented?

Copy link
Contributor

@xuan-cao-swi xuan-cao-swi Jan 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM; may need other maintainers take a look too.


ndp.value += increment
nil
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def add(increment, attributes: {})
private

def default_aggregation
OpenTelemetry::SDK::Metrics::Aggregation::Sum.new
OpenTelemetry::SDK::Metrics::Aggregation::Sum.new(is_monotonic: true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to ask that is there any spec to define the default for monotonic, but found the spec that explain it.

end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def add(amount, attributes: {})
private

def default_aggregation
OpenTelemetry::SDK::Metrics::Aggregation::Sum.new
OpenTelemetry::SDK::Metrics::Aggregation::Sum.new(is_monotonic: false)
end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ module State
:data_points, # Hash{Hash{String => String, Numeric, Boolean, Array<String, Numeric, Boolean>} => Numeric}
:aggregation_temporality, # Symbol
:start_time_unix_nano, # Integer nanoseconds since Epoch
:time_unix_nano) # Integer nanoseconds since Epoch
:time_unix_nano, # Integer nanoseconds since Epoch
:is_monotonic) # Boolean
end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def update(value, attributes)

def aggregate_metric_data(start_time, end_time, aggregation: nil)
aggregator = aggregation || @default_aggregation
is_monotonic = aggregator.respond_to?(:is_monotonic) && aggregator.is_monotonic
kaylareopelle marked this conversation as resolved.
Show resolved Hide resolved

MetricData.new(
@name,
@description,
Expand All @@ -77,7 +79,8 @@ def aggregate_metric_data(start_time, end_time, aggregation: nil)
aggregator.collect(start_time, end_time, @data_points),
aggregator.aggregation_temporality,
start_time,
end_time
end_time,
is_monotonic
)
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

describe OpenTelemetry::SDK::Metrics::Aggregation::Sum do
let(:data_points) { {} }
let(:sum_aggregation) { OpenTelemetry::SDK::Metrics::Aggregation::Sum.new(aggregation_temporality: aggregation_temporality) }
let(:sum_aggregation) { OpenTelemetry::SDK::Metrics::Aggregation::Sum.new(aggregation_temporality: aggregation_temporality, is_monotonic: is_monotonic) }
let(:aggregation_temporality) { :delta }
let(:is_monotonic) { false }

# Time in nano
let(:start_time) { (Time.now.to_r * 1_000_000_000).to_i }
Expand Down Expand Up @@ -58,6 +59,14 @@
_(ndps[1].attributes).must_equal('foo' => 'bar')
end

it 'aggregates and collects negative values' do
sum_aggregation.update(1, {}, data_points)
sum_aggregation.update(-2, {}, data_points)

ndps = sum_aggregation.collect(start_time, end_time, data_points)
_(ndps[0].value).must_equal(-1)
end

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want more test case once the logic for mixing up temporality and monotonic is there

it 'does not aggregate between collects' do
sum_aggregation.update(1, {}, data_points)
sum_aggregation.update(2, {}, data_points)
Expand Down Expand Up @@ -94,4 +103,17 @@
_(ndps[0].value).must_equal(4)
end
end

describe 'when sum type is monotonic' do
let(:aggregation_temporality) { :not_delta }
let(:is_monotonic) { true }

it 'does not allow negative values to accumulate' do
sum_aggregation.update(1, {}, data_points)
sum_aggregation.update(-2, {}, data_points)
ndps = sum_aggregation.collect(start_time, end_time, data_points)

_(ndps[0].value).must_equal(1)
end
end
end
Loading