Skip to content
This repository was archived by the owner on Jul 31, 2023. It is now read-only.

Commit a639371

Browse files
Introduce hard span storage size limit in sweeper (#121)
introduce hard span storage size limit in sweeper
1 parent 9ae08af commit a639371

File tree

3 files changed

+99
-29
lines changed

3 files changed

+99
-29
lines changed

elvis.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
{elvis_style, dont_repeat_yourself, #{min_complexity => 20}},
1212
{elvis_style, line_length, #{limit => 120}},
1313
{elvis_style, state_record_and_type, disable},
14+
{elvis_style, no_if_expression, disable},
1415
{elvis_style, function_naming_convention, #{regex => "^_{0,2}([a-z][a-z0-9]*_?)*_{0,2}$"}},
1516
%% sequential reporter calls other reporters dynamically
1617
%% stat view proxies measures

src/oc_span_sweeper.erl

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929

3030
-record(data, {interval :: integer() | infinity,
3131
strategy :: drop | finish | failed_attribute_and_finish | fun((opencensus:span()) -> ok),
32-
ttl :: integer()}).
32+
ttl :: integer() | infinity,
33+
storage_size :: integer() | infinity}).
3334

3435
%% Start the sweeper state machine, linked to the caller and registered
%% locally under the module name.
start_link() ->
    RegisteredName = {local, ?MODULE},
    gen_statem:start_link(RegisteredName, ?MODULE, [], []).
@@ -40,54 +41,84 @@ init([]) ->
4041
Interval = maps:get(interval, SweeperConfig, timer:minutes(5)),
4142
Strategy = maps:get(strategy, SweeperConfig, drop),
4243
TTL = maps:get(span_ttl, SweeperConfig, timer:minutes(5)),
44+
StorageSize = maps:get(storage_size, SweeperConfig, infinity),
4345
{ok, ready, #data{interval=Interval,
4446
strategy=Strategy,
45-
ttl=erlang:convert_time_unit(TTL, millisecond, native)},
47+
ttl=maybe_convert_time_unit(TTL),
48+
storage_size=StorageSize},
4649
[hibernate, {state_timeout, Interval, sweep}]}.
4750

51+
52+
%% Convert a TTL given in milliseconds into native time units.
%% The atom `infinity' is passed through unchanged.
maybe_convert_time_unit(TTL) ->
    case TTL of
        infinity ->
            infinity;
        Millis ->
            erlang:convert_time_unit(Millis, millisecond, native)
    end.
56+
4857
%% gen_statem callback: selects the handle_event_function callback mode,
%% so events are handled by handle_event/4 regardless of the current state.
callback_mode() ->
5058
handle_event_function.
5059

51-
handle_event(state_timeout, sweep, _, #data{interval=Interval,
52-
strategy=drop,
53-
ttl=TTL}) ->
60+
%% gen_statem event handler.
%%
%% On the periodic `sweep' state timeout: run one garbage-collection pass over
%% the span table, then re-arm the timeout with the configured interval and
%% hibernate until it fires. Every other event is ignored.
handle_event(state_timeout, sweep, _State, #data{} = Data) ->
    do_gc(Data),
    NextSweep = {state_timeout, Data#data.interval, sweep},
    {keep_state_and_data, [hibernate, NextSweep]};
handle_event(_EventType, _EventContent, _State, _Data) ->
    keep_state_and_data.
65+
66+
%% gen_statem callback: no migration is performed on code upgrade; the current
%% state and data are returned as they are.
code_change(_OldVsn, CurrentState, CurrentData, _Extra) ->
    {ok, CurrentState, CurrentData}.
68+
69+
%% gen_statem callback: there is nothing to clean up on shutdown.
terminate(_, _, _) ->
    ok.
71+
72+
%%
73+
%% Run one garbage-collection pass over the span table.
%%
%% With `storage_size = infinity' the table size is not inspected and spans are
%% swept with the configured strategy and TTL.
%%
%% Otherwise the memory used by ?SPAN_TAB is converted to bytes (ets:info/2
%% reports it in words) and compared against the configured limit:
%%   * size >= 2 * limit: every span in the table is deleted outright; the
%%     configured strategy is bypassed, so the purge is logged;
%%   * size >= limit:     spans are swept with a tenth of the configured TTL;
%%   * otherwise:         spans are swept with the configured TTL.
do_gc(#data{strategy=Strategy,
            ttl=TTL,
            storage_size=infinity}) ->
    sweep_spans(Strategy, TTL);
do_gc(#data{strategy=Strategy,
            ttl=TTL,
            storage_size=MaxSize}) ->
    StorageSize = ets:info(?SPAN_TAB, memory) * erlang:system_info({wordsize, external}),

    if
        StorageSize >= 2 * MaxSize ->
            %% High overload: purge the whole storage. Spans are dropped
            %% without being finished or reported, so leave a trace in the log.
            ?LOG_INFO("span storage overloaded, dropping all spans: storage_size=~p max_size=~p",
                      [StorageSize, MaxSize]),
            true = ets:delete_all_objects(?SPAN_TAB),
            ok;
        StorageSize >= MaxSize ->
            %% Low overload: sweep with a reduced TTL.
            sweep_spans(Strategy, overload_ttl(TTL));
        true ->
            sweep_spans(Strategy, TTL)
    end.
93+
94+
%% TTL to apply while the span storage is over its size limit: a tenth of the
%% regular TTL (integer division). An `infinity' TTL is left as it is.
overload_ttl(TTL) ->
    case TTL of
        infinity ->
            infinity;
        Regular ->
            Regular div 10
    end.
98+
99+
sweep_spans(_, infinity) ->
100+
ok;
101+
sweep_spans(drop, TTL) ->
54102
TooOld = erlang:monotonic_time() - TTL,
55103
case ets:select_delete(?SPAN_TAB, expired_match_spec(TooOld, true)) of
56104
0 ->
57105
ok;
58106
NumDeleted ->
59107
?LOG_INFO("sweep old spans: ttl=~p num_dropped=~p", [TTL, NumDeleted])
60-
end,
61-
{keep_state_and_data, [hibernate, {state_timeout, Interval, sweep}]};
62-
handle_event(state_timeout, sweep, _, #data{interval=Interval,
63-
strategy=finish,
64-
ttl=TTL}) ->
108+
end;
109+
sweep_spans(finish, TTL) ->
65110
Expired = select_expired(TTL),
66111
[finish_span(Span) || Span <- Expired],
67-
{keep_state_and_data, [hibernate, {state_timeout, Interval, sweep}]};
68-
handle_event(state_timeout, sweep, _, #data{interval=Interval,
69-
strategy=failed_attribute_and_finish,
70-
ttl=TTL}) ->
112+
ok;
113+
sweep_spans(failed_attribute_and_finish, TTL) ->
71114
Expired = select_expired(TTL),
72115
[finish_span(oc_span:put_attribute(<<"finished_by_sweeper">>, true, Span)) || Span <- Expired],
73-
{keep_state_and_data, [hibernate, {state_timeout, Interval, sweep}]};
74-
handle_event(state_timeout, sweep, _, #data{interval=Interval,
75-
strategy=Fun,
76-
ttl=TTL}) when is_function(Fun) ->
116+
ok;
117+
sweep_spans(Fun, TTL) when is_function(Fun) ->
77118
Expired = select_expired(TTL),
78119
[Fun(Span) || Span <- Expired],
79-
{keep_state_and_data, [hibernate, {state_timeout, Interval, sweep}]};
80-
handle_event(_, _, _, _Data) ->
81-
keep_state_and_data.
82-
83-
code_change(_, State, Data, _) ->
84-
{ok, State, Data}.
85-
86-
terminate(_Reason, _State, _Data) ->
87120
ok.
88121

89-
%%
90-
91122
%% ignore these functions because dialyzer doesn't like match spec use of '_'
92123
-dialyzer({nowarn_function, expired_match_spec/2}).
93124
-dialyzer({nowarn_function, finish_span/1}).

test/oc_sweeper_SUITE.erl

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
-include("opencensus.hrl").
1515

1616
all() ->
17-
[drop,
17+
[storage_size,
18+
drop,
1819
finish,
1920
failed_attribute_and_finish].
2021

@@ -25,6 +26,17 @@ init_per_suite(Config) ->
2526
end_per_suite(_Config) ->
2627
ok.
2728

29+
init_per_testcase(storage_size, Config) ->
30+
application:set_env(opencensus, sweeper, #{interval => 250,
31+
strategy => finish,
32+
span_ttl => 500,
33+
storage_size => 100}),
34+
35+
application:set_env(opencensus, send_interval_ms, 1),
36+
application:set_env(opencensus, reporter, {oc_reporter_pid, []}),
37+
application:set_env(opencensus, pid_reporter, #{pid => self()}),
38+
{ok, _} = application:ensure_all_started(opencensus),
39+
Config;
2840
init_per_testcase(Type, Config) ->
2941
application:set_env(opencensus, sweeper, #{interval => 250,
3042
strategy => Type,
@@ -40,6 +52,32 @@ end_per_testcase(_, _Config) ->
4052
ok = application:stop(opencensus),
4153
ok.
4254

55+
%% Checks the storage size limit of the sweeper: this test case is configured
%% with a storage_size of 100, which the span table is expected to exceed by a
%% wide margin, so the sweeper should purge the table instead of finishing and
%% reporting the spans.
storage_size(_Config) ->
    SpanName1 = <<"span-1">>,
    SpanCtx = oc_trace:start_span(SpanName1, undefined),

    ChildSpanName1 = <<"child-span-1">>,
    ChildSpanCtx = oc_trace:start_span(ChildSpanName1, SpanCtx),

    [ChildSpanData] = ets:lookup(?SPAN_TAB, ChildSpanCtx#span_ctx.span_id),
    ?assertEqual(ChildSpanName1, ChildSpanData#span.name),
    ?assertEqual(SpanCtx#span_ctx.span_id, ChildSpanData#span.parent_span_id),

    %% wait until the sweeper has emptied the span table
    ?UNTIL(ets:tab2list(?SPAN_TAB) =:= []),

    %% sleep long enough that the reporter would have run again for sure
    timer:sleep(10),

    %% neither the parent nor the child span should have been reported
    ?assertEqual(no_span, receive
                              {span, #span{name=N}} when N =:= SpanName1; N =:= ChildSpanName1 ->
                                  got_span
                          after
                              0 ->
                                  no_span
                          end).
80+
4381
drop(_Config) ->
4482
SpanName1 = <<"span-1">>,
4583
SpanCtx = oc_trace:start_span(SpanName1, undefined),

0 commit comments

Comments
 (0)