Skip to content

Commit 7dc8c0b

Browse files
committed
Fix false positives for boundary points and low-volume packages
Skip points without a full symmetric window to avoid flattening real growth at series edges ("great start" problem). Use a relative check when MAD is 0 instead of flagging any deviation, so sparse packages like [0,0,0,1,0,0,0] keep their real activity.
1 parent 55388db commit 7dc8c0b

2 files changed

Lines changed: 50 additions & 6 deletions

File tree

app/utils/download-anomalies.ts

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@ export function applyHampelCorrection(
4545
// Clone to avoid mutating the original data.
4646
const result = (data as Array<Record<string, any>>).map(d => ({ ...d }))
4747

48-
for (let i = 0; i < values.length; i++) {
49-
// Build a sliding window around the current point, clamped to array bounds.
50-
const start = Math.max(0, i - halfWindow)
51-
const end = Math.min(values.length - 1, i + halfWindow)
48+
// Only evaluate points that have a full symmetric window.
49+
// Boundary points lack enough context on one side, making them
50+
// prone to false positives (e.g., a "great start": sudden real growth in the last points of a series).
51+
for (let i = halfWindow; i < values.length - halfWindow; i++) {
52+
const start = i - halfWindow
53+
const end = i + halfWindow
5254
const window = values.slice(start, end + 1)
5355

5456
// The median is robust to outliers — unlike the mean, a single spike
@@ -63,9 +65,13 @@ export function applyHampelCorrection(
6365
const deviation = Math.abs(values[i]! - windowMedian)
6466

6567
// MAD of 0 means more than half of the values in the window equal the median.
66-
// If this point differs from the median at all, it's an outlier.
68+
// We can't use the standard MAD-based score here, so fall back to a
69+
// relative check: only flag if the deviation is large relative to the
70+
// median. When the median is 0, there's no baseline to measure against
71+
// so we skip — this avoids erasing real low-volume activity like
72+
// [0,0,0,1,0,0,0].
6773
if (windowMad === 0) {
68-
if (deviation > 0) {
74+
if (windowMedian > 0 && deviation > windowMedian * threshold) {
6975
result[i]!.value = Math.round(windowMedian)
7076
result[i]!.hasAnomaly = true
7177
}

test/unit/app/utils/download-anomalies.spec.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,44 @@ describe('applyHampelCorrection', () => {
6767
expect(result[1]!.value).toBe(1000) // not enough data to detect
6868
})
6969

70+
it('does not flatten a "great start" (sudden real growth at the end)', () => {
71+
// A package going from zero to real adoption — this is NOT a spike.
72+
const data: WeeklyDataPoint[] = [
73+
makeWeeklyPoint('2022-11-07', 0),
74+
makeWeeklyPoint('2022-11-14', 0),
75+
makeWeeklyPoint('2022-11-21', 0),
76+
makeWeeklyPoint('2022-11-28', 0),
77+
makeWeeklyPoint('2022-12-05', 0),
78+
makeWeeklyPoint('2022-12-12', 0),
79+
makeWeeklyPoint('2022-12-19', 20000),
80+
]
81+
82+
const result = applyHampelCorrection(data) as WeeklyDataPoint[]
83+
84+
// The 20000 should NOT be erased — it's real growth, not an anomaly.
85+
expect(result[6]!.value).toBe(20000)
86+
expect(result[6]!.hasAnomaly).toBeUndefined()
87+
})
88+
89+
it('does not flatten low-volume real activity', () => {
90+
// Sparse package with occasional real downloads
91+
const data: WeeklyDataPoint[] = [
92+
makeWeeklyPoint('2022-11-07', 0),
93+
makeWeeklyPoint('2022-11-14', 0),
94+
makeWeeklyPoint('2022-11-21', 0),
95+
makeWeeklyPoint('2022-11-28', 1),
96+
makeWeeklyPoint('2022-12-05', 0),
97+
makeWeeklyPoint('2022-12-12', 0),
98+
makeWeeklyPoint('2022-12-19', 0),
99+
]
100+
101+
const result = applyHampelCorrection(data) as WeeklyDataPoint[]
102+
103+
// A single download is not an anomaly
104+
expect(result[3]!.value).toBe(1)
105+
expect(result[3]!.hasAnomaly).toBeUndefined()
106+
})
107+
70108
it('does not mutate the original data', () => {
71109
const data: WeeklyDataPoint[] = [
72110
makeWeeklyPoint('2022-11-07', 100),

0 commit comments

Comments
 (0)