|
| 1 | +function median(sorted: number[]): number { |
| 2 | + const mid = Math.floor(sorted.length / 2) |
| 3 | + return sorted.length % 2 === 0 ? (sorted[mid - 1]! + sorted[mid]!) / 2 : sorted[mid]! |
| 4 | +} |
| 5 | + |
1 | 6 | /** |
2 | | - * Hampel filter: replaces outlier values with the local median. |
3 | | - * Uses Median Absolute Deviation (MAD) to detect outliers. |
| 7 | + * IQR-based outlier filter. |
| 8 | + * |
| 9 | + * 1. Compute Q1, Q3, IQR across all values globally |
| 10 | + * 2. Flag any point outside [Q1 - k×IQR, Q3 + k×IQR] as an outlier |
| 11 | + * 3. Replace each outlier with the median of its non-outlier local neighbors |
| 12 | + * |
| 13 | + * This handles sustained anomalies (multi-week spikes) regardless of |
| 14 | + * their position in the dataset — including at boundaries. |
4 | 15 | * |
5 | 16 | * @param data - array of objects with a `value` property |
6 | | - * @param windowSize - half-window size (0 = disabled) |
7 | | - * @param threshold - number of MADs above which a value is considered an outlier |
8 | | - * @returns a new array with outlier values replaced by the local median |
| 17 | + * @param multiplier - IQR multiplier k (0 = disabled, standard: 1.5, less aggressive: 3) |
| 18 | + * @param windowSize - half-window for local median replacement (default: 6) |
9 | 19 | */ |
10 | | -export function hampelFilter<T extends { value: number }>( |
| 20 | +export function iqrFilter<T extends { value: number }>( |
11 | 21 | data: T[], |
12 | | - windowSize: number, |
13 | | - threshold: number, |
| 22 | + multiplier: number, |
| 23 | + windowSize: number = 6, |
14 | 24 | ): T[] { |
15 | | - if (windowSize <= 0 || data.length === 0) return data |
| 25 | + if (multiplier <= 0 || data.length < 4) return data |
| 26 | + |
| 27 | + const sorted = data.map(d => d.value).sort((a, b) => a - b) |
| 28 | + const q1 = median(sorted.slice(0, Math.floor(sorted.length / 2))) |
| 29 | + const q3 = median(sorted.slice(Math.ceil(sorted.length / 2))) |
| 30 | + const iqr = q3 - q1 |
| 31 | + |
| 32 | + if (iqr <= 0) return data |
| 33 | + |
| 34 | + const lowerBound = q1 - multiplier * iqr |
| 35 | + const upperBound = q3 + multiplier * iqr |
| 36 | + |
| 37 | + const isOutlier = data.map(d => d.value < lowerBound || d.value > upperBound) |
| 38 | + if (!isOutlier.some(Boolean)) return data |
16 | 39 |
|
17 | 40 | const result = data.map(d => ({ ...d })) |
18 | 41 | const n = data.length |
19 | 42 |
|
20 | | - for (let i = 0; i < n; i++) { |
| 43 | + for (let i = 1; i < n - 1; i++) { |
| 44 | + if (!isOutlier[i]) continue |
| 45 | + |
| 46 | + // Collect non-outlier neighbors within the window |
21 | 47 | const lo = Math.max(0, i - windowSize) |
22 | 48 | const hi = Math.min(n - 1, i + windowSize) |
23 | | - |
24 | | - const windowValues: number[] = [] |
| 49 | + const neighbors: number[] = [] |
25 | 50 | for (let j = lo; j <= hi; j++) { |
26 | | - windowValues.push(data[j]!.value) |
| 51 | + if (!isOutlier[j]) neighbors.push(data[j]!.value) |
27 | 52 | } |
28 | | - windowValues.sort((a, b) => a - b) |
29 | | - |
30 | | - const median = windowValues[Math.floor(windowValues.length / 2)]! |
31 | | - const deviations = windowValues.map(v => Math.abs(v - median)).sort((a, b) => a - b) |
32 | | - const mad = deviations[Math.floor(deviations.length / 2)]! |
33 | | - |
34 | | - // 1.4826 converts MAD to an estimate of the standard deviation |
35 | | - const scaledMad = 1.4826 * mad |
36 | 53 |
|
37 | | - if (scaledMad > 0 && Math.abs(data[i]!.value - median) > threshold * scaledMad) { |
38 | | - result[i]!.value = median |
| 54 | + if (neighbors.length > 0) { |
| 55 | + neighbors.sort((a, b) => a - b) |
| 56 | + result[i]!.value = median(neighbors) |
39 | 57 | } |
40 | 58 | } |
41 | 59 |
|
42 | 60 | return result |
43 | 61 | } |
44 | 62 |
|
45 | | -/** |
46 | | - * Low-pass (exponential smoothing) filter. |
47 | | - * |
48 | | - * @param data - array of objects with a `value` property |
49 | | - * @param tau - smoothing time constant (0 = disabled, higher = smoother) |
50 | | - * @returns a new array with smoothed values |
51 | | - */ |
52 | | -export function lowPassFilter<T extends { value: number }>(data: T[], tau: number): T[] { |
53 | | - if (tau <= 0 || data.length === 0) return data |
54 | | - |
55 | | - const result = data.map(d => ({ ...d })) |
56 | | - const alpha = 1 / (1 + tau) |
57 | | - |
58 | | - result[0]!.value = data[0]!.value |
59 | | - for (let i = 1; i < data.length; i++) { |
60 | | - result[i]!.value = alpha * data[i]!.value + (1 - alpha) * result[i - 1]!.value |
61 | | - } |
62 | | - |
63 | | - return result |
64 | | -} |
65 | | - |
66 | 63 | export interface ChartFilterSettings { |
67 | | - hampelWindow: number |
68 | | - hampelThreshold: number |
69 | | - smoothingTau: number |
| 64 | + iqrMultiplier: number |
70 | 65 | } |
71 | 66 |
|
72 | 67 | /** |
73 | | - * Applies Hampel filter then low-pass smoothing in sequence. |
| 68 | + * Applies IQR-based outlier filter to download data. |
74 | 69 | */ |
75 | 70 | export function applyDownloadFilter<T extends { value: number }>( |
76 | 71 | data: T[], |
77 | 72 | settings: ChartFilterSettings, |
78 | 73 | ): T[] { |
79 | | - if (data.length < 2) return data |
80 | | - |
81 | | - const firstValue = data[0]!.value |
82 | | - const lastValue = data[data.length - 1]!.value |
83 | | - |
84 | | - let result = data |
85 | | - result = hampelFilter(result, settings.hampelWindow, settings.hampelThreshold) |
86 | | - result = lowPassFilter(result, settings.smoothingTau) |
87 | | - |
88 | | - // Preserve original first and last values |
89 | | - if (result !== data) { |
90 | | - result[0]!.value = firstValue |
91 | | - result[result.length - 1]!.value = lastValue |
92 | | - } |
93 | | - |
94 | | - return result |
| 74 | + return iqrFilter(data, settings.iqrMultiplier) |
95 | 75 | } |
0 commit comments