Skip to content

Commit cf285be

Browse files
committed
Stabilize window metric color scaling
1 parent 07cbaa6 commit cf285be

2 files changed

Lines changed: 27 additions & 4 deletions

File tree

bookviz/gallery.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ def index_html(
134134
const DEFAULT_WINDOW_STEP = {window_step_json};
135135
const TOKEN_METRICS = ["word-freq", "word-freq-linear", "bigram-prob", "bigram-diversity", "word-length", "word-position", "unique-word"];
136136
const WINDOW_METRICS = ["avg-word-length", "lexical-diversity", "punctuation-density", "repetition-density", "sentence-length"];
137+
const FIXED_SCALE_METRICS = ["lexical-diversity", "punctuation-density", "repetition-density", "word-freq-linear", "bigram-prob", "word-length", "word-position", "unique-word"];
137138
const ALL_METRICS = [...TOKEN_METRICS, ...WINDOW_METRICS];
138139
const tokenRe = /[\\p{{L}}\\p{{N}}_]+|[^\\p{{L}}\\p{{N}}_\\s]/gu;
139140
const state = {{ text: "", tokens: [], values: [], labels: [], size: 0 }};
@@ -202,12 +203,14 @@ def index_html(
202203
const result = useWindows
203204
? windowValues(state.tokens, metric, Number(els.windowSize.value), Number(els.windowStep.value))
204205
: tokenValues(state.tokens, metric);
205-
state.values = normalize(result.values);
206+
state.values = normalize(result.values, scaleDomain(metric));
206207
state.labels = result.labels;
207208
draw(state.values);
208209
els.meta.innerHTML = [
209210
`<strong>${{state.tokens.length.toLocaleString()}}</strong> tokens`,
210211
`<strong>${{state.values.length.toLocaleString()}}</strong> pixels`,
212+
`raw range ${{formatRange(valueRange(result.values))}}`,
213+
scaleDomain(metric) ? "color scale 0-1" : "color scale current min-max",
211214
useWindows ? `windowed, size ${{els.windowSize.value}}, step ${{els.windowStep.value}}` : "word-level pixels",
212215
].join("<br>");
213216
setDetails("Hover over the image", "");
@@ -257,6 +260,7 @@ def index_html(
257260
for (let start = 0; start < tokens.length; start += step) {{
258261
const chunk = tokens.slice(start, start + size);
259262
if (!chunk.length) continue;
263+
if (chunk.length < size * 0.5) continue;
260264
labels.push(windowLabel(chunk, start));
261265
if (WINDOW_METRICS.includes(metric)) values.push(windowMetric(chunk, metric));
262266
else {{
@@ -294,16 +298,32 @@ def index_html(
294298
return !/[\\p{{L}}\\p{{N}}_]/u.test(token);
295299
}}
296300
297-
function normalize(values) {{
301+
function normalize(values, domain = null) {{
298302
if (!values.length) return [];
303+
const range = domain || valueRange(values);
304+
const min = range[0];
305+
const max = range[1];
306+
if (min === max) return values.map(() => 0);
307+
return values.map(value => (value - min) / (max - min));
308+
}}
309+
310+
function scaleDomain(metric) {{
311+
return FIXED_SCALE_METRICS.includes(metric) ? [0, 1] : null;
312+
}}
313+
314+
function valueRange(values) {{
315+
if (!values.length) return [0, 0];
299316
let min = values[0];
300317
let max = values[0];
301318
for (const value of values) {{
302319
if (value < min) min = value;
303320
if (value > max) max = value;
304321
}}
305-
if (min === max) return values.map(() => 0);
306-
return values.map(value => (value - min) / (max - min));
322+
return [min, max];
323+
}}
324+
325+
function formatRange(range) {{
326+
return `${{range[0].toFixed(4)}}-${{range[1].toFixed(4)}}`;
307327
}}
308328
309329
function maxOf(values) {{

tests/test_gallery.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,4 +24,7 @@ def test_gallery_generates_client_side_explorer(tmp_path: Path):
2424
assert "const BOOKS" in index_html
2525
assert "windowSize" in index_html
2626
assert "lockStep" in index_html
27+
assert "FIXED_SCALE_METRICS" in index_html
28+
assert "chunk.length < size * 0.5" in index_html
29+
assert "raw range" in index_html
2730
assert "lexical-diversity" in index_html

0 commit comments

Comments
 (0)