99
1010Usage:
1111 python book_png.py <text_file> [--metric METRIC] [--color COLOR] [--output FILE]
12+ python book_png.py <text_file> --html # generate interactive HTML viewer
1213 python book_png.py --list # show available metrics and color schemes
1314"""
1415
1516import argparse
17+ import base64
18+ import io
19+ import json
1620import math
1721import random
1822import sys
@@ -171,32 +175,313 @@ def unique_word_id(words):
171175# RENDERER
172176# =============================================================================
173177
def render(values, color_fn, output_path, words=None):
    """
    Paint a sequence of values onto a square image and save it as a PNG.

    Values are laid out left-to-right, top-to-bottom, one pixel each, on the
    smallest square canvas that holds them all; unused pixels stay black.

    Args:
        values: iterable of floats; each is clamped to 0.0-1.0 before colouring
        color_fn: function that maps a clamped float -> RGB tuple
        output_path: where to save the PNG
        words: optional list of words, paired positionally with the values

    Returns:
        tuple: (PIL.Image, size, list of (word, value) pairs). The paired
        values are the raw, unclamped inputs. When there is nothing to
        render the result is (None, 0, []).
    """
    samples = list(values)
    if len(samples) == 0:
        print("No values to render", file=sys.stderr)
        return None, 0, []

    side = int(math.ceil(math.sqrt(len(samples))))
    canvas = Image.new("RGB", (side, side), color=(0, 0, 0))

    for index, raw in enumerate(samples):
        row, col = divmod(index, side)
        # Keep this exact min/max nesting: it decides which bound NaN maps to.
        level = max(0.0, min(1.0, raw))
        canvas.putpixel((col, row), color_fn(level))

    # zip stops at the shorter input, so surplus values or words are ignored.
    labelled = list(zip(words, samples)) if words else []

    canvas.save(output_path)
    print(f"Saved: {output_path} ({side}x{side} pixels, {len(samples)} values)")
    return canvas, side, labelled
212+
213+
def _script_safe_json(obj):
    """Serialize obj to JSON that can be inlined safely inside a <script> element."""
    # json.dumps leaves '<', '>' and '&' untouched, so a token such as
    # "</script>" would terminate the surrounding script block. With the
    # default ensure_ascii=True these characters can only occur inside JSON
    # string literals, so swapping them for \uXXXX escapes keeps the data intact.
    return (
        json.dumps(obj)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )


def render_html(img, size, word_data, output_path, metric_name, color_name, source_file):
    """
    Generate an interactive HTML viewer with zoom/pan and word tooltips.

    The image is embedded as a base64 PNG data URI and the words/values are
    embedded as JavaScript arrays, so the output is a single self-contained
    file. All caller-supplied text is escaped before it is placed in the page.

    Args:
        img: PIL.Image object (anything with ``save(fileobj, format='PNG')``)
        size: image dimension (size x size)
        word_data: list of (word, value) tuples, in pixel order
        output_path: where to save the HTML file
        metric_name: name of the metric used
        color_name: name of the color scheme used
        source_file: original text file name
    """
    # Imported locally so the module-level import block does not need to change.
    from html import escape

    # Convert image to base64
    img_buffer = io.BytesIO()
    img.save(img_buffer, format='PNG')
    img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')

    # Create word lookup (just the words array - position is implicit)
    words_json = _script_safe_json([w for w, _ in word_data])
    values_json = _script_safe_json([round(v, 4) for _, v in word_data])

    # These strings land in HTML text nodes, so escape markup characters.
    safe_source = escape(str(source_file))
    safe_metric = escape(str(metric_name))
    safe_color = escape(str(color_name))

    page = f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_source} - {safe_metric}</title>
    <style>
        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
        body {{
            background: #1a1a1a;
            color: #fff;
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, monospace;
            overflow: hidden;
            height: 100vh;
        }}
        #header {{
            position: fixed;
            top: 0;
            left: 0;
            right: 0;
            padding: 10px 20px;
            background: rgba(0,0,0,0.8);
            z-index: 100;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }}
        #header h1 {{
            font-size: 14px;
            font-weight: normal;
        }}
        #header .info {{
            font-size: 12px;
            color: #888;
        }}
        #controls {{
            display: flex;
            gap: 10px;
            align-items: center;
        }}
        #controls button {{
            background: #333;
            border: 1px solid #555;
            color: #fff;
            padding: 5px 12px;
            cursor: pointer;
            border-radius: 3px;
        }}
        #controls button:hover {{
            background: #444;
        }}
        #zoom-level {{
            font-size: 12px;
            color: #888;
            min-width: 60px;
        }}
        #container {{
            position: absolute;
            top: 50px;
            left: 0;
            right: 0;
            bottom: 0;
            overflow: hidden;
            cursor: grab;
        }}
        #container.dragging {{
            cursor: grabbing;
        }}
        #canvas-wrapper {{
            position: absolute;
            transform-origin: 0 0;
        }}
        #image {{
            display: block;
            image-rendering: pixelated;
            image-rendering: crisp-edges;
        }}
        #tooltip {{
            position: fixed;
            background: rgba(0,0,0,0.9);
            border: 1px solid #444;
            padding: 8px 12px;
            border-radius: 4px;
            pointer-events: none;
            z-index: 200;
            display: none;
            font-size: 13px;
            max-width: 300px;
        }}
        #tooltip .word {{
            font-size: 16px;
            font-weight: bold;
            color: #fff;
            margin-bottom: 4px;
        }}
        #tooltip .details {{
            color: #aaa;
            font-size: 11px;
        }}
        #help {{
            position: fixed;
            bottom: 10px;
            left: 10px;
            font-size: 11px;
            color: #555;
        }}
    </style>
</head>
<body>
    <div id="header">
        <h1>{safe_source}</h1>
        <div class="info">{safe_metric} · {safe_color} · {size}×{size} px · {len(word_data):,} words</div>
        <div id="controls">
            <button onclick="zoomIn()">+ Zoom</button>
            <button onclick="zoomOut()">− Zoom</button>
            <button onclick="resetView()">Reset</button>
            <span id="zoom-level">100%</span>
        </div>
    </div>
    <div id="container">
        <div id="canvas-wrapper">
            <img id="image" src="data:image/png;base64,{img_base64}" width="{size}" height="{size}">
        </div>
    </div>
    <div id="tooltip">
        <div class="word"></div>
        <div class="details"></div>
    </div>
    <div id="help">Scroll to zoom · Drag to pan · Hover for words</div>

    <script>
        const SIZE = {size};
        const WORDS = {words_json};
        const VALUES = {values_json};

        const container = document.getElementById('container');
        const wrapper = document.getElementById('canvas-wrapper');
        const image = document.getElementById('image');
        const tooltip = document.getElementById('tooltip');
        const zoomLabel = document.getElementById('zoom-level');

        let scale = 1;
        let panX = 0;
        let panY = 0;
        let isDragging = false;
        let dragStartX = 0;
        let dragStartY = 0;
        let dragStartPanX = 0;
        let dragStartPanY = 0;

        function updateTransform() {{
            wrapper.style.transform = `translate(${{panX}}px, ${{panY}}px) scale(${{scale}})`;
            zoomLabel.textContent = Math.round(scale * 100) + '%';
        }}

        function centerImage() {{
            const rect = container.getBoundingClientRect();
            panX = (rect.width - SIZE * scale) / 2;
            panY = (rect.height - SIZE * scale) / 2;
            updateTransform();
        }}

        function zoomIn() {{
            scale = Math.min(scale * 1.5, 200);
            centerImage();
        }}

        function zoomOut() {{
            scale = Math.max(scale / 1.5, 0.1);
            centerImage();
        }}

        function resetView() {{
            scale = 1;
            centerImage();
        }}

        // Mouse wheel zoom
        container.addEventListener('wheel', (e) => {{
            e.preventDefault();
            const rect = container.getBoundingClientRect();
            const mouseX = e.clientX - rect.left;
            const mouseY = e.clientY - rect.top;

            // Position relative to image before zoom
            const imgX = (mouseX - panX) / scale;
            const imgY = (mouseY - panY) / scale;

            // Apply zoom
            const zoomFactor = e.deltaY < 0 ? 1.2 : 0.8;
            scale = Math.max(0.1, Math.min(200, scale * zoomFactor));

            // Adjust pan to keep mouse position stable
            panX = mouseX - imgX * scale;
            panY = mouseY - imgY * scale;

            updateTransform();
        }});

        // Pan with mouse drag
        container.addEventListener('mousedown', (e) => {{
            isDragging = true;
            container.classList.add('dragging');
            dragStartX = e.clientX;
            dragStartY = e.clientY;
            dragStartPanX = panX;
            dragStartPanY = panY;
        }});

        window.addEventListener('mousemove', (e) => {{
            if (isDragging) {{
                panX = dragStartPanX + (e.clientX - dragStartX);
                panY = dragStartPanY + (e.clientY - dragStartY);
                updateTransform();
            }}

            // Tooltip
            const rect = image.getBoundingClientRect();
            const x = Math.floor((e.clientX - rect.left) / (rect.width / SIZE));
            const y = Math.floor((e.clientY - rect.top) / (rect.height / SIZE));
            const idx = y * SIZE + x;

            if (x >= 0 && x < SIZE && y >= 0 && y < SIZE && idx < WORDS.length) {{
                const word = WORDS[idx];
                const value = VALUES[idx];
                tooltip.querySelector('.word').textContent = word;
                tooltip.querySelector('.details').textContent = `Position: ${{idx.toLocaleString()}} · Value: ${{value.toFixed(4)}}`;
                tooltip.style.display = 'block';
                tooltip.style.left = (e.clientX + 15) + 'px';
                tooltip.style.top = (e.clientY + 15) + 'px';
            }} else {{
                tooltip.style.display = 'none';
            }}
        }});

        window.addEventListener('mouseup', () => {{
            isDragging = false;
            container.classList.remove('dragging');
        }});

        // Initial centering
        window.addEventListener('load', centerImage);
        window.addEventListener('resize', centerImage);
    </script>
</body>
</html>'''

    # The page declares charset UTF-8 and contains non-ASCII characters, so
    # write it as UTF-8 explicitly instead of relying on the locale default.
    Path(output_path).write_text(page, encoding='utf-8')
    print(f"Saved: {output_path} (interactive HTML viewer)")
200485
201486
202487# =============================================================================
@@ -237,6 +522,14 @@ def main():
237522 choices = list (COLOR_MAPPERS .keys ()),
238523 help = 'Color scheme (default: red-blue)' )
239524 parser .add_argument ('-o' , '--output' , help = 'Output filename (default: <input>-<metric>.png)' )
525+ parser .add_argument ('--html' , action = 'store_true' ,
526+ help = 'Generate interactive HTML viewer with zoom and word tooltips' )
527+ parser .add_argument ('-i' , '--ignore-case' , action = 'store_true' ,
528+ help = 'Treat words case-insensitively (lowercase all)' )
529+ parser .add_argument ('--ignore-punctuation' , action = 'store_true' ,
530+ help = 'Filter out punctuation tokens' )
531+ parser .add_argument ('--ignore-numbers' , action = 'store_true' ,
532+ help = 'Filter out numeric tokens' )
240533
241534 args = parser .parse_args ()
242535
@@ -256,7 +549,23 @@ def main():
256549 # Read and tokenize
257550 text = input_path .read_text (encoding = 'utf-8' , errors = 'replace' )
258551 words = nltk .word_tokenize (text )
259- print (f"Loaded { len (words )} words from { args .file } " )
552+ original_count = len (words )
553+
554+ # Apply filters
555+ if args .ignore_case :
556+ words = [w .lower () for w in words ]
557+
558+ if args .ignore_punctuation :
559+ words = [w for w in words if w .isalnum ()]
560+
561+ if args .ignore_numbers :
562+ words = [w for w in words if not w .isnumeric ()]
563+
564+ print (f"Loaded { original_count } tokens from { args .file } " , end = '' )
565+ if len (words ) != original_count :
566+ print (f" ({ len (words )} after filtering)" )
567+ else :
568+ print ()
260569
261570 # Generate output filename
262571 if args .output :
@@ -268,9 +577,20 @@ def main():
268577 metric_fn = METRICS [args .metric ]
269578 color_fn = COLOR_MAPPERS [args .color ]
270579
580+ # Determine which words to pass (for bigram metrics, we need the bigram pairs)
581+ if args .metric in ('bigram-prob' , 'bigram-diversity' ):
582+ display_words = [f"{ w1 } → { w2 } " for w1 , w2 in nltk .bigrams (words )]
583+ else :
584+ display_words = words
585+
271586 # Render
272587 values = metric_fn (words )
273- render (values , color_fn , output_path )
588+ img , size , word_data = render (values , color_fn , output_path , words = display_words )
589+
590+ # Generate HTML viewer if requested
591+ if args .html and img :
592+ html_path = Path (output_path ).stem + '.html'
593+ render_html (img , size , word_data , html_path , args .metric , args .color , input_path .name )
274594
275595
276596if __name__ == '__main__' :
0 commit comments