Merge pull request #8492 from jepler/maketranslation-levels

tannewt · web-flow · commit f9e63d9ac652 · 2023-10-23T14:41:12.000-07:00
Add CIRCUITPY_MESSAGE_COMPRESSION_LEVEL
diff --git a/mpy-cross/Makefile b/mpy-cross/Makefile
@@ -63,6 +63,8 @@ endif
 OBJ = $(PY_CORE_O)
 OBJ += $(addprefix $(BUILD)/, $(SRC_C:.c=.o))
 
+# CIRCUITPY
 $(BUILD)/supervisor/shared/translate/translate.o: $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/compressed_translations.generated.h
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
 
 include $(TOP)/py/mkrules.mk
diff --git a/ports/espressif/mpconfigport.mk b/ports/espressif/mpconfigport.mk
@@ -148,3 +148,6 @@ endif
 # only if something else is turned off, such as HID.
 USB_NUM_ENDPOINT_PAIRS = 7
 USB_NUM_IN_ENDPOINTS = 5
+
+# Usually lots of flash space available, so trade message size for a faster build
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 1
diff --git a/ports/raspberrypi/mpconfigport.mk b/ports/raspberrypi/mpconfigport.mk
@@ -52,3 +52,6 @@ USB_NUM_ENDPOINT_PAIRS = 8
 
 INTERNAL_FLASH_FILESYSTEM = 1
 CIRCUITPY_SETTABLE_PROCESSOR_FREQUENCY = 1
+
+# Usually lots of flash space available, so trade message size for a faster build
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 1
diff --git a/ports/unix/mpconfigport.mk b/ports/unix/mpconfigport.mk
@@ -50,5 +50,6 @@ MICROPY_VFS_LFS2 = 0
 
 # CIRCUITPY
 CIRCUITPY_ULAB = 1
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
 MICROPY_EMIT_NATIVE = 0
 CFLAGS += -DCIRCUITPY=1
diff --git a/ports/unix/variants/coverage/mpconfigvariant.mk b/ports/unix/variants/coverage/mpconfigvariant.mk
@@ -92,3 +92,4 @@ CFLAGS += \
 
 SRC_C += coverage.c
 SRC_CXX += coveragecpp.cpp
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
diff --git a/py/circuitpy_mpconfig.mk b/py/circuitpy_mpconfig.mk
@@ -52,6 +52,10 @@ CFLAGS += -DCIRCUITPY=$(CIRCUITPY)
 CIRCUITPY_FULL_BUILD ?= 1
 CFLAGS += -DCIRCUITPY_FULL_BUILD=$(CIRCUITPY_FULL_BUILD)
 
+# By default, aggressively reduce the size of in-flash messages, at the cost of
+# increased build time
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 9
+
 # Reduce the size of in-flash properties. Requires support in the .ld linker
 # file, so not enabled by default.
 CIRCUITPY_OPTIMIZE_PROPERTY_FLASH_SIZE ?= 0
diff --git a/py/maketranslationdata.py b/py/maketranslationdata.py
@@ -174,7 +174,7 @@ class EncodingTable:
     qstrs_inv: object
 
 
-def compute_huffman_coding(qstrs, translation_name, translations, f):
+def compute_huffman_coding(qstrs, translation_name, translations, f, compression_level):
     # possible future improvement: some languages are better when consider len(k) > 2. try both?
     qstrs = dict((k, v) for k, v in qstrs.items() if len(k) > 3)
     qstr_strs = list(qstrs.keys())
@@ -209,6 +209,8 @@ def remove_offset(c):
             if 0x80 <= ord_c < 0xFF:
                 end_unused = min(ord_c, end_unused)
     max_words = end_unused - 0x80
+    if compression_level < 5:
+        max_words = 0
 
     bits_per_codepoint = 16 if max_ord > 255 else 8
     values_type = "uint16_t" if max_ord > 255 else "uint8_t"
@@ -298,8 +300,12 @@ def est_net_savings(s, occ):
         word = scores[0][0]
         words.append(word)
 
+    splitters = words[:]
+    if compression_level > 3:
+        splitters.extend(qstr_strs)
+
     words.sort(key=len)
-    extractor = TextSplitter(words + qstr_strs)
+    extractor = TextSplitter(splitters)
     counter = collections.Counter()
     used_qstr = 0
     for t in texts:
@@ -356,8 +362,8 @@ def est_net_savings(s, occ):
         len(translation.encode("utf-8")) for (original, translation) in translations
     )
 
-    maxlen = len(words[-1])
-    minlen = len(words[0])
+    maxlen = len(words[-1]) if words else 0
+    minlen = len(words[0]) if words else 0
     wlencount = [len([None for w in words if len(w) == l]) for l in range(minlen, maxlen + 1)]
 
     translation_qstr_bits = used_qstr.bit_length()
@@ -596,6 +602,12 @@ def output_translation_data(encoding_table, i18ns, out):
     parser.add_argument(
         "--translation", default=None, type=str, help="translations for i18n() items"
     )
+    parser.add_argument(
+        "--compression_level",
+        type=int,
+        default=9,
+        help="degree of compression (>5: construct dictionary; >3: use qstrs)",
+    )
     parser.add_argument(
         "--compression_filename",
         type=argparse.FileType("w", encoding="UTF-8"),
@@ -619,6 +631,6 @@ def output_translation_data(encoding_table, i18ns, out):
     i18ns = sorted(i18ns)
     translations = translate(args.translation, i18ns)
     encoding_table = compute_huffman_coding(
-        qstrs, args.translation, translations, args.compression_filename
+        qstrs, args.translation, translations, args.compression_filename, args.compression_level
     )
     output_translation_data(encoding_table, translations, args.translation_filename)
diff --git a/py/py.mk b/py/py.mk
@@ -269,7 +269,7 @@ $(PY_BUILD)/translations-$(TRANSLATION).c: $(HEADER_BUILD)/compressed_translatio
 $(HEADER_BUILD)/compressed_translations.generated.h: $(PY_SRC)/maketranslationdata.py $(HEADER_BUILD)/$(TRANSLATION).mo $(HEADER_BUILD)/qstrdefs.generated.h
 	$(STEPECHO) "GEN $@"
 	$(Q)mkdir -p $(PY_BUILD)
-	$(Q)$(PYTHON) $(PY_SRC)/maketranslationdata.py --compression_filename $(HEADER_BUILD)/compressed_translations.generated.h --translation $(HEADER_BUILD)/$(TRANSLATION).mo --translation_filename $(PY_BUILD)/translations-$(TRANSLATION).c --qstrdefs_filename  $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/qstrdefs.preprocessed.h
+	$(Q)$(PYTHON) $(PY_SRC)/maketranslationdata.py --compression_filename $(HEADER_BUILD)/compressed_translations.generated.h --translation $(HEADER_BUILD)/$(TRANSLATION).mo --translation_filename $(PY_BUILD)/translations-$(TRANSLATION).c --qstrdefs_filename  $(HEADER_BUILD)/qstrdefs.generated.h --compression_level $(CIRCUITPY_MESSAGE_COMPRESSION_LEVEL) $(HEADER_BUILD)/qstrdefs.preprocessed.h
 
 PY_CORE_O += $(PY_BUILD)/translations-$(TRANSLATION).o
 

Original file line number	Diff line number	Diff line change
`@@ -92,3 +92,4 @@ CFLAGS += \`
`92`	`92`
`93`	`93`	`SRC_C += coverage.c`
`94`	`94`	`SRC_CXX += coveragecpp.cpp`
	`95`	`+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1`