Skip to content

Commit 876ad1d

Browse files
committed
Merge pull request #220 in LCL/wolframclientforpython from feature/binary_deserialize_concatenation to master
* commit '634e3b4f9a0d544b9c15a900ace8b505c460b780': minor tweaks; using 2 funcs; adding binary_deserialize benchmark; using concatenate_bytes
2 parents 00de96c + 634e3b4 commit 876ad1d

2 files changed

Lines changed: 56 additions & 34 deletions

File tree

wolframclient/cli/commands/benchmark.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import tempfile
77

88
from wolframclient.cli.utils import SimpleCommand
9+
from wolframclient.deserializers import binary_deserialize
910
from wolframclient.language import wl
1011
from wolframclient.serializers import export
1112
from wolframclient.utils.debug import timed
@@ -39,13 +40,9 @@ def complexity_handler(self, complexity):
3940
"functions": repeat(wl.Function(1, 2, 3), complexity),
4041
}
4142

42-
@timed
43-
def export(self, *args, **opts):
44-
return export(*args, **opts)
43+
def formatted_time(self, function, *args, **opts):
4544

46-
def formatted_time(self, *args, **opts):
47-
48-
time = sum(first(self.export(*args, **opts)) for i in range(self.repetitions))
45+
time = sum(first(timed(function)(*args, **opts)) for i in range(self.repetitions))
4946

5047
return "%.5f" % (time / self.repetitions)
5148

@@ -55,27 +52,50 @@ def table_line(self, *iterable):
5552
def table_divider(self, length):
5653
self.print(*("-" * self.col_size for i in range(length)))
5754

55+
def stream_generators(self, path):
56+
yield "Memory", lambda complexity, export_format, path=path: None
57+
yield "File", lambda complexity, export_format, path=path: os.path.join(
58+
path, "benchmark-test-%s.%s" % (force_text(complexity).zfill(7), export_format)
59+
)
60+
5861
def report(self):
5962

6063
path = tempfile.gettempdir()
6164

6265
benchmarks = [(c, self.complexity_handler(c)) for c in self.complexity]
6366

64-
self.print("dumping results in", path)
67+
self.table_line("dumping results in %s" % path)
68+
self.table_line()
6569

6670
# running export to do all lazy loadings
6771
export(1)
6872

69-
for title, stream_generator in (
70-
("Memory", lambda complexity: None),
71-
(
72-
"File",
73-
lambda complexity: os.path.join(
74-
path,
75-
"benchmark-test-%s.%s" % (force_text(complexity).zfill(7), export_format),
76-
),
77-
),
78-
):
73+
self.table_line("* Binary deserialize")
74+
self.table_line()
75+
76+
self.table_line(
77+
"Memory", *(force_text(c).ljust(self.col_size) for c in self.complexity)
78+
)
79+
self.table_divider(len(self.complexity) + 1)
80+
81+
for label, opts in (("wxf", dict()), ("wxf zip", dict(compress=True))):
82+
83+
self.table_line(
84+
label,
85+
*(
86+
self.formatted_time(
87+
binary_deserialize, export(expr, target_format="wxf", **opts)
88+
)
89+
for complexity, expr in benchmarks
90+
)
91+
)
92+
93+
self.table_line()
94+
95+
self.table_line("* Export")
96+
self.table_line()
97+
98+
for title, stream_generator in self.stream_generators(path):
7999

80100
self.table_line(
81101
title, *(force_text(c).ljust(self.col_size) for c in self.complexity)
@@ -91,8 +111,9 @@ def report(self):
91111
label,
92112
*(
93113
self.formatted_time(
114+
export,
94115
expr,
95-
stream=stream_generator(complexity),
116+
stream=stream_generator(complexity, export_format),
96117
target_format=export_format,
97118
**opts
98119
)
@@ -102,8 +123,6 @@ def report(self):
102123

103124
self.table_line()
104125

105-
self.table_line()
106-
107126
def handle(self, profile, **opts):
108127
if profile:
109128
cProfile.runctx("report()", {"report": self.report}, {})

wolframclient/serializers/wxfencoder/streaming.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from __future__ import absolute_import, print_function, unicode_literals
22

3-
from wolframclient.utils import six
43
from wolframclient.utils.api import zlib
5-
from wolframclient.utils.encoding import force_bytes
4+
from wolframclient.utils.decorators import decorate
5+
from wolframclient.utils.encoding import concatenate_bytes, force_bytes
66

77

88
class ZipCompressedWriter(object):
@@ -32,6 +32,7 @@ class ExactSizeReader(object):
3232
def __init__(self, reader):
3333
self._reader = reader
3434

35+
3536
def read(self, size=-1):
3637
"""Read from an underlying readable object.
3738
@@ -45,17 +46,19 @@ def read(self, size=-1):
4546
# Also a fast path when the requested amount of bytes is returned in one go.
4647
if size <= 0 or len(data) == size:
4748
return data
49+
50+
return self._read_rest(data, size)
51+
52+
@decorate(concatenate_bytes)
53+
def _read_rest(self, data, size=-1):
4854
# need an intermediary buffer
4955
out_len = len(data)
50-
data = six.BytesIO(data)
5156
while out_len < size:
5257
chunk = self._reader.read(size - out_len)
53-
if chunk == b"":
58+
if not chunk:
5459
raise EOFError("Not enough data to read.")
55-
data.write(chunk)
56-
out_len = out_len + len(chunk)
57-
return data.getvalue()
58-
60+
yield chunk
61+
out_len += len(chunk)
5962

6063
class ZipCompressedReader(object):
6164
"""A buffer implementation reading zip compressed data from a source buffer and returning uncompressed data.
@@ -70,6 +73,7 @@ def __init__(self, reader):
7073
self._compressor = zlib.decompressobj()
7174
self._reader = reader
7275

76+
@decorate(concatenate_bytes)
7377
def read(self, size=-1):
7478
"""Read from a compressed stream of bytes and return the inflated byte sequence.
7579
@@ -81,30 +85,29 @@ def read(self, size=-1):
8185
size = -1
8286
else:
8387
chunk_size = ZipCompressedReader.CHUNK_SIZE
84-
out_data = six.BytesIO()
88+
8589
out_len = 0
8690
while True:
8791
# first step find try to find some data to uncompress.
8892
# sometimes some bytes are left over. We have to send them first to zlib.
89-
if self._compressor.unconsumed_tail != b"":
93+
if self._compressor.unconsumed_tail:
9094
data_in = self._compressor.unconsumed_tail
9195
else:
9296
# read more data from input reader. Read in chunk since we can't guess how
9397
# big the inflated result is.
9498
data_in = self._reader.read(chunk_size)
9599
# no more data is available.
96-
if data_in == b"":
100+
if not data_in:
97101
break
98102
# second step, decompress the new chunk
99103
if size > 0:
100104
chunk = self._compressor.decompress(data_in, size - out_len)
101105
else:
102106
chunk = self._compressor.decompress(data_in)
103107
# increment output len.
104-
out_len = out_len + len(chunk)
108+
out_len += len(chunk)
105109
# write to buffer
106-
out_data.write(chunk)
110+
yield chunk
107111
# check requested size against output length.
108112
if size > 0 and out_len == size:
109113
break
110-
return out_data.getvalue()

0 commit comments

Comments (0)