Skip to content

Commit 0bc5d67

Browse files
committed
Merge pull request #238 in LCL/wolframclientforpython from feature/opt to master
* commit '9f9d5e8e3c4b8f5888ac168f0c97e33942f8ac38': adding binary_deserialize load PackedArray must do the same _valid_type_or_fail now returns type code refactor using force_bytes for py2 compatibility using pack ability to serialize multiple values at once code refactor NumericArray is supposed to be initialized by a list of python numbers adding PackedArray benchmarks avoiding attribute lookup for every single element in the array speed optimizations in wxf serializer and zlib encoder
2 parents a5381c8 + 9f9d5e8 commit 0bc5d67

4 files changed

Lines changed: 89 additions & 73 deletions

File tree

wolframclient/cli/commands/benchmark.py

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from wolframclient.cli.utils import SimpleCommand
88
from wolframclient.deserializers import binary_deserialize
99
from wolframclient.language import wl
10+
from wolframclient.language.array import NumericArray, PackedArray
1011
from wolframclient.serializers import export
1112
from wolframclient.utils.debug import timed
1213
from wolframclient.utils.encoding import force_text
@@ -21,23 +22,31 @@ def repeat(el, n=1):
2122
class Command(SimpleCommand):
2223

2324
col_size = 8
25+
title_size = 14
2426
repetitions = 10
2527
complexity = [1, 2, 5, 10, 100, 1000]
2628

2729
def add_arguments(self, parser):
2830
parser.add_argument("--profile", dest="profile", default=False, action="store_true")
2931

30-
def complexity_handler(self, complexity):
32+
def expression_handler(self, complexity):
3133
return {
32-
"symbols": repeat(wl.Symbol, complexity),
33-
"strings": repeat("string", complexity),
34-
"bytes": repeat(b"bytes", complexity),
35-
"integers": repeat(1, complexity),
36-
"decimals": repeat(decimal.Decimal("1.23"), complexity),
37-
"floats": repeat(1.23, complexity),
38-
"dict": repeat({1: 2, 3: 4, 5: 6}, complexity),
39-
"list": repeat([1, 2, 3], complexity),
40-
"functions": repeat(wl.Function(1, 2, 3), complexity),
34+
"expr": {
35+
"symbols": repeat(wl.Symbol, complexity),
36+
"strings": repeat("string", complexity),
37+
"bytes": repeat(b"bytes", complexity),
38+
"integers": repeat(1, complexity),
39+
"decimals": repeat(decimal.Decimal("1.23"), complexity),
40+
"floats": repeat(1.23, complexity),
41+
"dict": repeat({1: 2, 3: 4, 5: 6}, complexity),
42+
"list": repeat([1, 2, 3], complexity),
43+
"functions": repeat(wl.Function(1, 2, 3), complexity),
44+
},
45+
"array": {
46+
"%s_%s" % (func.__name__, t): func(tuple(range(complexity * 100)), t)
47+
for func in (PackedArray, NumericArray)
48+
for t in ("Integer64", "Real64")
49+
},
4150
}
4251

4352
def formatted_time(self, function, *args, **opts):
@@ -47,10 +56,15 @@ def formatted_time(self, function, *args, **opts):
4756
return "%.5f" % (time / self.repetitions)
4857

4958
def table_line(self, *iterable):
50-
self.print(*(force_text(c).ljust(self.col_size) for c in iterable))
59+
self.print(
60+
*(
61+
force_text(c).ljust(i and self.col_size or self.title_size)
62+
for i, c in enumerate(iterable)
63+
)
64+
)
5165

5266
def table_divider(self, length):
53-
self.print(*("-" * self.col_size for i in range(length)))
67+
self.print(*("-" * (i and self.col_size or self.title_size) for i in range(length)))
5468

5569
def stream_generators(self, path):
5670
yield "Memory", lambda complexity, export_format, path=path: None
@@ -62,13 +76,13 @@ def report(self):
6276

6377
path = tempfile.gettempdir()
6478

65-
benchmarks = [(c, self.complexity_handler(c)) for c in self.complexity]
79+
benchmarks = [(c, self.expression_handler(c)) for c in self.complexity]
6680

6781
self.table_line("dumping results in %s" % path)
6882
self.table_line()
6983

7084
# running export to do all lazy loadings
71-
export(1)
85+
binary_deserialize(export(1, target_format = 'wxf'))
7286

7387
self.table_line("* Binary deserialize")
7488
self.table_line()
@@ -102,24 +116,26 @@ def report(self):
102116
)
103117
self.table_divider(len(self.complexity) + 1)
104118

105-
for label, export_format, opts in (
106-
("wl", "wl", dict()),
107-
("wxf", "wxf", dict()),
108-
("wxf zip", "wxf", dict(compress=True)),
109-
):
110-
self.table_line(
111-
label,
112-
*(
113-
self.formatted_time(
114-
export,
115-
expr,
116-
stream=stream_generator(complexity, export_format),
117-
target_format=export_format,
118-
**opts
119+
for key in ("expr", "array"):
120+
for label, export_format, opts in (
121+
("wl", "wl", dict()),
122+
("wxf", "wxf", dict()),
123+
("wxf zip", "wxf", dict(compress=True)),
124+
):
125+
if key == "expr" or (key == "array" and not label == "wl"):
126+
self.table_line(
127+
key == "expr" and label or "%s %s" % (label, key),
128+
*(
129+
self.formatted_time(
130+
export,
131+
expr[key],
132+
stream=stream_generator(complexity, export_format),
133+
target_format=export_format,
134+
**opts
135+
)
136+
for complexity, expr in benchmarks
137+
)
119138
)
120-
for complexity, expr in benchmarks
121-
)
122-
)
123139

124140
self.table_line()
125141

wolframclient/language/array.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import absolute_import, print_function, unicode_literals
22

3+
import struct
4+
35
from wolframclient.exception import WolframLanguageException
46
from wolframclient.serializers.wxfencoder import constants
5-
from wolframclient.utils.encoding import concatenate_bytes
7+
from wolframclient.utils.encoding import force_bytes
68

79
try:
810
from collections.abc import Sequence
@@ -15,19 +17,21 @@ def __init__(self, array, type, shape=None):
1517

1618
self.array = array
1719
self.shape = shape or (len(array),)
18-
self.type = type
19-
self._valid_type_or_fail(type)
20-
self.struct = constants.STRUCT_MAPPING[type]
20+
self.type = self._valid_type_or_fail(type)
21+
self.struct = constants.STRUCT_MAPPING[self.type]
2122

2223
def _valid_type_or_fail(self, type):
2324
if type not in constants.STRUCT_MAPPING:
2425
raise WolframLanguageException(
2526
"Type %s is not one of the supported array types: %s."
2627
% (type, ", ".join(constants.STRUCT_MAPPING.keys()))
2728
)
29+
return type
2830

2931
def tobytes(self):
30-
return concatenate_bytes(self.struct.pack(el) for el in self.array)
32+
return struct.pack(
33+
b"<%i%s" % (len(self), force_bytes(self.struct.format[1])), *self.array
34+
)
3135

3236
def __getitem__(self, k):
3337
return self.array[k]
@@ -43,3 +47,4 @@ def _valid_type_or_fail(self, type):
4347
"Type %s is not one of the supported packed array types: %s."
4448
% (type, ", ".join(sorted(constants.VALID_PACKED_ARRAY_LABEL_TYPES)))
4549
)
50+
return type

wolframclient/serializers/wl.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import, print_function, unicode_literals
22

3-
from itertools import chain
3+
from itertools import chain, starmap
44

55
from wolframclient.serializers.base import FormatSerializer
66
from wolframclient.serializers.utils import py_encode_decimal, py_encode_text
@@ -9,16 +9,14 @@
99
from wolframclient.utils.encoding import force_bytes, force_text
1010

1111

12-
def yield_with_separators(iterable, separator=b", ", first=None, last=None):
13-
if first:
14-
yield first
12+
def yield_with_separators(iterable, first, last, separator=b", "):
13+
yield first
1514
for i, arg in enumerate(iterable):
1615
if i:
1716
yield separator
1817
for sub in arg:
1918
yield sub
20-
if last:
21-
yield last
19+
yield last
2220

2321

2422
class WLSerializer(FormatSerializer):
@@ -60,16 +58,14 @@ def serialize_int(self, number):
6058
yield b"%i" % number
6159

6260
def serialize_rule(self, lhs, rhs):
63-
return yield_with_separators((lhs, rhs), separator=b" -> ")
61+
return chain(lhs, (b" -> ",), rhs)
6462

6563
def serialize_rule_delayed(self, lhs, rhs):
66-
return yield_with_separators((lhs, rhs), separator=b" :> ")
64+
return chain(lhs, (b" :> ",), rhs)
6765

6866
def serialize_mapping(self, mapping, **opts):
6967
return yield_with_separators(
70-
(self.serialize_rule(key, value) for key, value in mapping),
71-
first=b"<|",
72-
last=b"|>",
68+
starmap(self.serialize_rule, mapping), first=b"<|", last=b"|>"
7369
)
7470

7571
def serialize_association(self, mapping, **opts):

wolframclient/serializers/wxf.py

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import, print_function, unicode_literals
22

3-
from itertools import chain
3+
from itertools import chain, starmap
44

55
from wolframclient.serializers.base import FormatSerializer
66
from wolframclient.serializers.utils import py_encode_decimal, safe_len
@@ -20,7 +20,12 @@
2020
)
2121
from wolframclient.utils import six
2222
from wolframclient.utils.api import zlib
23-
from wolframclient.utils.encoding import force_bytes, force_text
23+
from wolframclient.utils.encoding import concatenate_bytes, force_bytes, force_text
24+
from wolframclient.utils.functional import partition
25+
26+
27+
def serialize_rule(key, value, sep=(WXF_CONSTANTS.Rule,)):
28+
return chain(sep, key, value)
2429

2530

2631
def get_length(iterable, length=None):
@@ -37,6 +42,16 @@ def get_length(iterable, length=None):
3742
return iterable, len(iterable)
3843

3944

45+
def compress(data):
46+
47+
compressor = zlib.compressobj()
48+
49+
for token in map(compressor.compress, map(concatenate_bytes, partition(data, 100))):
50+
yield token
51+
52+
yield compressor.flush()
53+
54+
4055
class WXFSerializer(FormatSerializer):
4156
""" Serialize python objects to WXF. """
4257

@@ -46,25 +61,14 @@ def __init__(self, normalizer=None, compress=False, **opts):
4661

4762
def generate_bytes(self, data):
4863

49-
yield WXF_VERSION
50-
5164
if self.compress:
52-
yield WXF_HEADER_COMPRESS
5365

54-
yield WXF_HEADER_SEPARATOR
66+
return chain(
67+
(WXF_VERSION, WXF_HEADER_COMPRESS, WXF_HEADER_SEPARATOR),
68+
compress(self.encode(data)),
69+
)
5570

56-
if self.compress:
57-
compressor = zlib.compressobj()
58-
if six.PY2:
59-
for payload in self.encode(data):
60-
yield compressor.compress(six.binary_type(payload))
61-
else:
62-
for payload in self.encode(data):
63-
yield compressor.compress(payload)
64-
yield compressor.flush()
65-
else:
66-
for payload in self.encode(data):
67-
yield payload
71+
return chain((WXF_VERSION, WXF_HEADER_SEPARATOR), self.encode(data))
6872

6973
def serialize_symbol(self, name):
7074
yield WXF_CONSTANTS.Symbol
@@ -116,12 +120,9 @@ def serialize_string(self, string):
116120

117121
def serialize_bytes(self, bytes, as_byte_array=not six.PY2):
118122
if as_byte_array:
119-
yield WXF_CONSTANTS.BinaryString
120-
yield varint_bytes(len(bytes))
121-
yield bytes
123+
return (WXF_CONSTANTS.BinaryString, varint_bytes(len(bytes)), bytes)
122124
else:
123-
for token in self.serialize_string(force_text(bytes, encoding="iso8859-1")):
124-
yield token
125+
return self.serialize_string(force_text(bytes, encoding="iso8859-1"))
125126

126127
def serialize_mapping(self, keyvalue, **opts):
127128
# the normalizer always sends a generator of (key, value) pairs
@@ -130,9 +131,7 @@ def serialize_mapping(self, keyvalue, **opts):
130131

131132
return chain(
132133
(WXF_CONSTANTS.Association, varint_bytes(length)),
133-
chain.from_iterable(
134-
chain((WXF_CONSTANTS.Rule,), key, value) for key, value in iterable
135-
),
134+
chain.from_iterable(starmap(serialize_rule, iterable)),
136135
)
137136

138137
def serialize_numeric_array(self, data, dimensions, wl_type):

0 commit comments

Comments
 (0)