Skip to content

Commit 567d246

Browse files
committed
Added method to hash a JSON value
1 parent 102c364 commit 567d246

1 file changed

Lines changed: 132 additions & 0 deletions

File tree

src/bd2k/util/hashes.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
def hash_json( hash_obj, value ):
    """
    Compute the hash of a parsed JSON value using the given hash object. This function does not
    hash JSON text, it hashes the object tree that is the result of parsing a string in JSON
    format. Mappings (JSON objects, recognized by their iteritems() method) are hashed entry by
    entry in order of the lexicographical ordering on the keys. All other iterables are hashed in
    their inherent order. None (JSON null) is hashed as 'null'.

    If value or any of its children is an iterable with non-deterministic ordering of its
    elements, e.g. a set, this method will yield non-deterministic results.

    :param hash_obj: one of the Hash objects in hashlib, or any other object that has an update(s)
           method accepting a single string.

    :type value: None|bool|int|long|float|str|Iterable[type(value)]|Mapping[str,type(value)]
    :param value: The value to be hashed

    :raise ValueError: if value or one of its children is of an unsupported type, or if a
           mapping contains a key that is not a string

    >>> import hashlib
    >>> def actual(x): h = hashlib.md5(); hash_json(h,x); return h.hexdigest()
    >>> def expect(s): h = hashlib.md5(); h.update(s); return h.hexdigest()

    >>> actual(0) == expect('0')
    True
    >>> actual(0.0) == expect('0.0')
    True
    >>> actual(0.1) == expect('0.1')
    True
    >>> actual(2**64) == expect('18446744073709551616')
    True
    >>> actual(True) == expect('true')
    True
    >>> actual(False) == expect('false')
    True
    >>> actual(None) == expect('null')
    True
    >>> actual("") == expect('""')
    True
    >>> actual([]) == expect('[]')
    True
    >>> actual([0]) == expect('[0]')
    True
    >>> actual([0,1]) == expect('[0,1]')
    True
    >>> actual([None]) == expect('[null]')
    True
    >>> actual({}) == expect('{}')
    True
    >>> actual({'':0}) == expect('{:0}')
    True
    >>> actual({'0':0}) == expect('{0:0}')
    True
    >>> actual({'0':0,'1':1}) == expect('{0:0,1:1}')
    True
    >>> actual({'':[]}) == expect('{:[]}')
    True
    >>> actual([{}]) == expect('[{}]')
    True
    >>> actual({0:0})
    Traceback (most recent call last):
    ...
    ValueError: Dictionary keys must be strings, not <type 'int'>
    >>> actual(object())
    Traceback (most recent call last):
    ...
    ValueError: Type <type 'object'> is not supported
    """
    # Mappings first: anything offering iteritems() is treated as a JSON object. Only the
    # attribute lookup is guarded so that errors raised while hashing the entries propagate.
    try:
        items = value.iteritems( )
    except AttributeError:
        pass
    else:
        _hash_hashable( hash_obj, items )
        return

    # Must check for string before testing iterability since strings are iterable
    if isinstance( value, basestring ):
        _hash_string( hash_obj, value )
        return

    try:
        iterator = iter( value )
    except TypeError:
        pass
    else:
        _hash_iterable( hash_obj, iterator )
        return

    # Only scalars are left at this point
    if value is None:
        hash_obj.update( 'null' )
    elif isinstance( value, bool ):
        # bool must be tested before the numeric types since it is a subclass of int
        _hash_bool( hash_obj, value )
    elif isinstance( value, (int, long, float) ):
        # long is listed explicitly: it is not a subclass of int, and it is what large JSON
        # integers are parsed into
        _hash_number( hash_obj, value )
    else:
        raise ValueError( 'Type %s is not supported' % type( value ) )
81+
82+
83+
def _hash_number( hash_obj, n ):
    """
    Feed the str() representation of a number to the hash object.

    :param hash_obj: object with an update(s) method accepting a single string
    :param n: the number to be hashed
    """
    text = str( n )
    hash_obj.update( text )
85+
86+
87+
def _hash_bool( hash_obj, b ):
    """
    Feed the JSON literal for a boolean, 'true' or 'false', to the hash object.

    :param hash_obj: object with an update(s) method accepting a single string
    :param b: the value whose truthiness selects the literal
    """
    if b:
        literal = 'true'
    else:
        literal = 'false'
    hash_obj.update( literal )
89+
90+
91+
def _hash_string( hash_obj, s ):
    """
    Feed a string to the hash object, enclosed in double quotes.

    Unicode strings are encoded as UTF-8 before being fed to the hash object. Byte strings are
    passed through unchanged, so strings that only contain ASCII characters yield the same
    bytes regardless of their type.

    Note that the content of the string is fed verbatim, double quotes occurring inside of s
    are not escaped.

    :param hash_obj: object with an update(s) method accepting a single string
    :param s: the byte string or unicode string to be hashed
    """
    if not isinstance( s, bytes ):
        # JSON parsers return unicode strings. hashlib would implicitly encode those as ASCII
        # and fail on any non-ASCII character, so encode explicitly.
        s = s.encode( 'utf-8' )
    hash_obj.update( '"' )
    hash_obj.update( s )
    hash_obj.update( '"' )
95+
96+
97+
def _hash_iterable( hash_obj, items ):
    """
    Hash the given elements as a JSON array: an opening bracket, the hash of each element in
    iteration order with a comma between consecutive elements, and a closing bracket.

    A plain for loop is used so that only exhaustion of items itself ends the iteration. A
    StopIteration escaping from hashing one of the elements propagates to the caller instead
    of silently truncating the array.

    :param hash_obj: object with an update(s) method accepting a single string
    :param items: iterator (or any iterable) over the elements to be hashed
    """
    hash_obj.update( '[' )
    for index, item in enumerate( items ):
        if index > 0:
            hash_obj.update( ',' )
        hash_json( hash_obj, item )
    hash_obj.update( ']' )
109+
110+
111+
def _hash_hashable( hash_obj, items ):
    """
    Hash the given (key, value) pairs as a JSON object: an opening brace, the hash of each
    entry in sorted order with a comma between consecutive entries, and a closing brace.

    A plain for loop is used so that a StopIteration escaping from hashing one of the entries
    propagates to the caller instead of silently truncating the object.

    :param hash_obj: object with an update(s) method accepting a single string
    :param items: iterable of (key, value) pairs, e.g. the result of dict.iteritems()
    """
    # Sort before emitting anything so that a failure to sort leaves hash_obj untouched
    entries = sorted( items )
    hash_obj.update( '{' )
    for index, entry in enumerate( entries ):
        if index > 0:
            hash_obj.update( ',' )
        _hash_hashable_item( hash_obj, entry )
    hash_obj.update( '}' )
124+
125+
126+
def _hash_hashable_item( hash_obj, item ):
    """
    Hash a single entry of a JSON object: the key, a colon and the hash of the value.

    The key is fed verbatim, i.e. without enclosing quotes or escaping. Unicode keys are
    encoded as UTF-8 first, byte string keys are passed through unchanged.

    :param hash_obj: object with an update(s) method accepting a single string
    :param item: a (key, value) pair
    :raise ValueError: if the key is not a string
    """
    # Unpack explicitly rather than in the signature, tuple parameters are not supported by
    # newer versions of the language
    k, v = item
    if not isinstance( k, basestring ):
        raise ValueError( 'Dictionary keys must be strings, not %s' % type( k ) )
    if not isinstance( k, bytes ):
        # hashlib would implicitly encode a unicode key as ASCII and fail on any non-ASCII
        # character, so encode explicitly.
        k = k.encode( 'utf-8' )
    hash_obj.update( k )
    hash_obj.update( ':' )
    hash_json( hash_obj, v )

0 commit comments

Comments
 (0)