Skip to content

Commit 6a6efc3

Browse files
committed
Change implementation to be fully backwards compatible and add option to be Spec compliant
1 parent 7229ca3 commit 6a6efc3

File tree

3 files changed

+63
-205
lines changed

3 files changed

+63
-205
lines changed

codec.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,13 @@ type CodecOption struct {
5050
// Primarily used to handle edge cases where some Avro implementations allow string representations of null.
5151
EnableStringNull bool
5252

53-
// EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding controls backwards compatibility for decimal
54-
// binary decoding. When true, the decoder will first check if binary bytes look like an
55-
// ASCII decimal string (e.g., "40.20") and parse them as such. This handles legacy data
56-
// that was incorrectly encoded as ASCII strings instead of two's-complement bytes.
57-
// WARNING: This can cause incorrect decoding if a valid two's-complement value happens
58-
// to look like ASCII digits. Only enable this if you have legacy incorrectly-encoded data.
59-
// Default: false (use correct two's-complement decoding only)
60-
EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding bool
53+
// EnableDecimalBinarySpecCompliantEncoding controls whether decimal values use
54+
// Avro 1.10.2 spec-compliant encoding. When true:
55+
// - Binary encoding uses two's-complement representation of the unscaled integer
56+
// - JSON textual encoding uses human-readable decimal strings like "40.20"
57+
// When false (default), legacy encoding is used for backwards compatibility.
58+
// Default: false (legacy encoding for backwards compatibility)
59+
EnableDecimalBinarySpecCompliantEncoding bool
6160
}
6261

6362
// Codec supports decoding binary and text Avro data to Go native data types,
@@ -91,8 +90,8 @@ type codecBuilder struct {
9190
// DefaultCodecOption returns a CodecOption with recommended default settings.
9291
func DefaultCodecOption() *CodecOption {
9392
return &CodecOption{
94-
EnableStringNull: true,
95-
EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding: false,
93+
EnableStringNull: true,
94+
EnableDecimalBinarySpecCompliantEncoding: false,
9695
}
9796
}
9897

logical_type.go

Lines changed: 34 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -275,20 +275,27 @@ func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, sche
275275
decimalSearchType := fmt.Sprintf("bytes.decimal.%d.%d", precision, scale)
276276
st[decimalSearchType] = c
277277

278-
// Check if backwards compatibility mode is enabled
279-
enableBackwardsCompat := cb != nil && cb.option != nil && cb.option.EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding
280-
281-
c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale)
282-
c.textualFromNative = decimalTextualFromNative(scale)
283-
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale, enableBackwardsCompat)
284-
c.nativeFromTextual = nativeFromDecimalTextual()
278+
// Check if spec-compliant encoding is enabled
279+
specCompliant := cb != nil && cb.option != nil && cb.option.EnableDecimalBinarySpecCompliantEncoding
280+
281+
if specCompliant {
282+
// Spec-compliant encoding: two's complement binary, human-readable textual
283+
c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale)
284+
c.textualFromNative = decimalTextualFromNative(scale)
285+
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale)
286+
c.nativeFromTextual = nativeFromDecimalTextual()
287+
} else {
288+
// Legacy encoding (default): for backwards compatibility
289+
c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale)
290+
c.textualFromNative = decimalBytesFromNative(bytesTextualFromNative, toSignedBytes, precision, scale)
291+
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale)
292+
c.nativeFromTextual = nativeFromDecimalBytes(bytesNativeFromTextual, scale)
293+
}
285294
return c, nil
286295
}
287296

288-
// nativeFromDecimalBytes decodes bytes to *big.Rat.
289-
// If enableBackwardsCompat is true, it first checks if bytes look like an ASCII decimal
290-
// string (for backwards compatibility with incorrectly encoded legacy data).
291-
func nativeFromDecimalBytes(fn toNativeFn, scale int, enableBackwardsCompat bool) toNativeFn {
297+
// nativeFromDecimalBytes decodes bytes to *big.Rat using two's-complement representation.
298+
func nativeFromDecimalBytes(fn toNativeFn, scale int) toNativeFn {
292299
return func(bytes []byte) (interface{}, []byte, error) {
293300
d, b, err := fn(bytes)
294301
if err != nil {
@@ -299,15 +306,7 @@ func nativeFromDecimalBytes(fn toNativeFn, scale int, enableBackwardsCompat bool
299306
return nil, bytes, fmt.Errorf("cannot transform to native decimal, expected []byte, received %T", d)
300307
}
301308

302-
// Check if bytes look like ASCII decimal string (backwards compat)
303-
if enableBackwardsCompat && looksLikeASCIIDecimal(bs) {
304-
r := new(big.Rat)
305-
if _, ok := r.SetString(string(bs)); ok {
306-
return r, b, nil
307-
}
308-
}
309-
310-
// Normal two's-complement decoding
309+
// Two's-complement decoding
311310
num := big.NewInt(0)
312311
fromSignedBytes(num, bs)
313312
denom := new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(scale)), nil)
@@ -364,38 +363,6 @@ func nativeFromDecimalTextual() toNativeFn {
364363
}
365364
}
366365

367-
// looksLikeASCIIDecimal checks if the bytes look like an ASCII decimal string
368-
// (for backwards compatibility with incorrectly encoded data).
369-
// Returns true if all bytes are printable ASCII, form a valid decimal pattern,
370-
// and contain a decimal point. Requiring a decimal point reduces false positives
371-
// since 0x2E ('.') is unlikely to appear in valid two's-complement encoded numbers.
372-
func looksLikeASCIIDecimal(bs []byte) bool {
373-
if len(bs) == 0 {
374-
return false
375-
}
376-
hasDigit := false
377-
hasDot := false
378-
for i, b := range bs {
379-
switch {
380-
case b >= '0' && b <= '9':
381-
hasDigit = true
382-
case b == '.':
383-
if hasDot {
384-
return false // multiple dots
385-
}
386-
hasDot = true
387-
case b == '-' || b == '+':
388-
if i != 0 {
389-
return false // sign not at start
390-
}
391-
default:
392-
return false // non-decimal character
393-
}
394-
}
395-
// Require both a digit and a decimal point to reduce false positives
396-
return hasDigit && hasDot
397-
}
398-
399366
func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) {
400367
precision, scale, err := precisionAndScaleFromSchemaMap(schemaMap)
401368
if err != nil {
@@ -413,13 +380,22 @@ func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, sche
413380
return nil, err
414381
}
415382

416-
// Check if backwards compatibility mode is enabled
417-
enableBackwardsCompat := cb != nil && cb.option != nil && cb.option.EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding
383+
// Check if spec-compliant encoding is enabled
384+
specCompliant := cb != nil && cb.option != nil && cb.option.EnableDecimalBinarySpecCompliantEncoding
418385

419-
c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale)
420-
c.textualFromNative = decimalTextualFromNative(scale)
421-
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale, enableBackwardsCompat)
422-
c.nativeFromTextual = nativeFromDecimalTextual()
386+
if specCompliant {
387+
// Spec-compliant encoding: two's complement binary, human-readable textual
388+
c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale)
389+
c.textualFromNative = decimalTextualFromNative(scale)
390+
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale)
391+
c.nativeFromTextual = nativeFromDecimalTextual()
392+
} else {
393+
// Legacy encoding (default): for backwards compatibility
394+
c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale)
395+
c.textualFromNative = decimalBytesFromNative(c.textualFromNative, toSignedFixedBytes(size), precision, scale)
396+
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale)
397+
c.nativeFromTextual = nativeFromDecimalBytes(c.nativeFromTextual, scale)
398+
}
423399
return c, nil
424400
}
425401

logical_type_test.go

Lines changed: 20 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,13 @@ func TestDecimalBytesLogicalTypeInRecordDecodeWithDefault(t *testing.T) {
183183
testBinaryCodecPass(t, schema, map[string]interface{}{"mydecimal": big.NewRat(617, 50)}, []byte("\x04\x04\xd2"))
184184
}
185185

186-
func TestDecimalBytesTextualRoundTrip(t *testing.T) {
186+
func TestDecimalBytesSpecCompliantTextualRoundTrip(t *testing.T) {
187+
// Test spec-compliant textual encoding with human-readable decimal strings
187188
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
188-
codec, err := NewCodec(schema)
189+
190+
// Create codec with spec-compliant encoding enabled
191+
opt := &CodecOption{EnableDecimalBinarySpecCompliantEncoding: true}
192+
codec, err := NewCodecWithOptions(schema, opt)
189193
if err != nil {
190194
t.Fatal(err)
191195
}
@@ -229,9 +233,13 @@ func TestDecimalBytesTextualRoundTrip(t *testing.T) {
229233
}
230234
}
231235

232-
func TestDecimalFixedTextualRoundTrip(t *testing.T) {
236+
func TestDecimalFixedSpecCompliantTextualRoundTrip(t *testing.T) {
237+
// Test spec-compliant textual encoding with human-readable decimal strings
233238
schema := `{"type": "fixed", "size": 12, "logicalType": "decimal", "precision": 4, "scale": 2}`
234-
codec, err := NewCodec(schema)
239+
240+
// Create codec with spec-compliant encoding enabled
241+
opt := &CodecOption{EnableDecimalBinarySpecCompliantEncoding: true}
242+
codec, err := NewCodecWithOptions(schema, opt)
235243
if err != nil {
236244
t.Fatal(err)
237245
}
@@ -274,40 +282,8 @@ func TestDecimalFixedTextualRoundTrip(t *testing.T) {
274282
}
275283
}
276284

277-
func TestDecimalBytesBackwardsCompatibility(t *testing.T) {
278-
// Test that binary data incorrectly encoded as ASCII decimal strings
279-
// can still be decoded correctly when backwards compatibility is enabled
280-
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
281-
282-
// Create codec with backwards compatibility enabled
283-
opt := &CodecOption{EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding: true}
284-
codec, err := NewCodecWithOptions(schema, opt)
285-
if err != nil {
286-
t.Fatal(err)
287-
}
288-
289-
// Simulate incorrectly encoded data: "40.20" as ASCII bytes
290-
// Length prefix (10 = 0x0a in varint) + ASCII bytes for "40.20"
291-
incorrectlyEncodedBytes := append([]byte{0x0a}, []byte("40.20")...)
292-
293-
native, _, err := codec.NativeFromBinary(incorrectlyEncodedBytes)
294-
if err != nil {
295-
t.Fatalf("NativeFromBinary (backwards compat): %v", err)
296-
}
297-
298-
rat, ok := native.(*big.Rat)
299-
if !ok {
300-
t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", native)
301-
}
302-
303-
expected := big.NewRat(4020, 100)
304-
if rat.Cmp(expected) != 0 {
305-
t.Errorf("NativeFromBinary (backwards compat): got %v, want %v", rat, expected)
306-
}
307-
}
308-
309285
func TestDecimalBytesCorrectBinaryEncoding(t *testing.T) {
310-
// Test that correctly encoded binary data (two's complement) still works
286+
// Test that binary encoding uses two's complement (same for both legacy and spec-compliant)
311287
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
312288
codec, err := NewCodec(schema)
313289
if err != nil {
@@ -335,10 +311,13 @@ func TestDecimalBytesCorrectBinaryEncoding(t *testing.T) {
335311
}
336312
}
337313

338-
func TestDecimalTextualToBinaryRoundTrip(t *testing.T) {
339-
// Test the full flow: textual -> native -> binary -> native -> textual
314+
func TestDecimalSpecCompliantTextualToBinaryRoundTrip(t *testing.T) {
315+
// Test the full flow with spec-compliant encoding: textual -> native -> binary -> native -> textual
340316
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
341-
codec, err := NewCodec(schema)
317+
318+
// Create codec with spec-compliant encoding enabled
319+
opt := &CodecOption{EnableDecimalBinarySpecCompliantEncoding: true}
320+
codec, err := NewCodecWithOptions(schema, opt)
342321
if err != nil {
343322
t.Fatal(err)
344323
}
@@ -357,7 +336,7 @@ func TestDecimalTextualToBinaryRoundTrip(t *testing.T) {
357336
t.Fatalf("BinaryFromNative: %v", err)
358337
}
359338

360-
// Verify binary is two's complement, not ASCII string
339+
// Verify binary is two's complement
361340
// 4020 = 0x0FB4 in hex
362341
expectedBinary := []byte{0x04, 0x0f, 0xb4}
363342
if string(binary) != string(expectedBinary) {
@@ -381,102 +360,6 @@ func TestDecimalTextualToBinaryRoundTrip(t *testing.T) {
381360
}
382361
}
383362

384-
func TestLooksLikeASCIIDecimal(t *testing.T) {
385-
testCases := []struct {
386-
input []byte
387-
expected bool
388-
}{
389-
{[]byte("40.20"), true},
390-
{[]byte("-40.20"), true},
391-
{[]byte("+40.20"), true},
392-
{[]byte(".5"), true},
393-
{[]byte("5."), true},
394-
{[]byte("0.0"), true},
395-
{[]byte("0"), false}, // no decimal point - could be valid two's complement
396-
{[]byte("123456"), false}, // no decimal point - could be valid two's complement
397-
{[]byte(""), false}, // empty
398-
{[]byte("-"), false}, // no digit
399-
{[]byte("."), false}, // no digit
400-
{[]byte("40.20.30"), false}, // multiple dots
401-
{[]byte("40-20"), false}, // sign not at start
402-
{[]byte("40a20"), false}, // non-decimal char
403-
{[]byte("\x0f\xb4"), false}, // binary data (two's complement)
404-
{[]byte{0x00}, false}, // null byte
405-
{[]byte{0xff, 0xff}, false}, // high bytes (negative two's complement)
406-
{[]byte("12.34e5"), false}, // scientific notation not supported
407-
}
408-
409-
for _, tc := range testCases {
410-
result := looksLikeASCIIDecimal(tc.input)
411-
if result != tc.expected {
412-
t.Errorf("looksLikeASCIIDecimal(%q): got %v, want %v", tc.input, result, tc.expected)
413-
}
414-
}
415-
}
416-
417-
func TestDecimalDefaultNoBackwardsCompat(t *testing.T) {
418-
// Test that by default, ASCII-looking bytes are NOT interpreted as ASCII
419-
// but rather as two's-complement (which is the correct spec behavior)
420-
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
421-
422-
// Create codec with default options (no backwards compat)
423-
codec, err := NewCodec(schema)
424-
if err != nil {
425-
t.Fatal(err)
426-
}
427-
428-
// "09" as ASCII bytes is [0x30, 0x39] which is 12345 in decimal
429-
// This could be misinterpreted as the string "09" if backwards compat were on
430-
// But with default settings, it should be decoded as 12345/100 = 123.45
431-
asciiLookingBytes := []byte{0x04, 0x30, 0x39} // length=2, bytes="09"
432-
433-
native, _, err := codec.NativeFromBinary(asciiLookingBytes)
434-
if err != nil {
435-
t.Fatalf("NativeFromBinary: %v", err)
436-
}
437-
438-
rat, ok := native.(*big.Rat)
439-
if !ok {
440-
t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", native)
441-
}
442-
443-
// 0x3039 = 12345, with scale 2 = 123.45 = 12345/100
444-
expected := big.NewRat(12345, 100)
445-
if rat.Cmp(expected) != 0 {
446-
t.Errorf("NativeFromBinary (default, no backwards compat): got %v, want %v", rat, expected)
447-
}
448-
}
449-
450-
func TestDecimalNegativeBackwardsCompatibility(t *testing.T) {
451-
// Test backwards compatibility with negative numbers encoded as ASCII
452-
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
453-
454-
// Create codec with backwards compatibility enabled
455-
opt := &CodecOption{EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding: true}
456-
codec, err := NewCodecWithOptions(schema, opt)
457-
if err != nil {
458-
t.Fatal(err)
459-
}
460-
461-
// Simulate incorrectly encoded data: "-40.20" as ASCII bytes
462-
incorrectlyEncodedBytes := append([]byte{0x0c}, []byte("-40.20")...)
463-
464-
native, _, err := codec.NativeFromBinary(incorrectlyEncodedBytes)
465-
if err != nil {
466-
t.Fatalf("NativeFromBinary (backwards compat): %v", err)
467-
}
468-
469-
rat, ok := native.(*big.Rat)
470-
if !ok {
471-
t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", native)
472-
}
473-
474-
expected := big.NewRat(-4020, 100)
475-
if rat.Cmp(expected) != 0 {
476-
t.Errorf("NativeFromBinary (backwards compat): got %v, want %v", rat, expected)
477-
}
478-
}
479-
480363
func TestValidatedStringLogicalTypeInRecordEncode(t *testing.T) {
481364
schema := `{
482365
"type": "record",

0 commit comments

Comments
 (0)