Skip to content

Commit 7229ca3

Browse files
committed
Added option for enabling backwards compatiblity decoding for obvious use cases (i.e. has a .)
1 parent c56c6b6 commit 7229ca3

File tree

3 files changed

+82
-20
lines changed

3 files changed

+82
-20
lines changed

codec.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ type CodecOption struct {
4949
// When true, the string literal "null" in textual Avro data will be coerced to Go's nil.
5050
// Primarily used to handle edge cases where some Avro implementations allow string representations of null.
5151
EnableStringNull bool
52+
53+
// EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding controls backwards compatibility for decimal
54+
// binary decoding. When true, the decoder will first check if binary bytes look like an
55+
// ASCII decimal string (e.g., "40.20") and parse them as such. This handles legacy data
56+
// that was incorrectly encoded as ASCII strings instead of two's-complement bytes.
57+
// WARNING: This can cause incorrect decoding if a valid two's-complement value happens
58+
// to look like ASCII digits. Only enable this if you have legacy incorrectly-encoded data.
59+
// Default: false (use correct two's-complement decoding only)
60+
EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding bool
5261
}
5362

5463
// Codec supports decoding binary and text Avro data to Go native data types,
@@ -83,6 +92,7 @@ type codecBuilder struct {
8392
func DefaultCodecOption() *CodecOption {
8493
return &CodecOption{
8594
EnableStringNull: true,
95+
EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding: false,
8696
}
8797
}
8898

@@ -739,9 +749,9 @@ func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace
739749
case "record":
740750
return makeRecordCodec(st, enclosingNamespace, schemaMap, cb)
741751
case "bytes.decimal":
742-
return makeDecimalBytesCodec(st, enclosingNamespace, schemaMap)
752+
return makeDecimalBytesCodec(st, enclosingNamespace, schemaMap, cb)
743753
case "fixed.decimal":
744-
return makeDecimalFixedCodec(st, enclosingNamespace, schemaMap)
754+
return makeDecimalFixedCodec(st, enclosingNamespace, schemaMap, cb)
745755
case "string.validated-string":
746756
return makeValidatedStringCodec(st, enclosingNamespace, schemaMap)
747757
default:

logical_type.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ func precisionAndScaleFromSchemaMap(schemaMap map[string]interface{}) (int, int,
258258

259259
var one = big.NewInt(1)
260260

261-
func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) {
261+
func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) {
262262
precision, scale, err := precisionAndScaleFromSchemaMap(schemaMap)
263263
if err != nil {
264264
return nil, err
@@ -275,16 +275,20 @@ func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, sche
275275
decimalSearchType := fmt.Sprintf("bytes.decimal.%d.%d", precision, scale)
276276
st[decimalSearchType] = c
277277

278+
// Check if backwards compatibility mode is enabled
279+
enableBackwardsCompat := cb != nil && cb.option != nil && cb.option.EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding
280+
278281
c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale)
279282
c.textualFromNative = decimalTextualFromNative(scale)
280-
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale)
283+
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale, enableBackwardsCompat)
281284
c.nativeFromTextual = nativeFromDecimalTextual()
282285
return c, nil
283286
}
284287

285-
// nativeFromDecimalBytes decodes bytes to *big.Rat with backwards compatibility
286-
// for incorrectly encoded ASCII decimal strings.
287-
func nativeFromDecimalBytes(fn toNativeFn, scale int) toNativeFn {
288+
// nativeFromDecimalBytes decodes bytes to *big.Rat.
289+
// If enableBackwardsCompat is true, it first checks if bytes look like an ASCII decimal
290+
// string (for backwards compatibility with incorrectly encoded legacy data).
291+
func nativeFromDecimalBytes(fn toNativeFn, scale int, enableBackwardsCompat bool) toNativeFn {
288292
return func(bytes []byte) (interface{}, []byte, error) {
289293
d, b, err := fn(bytes)
290294
if err != nil {
@@ -296,7 +300,7 @@ func nativeFromDecimalBytes(fn toNativeFn, scale int) toNativeFn {
296300
}
297301

298302
// Check if bytes look like ASCII decimal string (backwards compat)
299-
if looksLikeASCIIDecimal(bs) {
303+
if enableBackwardsCompat && looksLikeASCIIDecimal(bs) {
300304
r := new(big.Rat)
301305
if _, ok := r.SetString(string(bs)); ok {
302306
return r, b, nil
@@ -362,7 +366,9 @@ func nativeFromDecimalTextual() toNativeFn {
362366

363367
// looksLikeASCIIDecimal checks if the bytes look like an ASCII decimal string
364368
// (for backwards compatibility with incorrectly encoded data).
365-
// Returns true if all bytes are printable ASCII and form a valid decimal pattern.
369+
// Returns true if all bytes are printable ASCII, form a valid decimal pattern,
370+
// and contain a decimal point. Requiring a decimal point reduces false positives
371+
// since 0x2E ('.') is unlikely to appear in valid two's-complement encoded numbers.
366372
func looksLikeASCIIDecimal(bs []byte) bool {
367373
if len(bs) == 0 {
368374
return false
@@ -386,10 +392,11 @@ func looksLikeASCIIDecimal(bs []byte) bool {
386392
return false // non-decimal character
387393
}
388394
}
389-
return hasDigit
395+
// Require both a digit and a decimal point to reduce false positives
396+
return hasDigit && hasDot
390397
}
391398

392-
func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) {
399+
func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) {
393400
precision, scale, err := precisionAndScaleFromSchemaMap(schemaMap)
394401
if err != nil {
395402
return nil, err
@@ -405,9 +412,13 @@ func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, sche
405412
if err != nil {
406413
return nil, err
407414
}
415+
416+
// Check if backwards compatibility mode is enabled
417+
enableBackwardsCompat := cb != nil && cb.option != nil && cb.option.EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding
418+
408419
c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale)
409420
c.textualFromNative = decimalTextualFromNative(scale)
410-
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale)
421+
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale, enableBackwardsCompat)
411422
c.nativeFromTextual = nativeFromDecimalTextual()
412423
return c, nil
413424
}

logical_type_test.go

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -276,15 +276,18 @@ func TestDecimalFixedTextualRoundTrip(t *testing.T) {
276276

277277
func TestDecimalBytesBackwardsCompatibility(t *testing.T) {
278278
// Test that binary data incorrectly encoded as ASCII decimal strings
279-
// can still be decoded correctly (backwards compatibility)
279+
// can still be decoded correctly when backwards compatibility is enabled
280280
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
281-
codec, err := NewCodec(schema)
281+
282+
// Create codec with backwards compatibility enabled
283+
opt := &CodecOption{EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding: true}
284+
codec, err := NewCodecWithOptions(schema, opt)
282285
if err != nil {
283286
t.Fatal(err)
284287
}
285288

286289
// Simulate incorrectly encoded data: "40.20" as ASCII bytes
287-
// Length prefix (10 = 0x14 in varint) + ASCII bytes for "40.20"
290+
// Length prefix (10 = 0x0a in varint) + ASCII bytes for "40.20"
288291
incorrectlyEncodedBytes := append([]byte{0x0a}, []byte("40.20")...)
289292

290293
native, _, err := codec.NativeFromBinary(incorrectlyEncodedBytes)
@@ -386,12 +389,14 @@ func TestLooksLikeASCIIDecimal(t *testing.T) {
386389
{[]byte("40.20"), true},
387390
{[]byte("-40.20"), true},
388391
{[]byte("+40.20"), true},
389-
{[]byte("0"), true},
390-
{[]byte("123456"), true},
391392
{[]byte(".5"), true},
392393
{[]byte("5."), true},
393-
{[]byte(""), false},
394-
{[]byte("-"), false},
394+
{[]byte("0.0"), true},
395+
{[]byte("0"), false}, // no decimal point - could be valid two's complement
396+
{[]byte("123456"), false}, // no decimal point - could be valid two's complement
397+
{[]byte(""), false}, // empty
398+
{[]byte("-"), false}, // no digit
399+
{[]byte("."), false}, // no digit
395400
{[]byte("40.20.30"), false}, // multiple dots
396401
{[]byte("40-20"), false}, // sign not at start
397402
{[]byte("40a20"), false}, // non-decimal char
@@ -409,10 +414,46 @@ func TestLooksLikeASCIIDecimal(t *testing.T) {
409414
}
410415
}
411416

417+
func TestDecimalDefaultNoBackwardsCompat(t *testing.T) {
418+
// Test that by default, ASCII-looking bytes are NOT interpreted as ASCII
419+
// but rather as two's-complement (which is the correct spec behavior)
420+
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
421+
422+
// Create codec with default options (no backwards compat)
423+
codec, err := NewCodec(schema)
424+
if err != nil {
425+
t.Fatal(err)
426+
}
427+
428+
// "09" as ASCII bytes is [0x30, 0x39] which is 12345 in decimal
429+
// This could be misinterpreted as the string "09" if backwards compat were on
430+
// But with default settings, it should be decoded as 12345/100 = 123.45
431+
asciiLookingBytes := []byte{0x04, 0x30, 0x39} // length=2, bytes="09"
432+
433+
native, _, err := codec.NativeFromBinary(asciiLookingBytes)
434+
if err != nil {
435+
t.Fatalf("NativeFromBinary: %v", err)
436+
}
437+
438+
rat, ok := native.(*big.Rat)
439+
if !ok {
440+
t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", native)
441+
}
442+
443+
// 0x3039 = 12345, with scale 2 = 123.45 = 12345/100
444+
expected := big.NewRat(12345, 100)
445+
if rat.Cmp(expected) != 0 {
446+
t.Errorf("NativeFromBinary (default, no backwards compat): got %v, want %v", rat, expected)
447+
}
448+
}
449+
412450
func TestDecimalNegativeBackwardsCompatibility(t *testing.T) {
413451
// Test backwards compatibility with negative numbers encoded as ASCII
414452
schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
415-
codec, err := NewCodec(schema)
453+
454+
// Create codec with backwards compatibility enabled
455+
opt := &CodecOption{EnableDecimalBinaryToTextualBackwardsCompatASCIIDecoding: true}
456+
codec, err := NewCodecWithOptions(schema, opt)
416457
if err != nil {
417458
t.Fatal(err)
418459
}

0 commit comments

Comments
 (0)