@@ -485,8 +485,8 @@ def test_calculate_levenshtein_distance_weights_length_doesnt_match():
485485
486486
487487def test_all_numeric_columns ():
488- df_db = pd .DataFrame ({"A" : [1 , 2 , 3 ], "B" : [4.5 , 5.5 , 6.5 ]})
489- df_query = pd .DataFrame ({"A" : [4 , 5 , "" ], "B" : [4.5 , 5.5 , 6.5 ]})
488+ df_db = pd .DataFrame ({"A" : ["1" , "2" , "3" ], "B" : [" 4.5" , " 5.5" , " 6.5" ]})
489+ df_query = pd .DataFrame ({"A" : ["4" , "5" , "" ], "B" : [" 4.5" , " 5.5" , " 6.5" ]})
490490 weights = [0.1 , 0.2 ]
491491 expected_output = (
492492 ["A" , "B" ], # numerical_features
@@ -500,16 +500,16 @@ def test_all_numeric_columns():
500500
501501
def test_numeric_columns__one_non_numeric():
    """Check field typing when one query column holds a non-numeric string.

    Both frames store their values as strings. Column "A" contains only
    digit strings in both frames, while column "B" of the query frame
    contains the value "abcd". The expected result places "A" (weight 0.1)
    in the numerical group and "B" (weight 0.2) in the categorical group,
    with the boolean group left empty.
    """
    # NOTE(review): the "B" values carry a leading space inside the string
    # literal exactly as shown in the change set -- confirm this is intended
    # and not an artifact of how the diff was captured.
    df_db = pd.DataFrame({"A": ["1", "2", "3"], "B": [" 4.5", " 5.5", " 6.5"]})
    df_query = pd.DataFrame({"A": ["4", "5", "6"], "B": ["abcd", " 5.5", " 6.5"]})
    weights = [0.1, 0.2]
    expected_output = (
        ["A"],  # numerical_features
        [],  # boolean_features
        ["B"],  # categorical_features
        [0.1],  # numerical_weights
        [],  # boolean_weights
        [0.2],  # categorical_weights
    )
    assert determine_field_types(df_db, df_query, weights) == expected_output
515515
@@ -555,16 +555,16 @@ def test_all_categorical_columns():
555555def test_mixed_types ():
556556 df_db = pd .DataFrame (
557557 {
558- "A" : [1 , 2 , 3 ],
558+ "A" : ["1" , "2" , "3" ],
559559 "B" : ["true" , "false" , "true" ],
560560 "C" : ["apple" , "banana" , "cherry" ],
561561 }
562562 )
563563 df_query = pd .DataFrame (
564564 {
565- "A" : [1 , 3 , "" ],
565+ "A" : ["1" , "3" , "" ],
566566 "B" : ["true" , "true" , "true" ],
567- "C" : ["apple" , "" , 3 ],
567+ "C" : ["apple" , "" , "3" ],
568568 }
569569 )
570570 weights = [0.7 , 0.8 , 0.9 ]
0 commit comments