From 54e540ce68defaa1a06d866629edc901fdeefbc6 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Wed, 27 May 2026 20:22:29 +0800 Subject: [PATCH 1/3] fix factorial(21) should not overflow when PostgreSQL returns a numeric answer --- datafusion/functions/src/math/factorial.rs | 61 ++++++++++++++----- datafusion/sqllogictest/test_files/scalar.slt | 24 +++++++- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs index 3b4f973f19d62..2234461ef281d 100644 --- a/datafusion/functions/src/math/factorial.rs +++ b/datafusion/functions/src/math/factorial.rs @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ArrayRef, AsArray, Int64Array}; +use arrow::array::{ArrayRef, AsArray, Decimal256Array}; use std::sync::Arc; -use arrow::datatypes::DataType::Int64; -use arrow::datatypes::{DataType, Int64Type}; +use arrow::datatypes::DataType::{Decimal256, Int64}; +use arrow::datatypes::{DECIMAL256_MAX_PRECISION, DataType, Int64Type}; +use arrow_buffer::i256; use datafusion_common::{ - Result, ScalarValue, exec_err, internal_err, utils::take_function_args, + DataFusionError, Result, ScalarValue, exec_err, internal_err, + utils::take_function_args, }; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -63,6 +65,8 @@ impl FactorialFunc { } } +const FACTORIAL_RETURN_TYPE: DataType = Decimal256(DECIMAL256_MAX_PRECISION, 0); + impl ScalarUDFImpl for FactorialFunc { fn name(&self) -> &str { "factorial" @@ -73,7 +77,7 @@ impl ScalarUDFImpl for FactorialFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Int64) + Ok(FACTORIAL_RETURN_TYPE) } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { @@ -82,13 +86,21 @@ impl ScalarUDFImpl for FactorialFunc { match arg { ColumnarValue::Scalar(scalar) => { if scalar.is_null() { - return Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))); + return Ok(ColumnarValue::Scalar(ScalarValue::Decimal256( + None, + DECIMAL256_MAX_PRECISION, + 0, + ))); } match scalar { ScalarValue::Int64(Some(v)) => { let result = compute_factorial(v)?; - Ok(ColumnarValue::Scalar(ScalarValue::Int64(Some(result)))) + Ok(ColumnarValue::Scalar(ScalarValue::Decimal256( + Some(result), + DECIMAL256_MAX_PRECISION, + 0, + ))) } _ => { internal_err!( @@ -100,9 +112,13 @@ impl ScalarUDFImpl for FactorialFunc { } ColumnarValue::Array(array) => match array.data_type() { Int64 => { - let result: Int64Array = array + let result = array .as_primitive::() - .try_unary(compute_factorial)?; + .iter() + .map(|value| value.map(compute_factorial).transpose()) + .collect::>>()?; + let result = Decimal256Array::from(result) + .with_precision_and_scale(DECIMAL256_MAX_PRECISION, 0)?; Ok(ColumnarValue::Array(Arc::new(result) as ArrayRef)) } other => { @@ -139,14 +155,27 @@ const FACTORIALS: [i64; 21] = [ 6402373705728000, 121645100408832000, 2432902008176640000, -]; // if return type changes, this constant needs to be updated accordingly +]; -fn compute_factorial(n: i64) -> Result { +fn compute_factorial(n: i64) -> Result { if n < 0 { - exec_err!("factorial of a negative number is undefined") - } else if n < FACTORIALS.len() as i64 { - Ok(FACTORIALS[n as usize]) - } else { - exec_err!("Overflow happened on FACTORIAL({n})") + return exec_err!("factorial of a negative number is undefined"); + } + + if n < FACTORIALS.len() as i64 { + return Ok(i256::from(FACTORIALS[n as usize])); + } + + let mut result = i256::from(FACTORIALS[FACTORIALS.len() - 1]); + for value in FACTORIALS.len() as i64..=n { + result = result.checked_mul(i256::from(value)).ok_or_else(|| { + DataFusionError::Execution(format!("Overflow happened on FACTORIAL({n})")) + })?; + } + + if result.to_string().len() > DECIMAL256_MAX_PRECISION as usize { + return exec_err!("Overflow happened on FACTORIAL({n})"); } + + Ok(result) } diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 38f76f13151bc..9e8bea05a412e 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -461,19 +461,37 @@ select round(exp(a), 5), round(exp(e), 5), round(exp(f), 5) from signed_integers ## factorial # factorial scalar function -query III rowsort +query RRR rowsort select factorial(0), factorial(10), factorial(15); ---- 1 3628800 1307674368000 +query TR +select arrow_typeof(factorial(21)), factorial(21); +---- +Decimal256(76, 0) 51090942171709440000 + +query R +select factorial(21); +---- +51090942171709440000 + +query TR +select arrow_typeof(factorial(56)), factorial(56); +---- +Decimal256(76, 0) 710998587804863451854045647463724949736497978881168458687447040000000000000 + +query error DataFusion error: Execution error: Overflow happened on FACTORIAL\(57\) +select factorial(57); + # factorial scalar nulls -query I rowsort +query R rowsort select factorial(null); ---- NULL # factorial with columns -query III rowsort +query RRR rowsort select factorial(a), factorial(e), factorial(f) from unsigned_integers; ---- 1 24 3628800 From 6fe8e838bfa06dafece0bc127bdc0e73d4d2167d Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sat, 30 May 2026 09:55:46 +0800 Subject: [PATCH 2/3] fix --- datafusion/functions/src/math/factorial.rs | 144 +++++++++++++++------ 1 file changed, 101 insertions(+), 43 deletions(-) diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs index 2234461ef281d..c62d664801fbe 100644 --- a/datafusion/functions/src/math/factorial.rs +++ b/datafusion/functions/src/math/factorial.rs @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ArrayRef, AsArray, Decimal256Array}; +use arrow::array::{ArrayRef, AsArray}; use std::sync::Arc; use arrow::datatypes::DataType::{Decimal256, Int64}; -use arrow::datatypes::{DECIMAL256_MAX_PRECISION, DataType, Int64Type}; +use arrow::datatypes::{DECIMAL256_MAX_PRECISION, DataType, Decimal256Type, Int64Type}; use arrow_buffer::i256; use datafusion_common::{ - DataFusionError, Result, ScalarValue, exec_err, internal_err, + Result, ScalarValue, exec_err, internal_err, utils::take_function_args, }; use datafusion_expr::{ @@ -114,10 +114,7 @@ impl ScalarUDFImpl for FactorialFunc { Int64 => { let result = array .as_primitive::() - .iter() - .map(|value| value.map(compute_factorial).transpose()) - .collect::>>()?; - let result = Decimal256Array::from(result) + .try_unary::<_, Decimal256Type, _>(compute_factorial)? .with_precision_and_scale(DECIMAL256_MAX_PRECISION, 0)?; Ok(ColumnarValue::Array(Arc::new(result) as ArrayRef)) } @@ -133,28 +130,100 @@ impl ScalarUDFImpl for FactorialFunc { } } -const FACTORIALS: [i64; 21] = [ - 1, - 1, - 2, - 6, - 24, - 120, - 720, - 5040, - 40320, - 362880, - 3628800, - 39916800, - 479001600, - 6227020800, - 87178291200, - 1307674368000, - 20922789888000, - 355687428096000, - 6402373705728000, - 121645100408832000, - 2432902008176640000, +const FACTORIALS: [i256; 57] = [ + i256::from_parts(1, 0), + i256::from_parts(1, 0), + i256::from_parts(2, 0), + i256::from_parts(6, 0), + i256::from_parts(24, 0), + i256::from_parts(120, 0), + i256::from_parts(720, 0), + i256::from_parts(5040, 0), + i256::from_parts(40320, 0), + i256::from_parts(362880, 0), + i256::from_parts(3628800, 0), + i256::from_parts(39916800, 0), + i256::from_parts(479001600, 0), + i256::from_parts(6227020800, 0), + i256::from_parts(87178291200, 0), + i256::from_parts(1307674368000, 0), + i256::from_parts(20922789888000, 0), + i256::from_parts(355687428096000, 0), + i256::from_parts(6402373705728000, 0), + i256::from_parts(121645100408832000, 0), + i256::from_parts(2432902008176640000, 0), + i256::from_parts(51090942171709440000, 0), + i256::from_parts(1124000727777607680000, 0), + i256::from_parts(25852016738884976640000, 0), + i256::from_parts(620448401733239439360000, 0), + i256::from_parts(15511210043330985984000000, 0), + i256::from_parts(403291461126605635584000000, 0), + i256::from_parts(10888869450418352160768000000, 0), + i256::from_parts(304888344611713860501504000000, 0), + i256::from_parts(8841761993739701954543616000000, 0), + i256::from_parts(265252859812191058636308480000000, 0), + i256::from_parts(8222838654177922817725562880000000, 0), + i256::from_parts(263130836933693530167218012160000000, 0), + i256::from_parts(8683317618811886495518194401280000000, 0), + i256::from_parts(295232799039604140847618609643520000000, 0), + i256::from_parts(124676958757991025765413114570153656320, 30), + i256::from_parts(64699745315476902531002227912544878592, 1093), + i256::from_parts(11914008226076149403460180741783027712, 40448), + i256::from_parts(112449945669955213868112260755986841600, 1537025), + i256::from_parts(302159478076991779295882880302268284928, 59943987), + i256::from_parts(176496280846824950617203951978843996160, 2397759515), + i256::from_parts(90417809380115242574495275065471401984, 98308140136), + i256::from_parts(54441957834517090031680871000348557312, 4128941885723), + i256::from_parts(299309985358604090582029808424378695680, 177544501086095), + i256::from_parts(238909412782918374001076488265470574592, 7811958047788218), + i256::from_parts( + 202170200682234462683829141561361301504, + 351538112150469841, + ), + i256::from_parts( + 112205324517446769945026111164878159872, + 16170753158921612713, + ), + i256::from_parts( + 169414748505921235465608113272750342144, + 760025398469315797526, + ), + i256::from_parts( + 305413489102634642691573466161347559424, + 36481219126527158281271, + ), + i256::from_parts( + 333119188428743562961991722339997319168, + 1787579737199830755782322, + ), + i256::from_parts( + 322405809232131901857604960274991808512, + 89378986859991537789116148, + ), + i256::from_parts( + 109142658633680748495871817299708084224, + 4558328329859568427244923596, + ), + i256::from_parts( + 230900378216383506371340780676528996352, + 237033073152697558216736027008, + ), + i256::from_parts( + 327837203235479616462950115744149405696, + 12562752877092970585487009431459, + ), + i256::from_parts( + 8525894827099188903826663732120911872, + 678388655363020411616298509298838, + ), + i256::from_parts( + 128641848569516926247091897834881941504, + 37311376044966122638896418011436091, + ), + i256::from_parts( + 58013814553240137106279522686256283648, + 2089437058518102867778199408640421117, + ), ]; fn compute_factorial(n: i64) -> Result { @@ -162,20 +231,9 @@ fn compute_factorial(n: i64) -> Result { return exec_err!("factorial of a negative number is undefined"); } - if n < FACTORIALS.len() as i64 { - return Ok(i256::from(FACTORIALS[n as usize])); + if let Some(value) = FACTORIALS.get(n as usize) { + return Ok(*value); } - let mut result = i256::from(FACTORIALS[FACTORIALS.len() - 1]); - for value in FACTORIALS.len() as i64..=n { - result = result.checked_mul(i256::from(value)).ok_or_else(|| { - DataFusionError::Execution(format!("Overflow happened on FACTORIAL({n})")) - })?; - } - - if result.to_string().len() > DECIMAL256_MAX_PRECISION as usize { - return exec_err!("Overflow happened on FACTORIAL({n})"); - } - - Ok(result) + exec_err!("Overflow happened on FACTORIAL({n})") } From 9c9227a0da3920267ce33f94d284c2de4bfc63f4 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sun, 31 May 2026 23:47:30 +0800 Subject: [PATCH 3/3] fmt --- datafusion/functions/src/math/factorial.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs index c62d664801fbe..5f9416c2ebc6c 100644 --- a/datafusion/functions/src/math/factorial.rs +++ b/datafusion/functions/src/math/factorial.rs @@ -23,8 +23,7 @@ use arrow::datatypes::{DECIMAL256_MAX_PRECISION, DataType, Decimal256Type, Int64 use arrow_buffer::i256; use datafusion_common::{ - Result, ScalarValue, exec_err, internal_err, - utils::take_function_args, + Result, ScalarValue, exec_err, internal_err, utils::take_function_args, }; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -176,10 +175,7 @@ const FACTORIALS: [i256; 57] = [ i256::from_parts(54441957834517090031680871000348557312, 4128941885723), i256::from_parts(299309985358604090582029808424378695680, 177544501086095), i256::from_parts(238909412782918374001076488265470574592, 7811958047788218), - i256::from_parts( - 202170200682234462683829141561361301504, - 351538112150469841, - ), + i256::from_parts(202170200682234462683829141561361301504, 351538112150469841), i256::from_parts( 112205324517446769945026111164878159872, 16170753158921612713,