diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/ColumnMeta.java b/engine/src/main/java/com/github/jinba1/cuckoodb/ColumnMeta.java
new file mode 100644
index 0000000..815af91
--- /dev/null
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/ColumnMeta.java
@@ -0,0 +1,22 @@
+package com.github.jinba1.cuckoodb;
+
+/**
+ * Describes one output column of a {@link QueryResultSet}, in result-column order.
+ *
+ *
Result rows are strictly positional and column names are not unique — a
+ * join {@code SELECT *} can emit the same bare name twice (e.g. two {@code a} columns).
+ * {@code qualifiedName} disambiguates those: it is the dotted schema origin
+ * (e.g. {@code student.a} vs {@code enrolled.a}) when one exists, and {@code null} for
+ * aggregate/computed columns and for single-table scans whose schema carries no table
+ * prefix. Clients must address columns by position, not by name.
+ *
+ *
{@code type} is best-effort, inferred from the first row's runtime value, so it is
+ * {@code null} for an empty result (no row to infer from). The authoritative typed schema
+ * for a base table comes from the catalog, not this field.
+ *
+ * @param name the bare header name (table prefix stripped; aggregate keys kept whole)
+ * @param qualifiedName the dotted schema origin, or null when there is none
+ * @param type the inferred column type, or null when the result is empty
+ */
+public record ColumnMeta(String name, String qualifiedName, ColumnType type) {
+}
diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/CuckooDB.java b/engine/src/main/java/com/github/jinba1/cuckoodb/CuckooDB.java
index 511a421..4b0d28d 100644
--- a/engine/src/main/java/com/github/jinba1/cuckoodb/CuckooDB.java
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/CuckooDB.java
@@ -4,7 +4,9 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
@@ -125,15 +127,13 @@ public static QueryResult execute(Operator root, String outputFile) {
try {
try (CSVPrinter printer = new CSVPrinter(new FileWriter(outputFile), format)) {
printer.printRecord(headers);
- Tuple tuple;
- while ((tuple = root.getNextTuple()) != null) {
+ rows = drain(root, tuple -> {
List fields = new ArrayList<>(tuple.getTuple().size());
for (Value v : tuple.getTuple()) {
fields.add(v.toString());
}
printer.printRecord(fields);
- rows++;
- }
+ });
}
} catch (RuntimeException e) {
// QueryExecutionException and internal errors alike: never leave a
@@ -148,8 +148,7 @@ public static QueryResult execute(Operator root, String outputFile) {
"Failed to write output file '" + outputFile + "': " + e.getMessage());
}
- // The planner places Limit topmost, so the root knows whether the cap cut the result
- boolean truncated = root instanceof LimitOperator limitOp && limitOp.wasTruncated();
+ boolean truncated = wasTruncated(root);
QueryResult result = truncated ? QueryResult.truncated(rows) : QueryResult.complete(rows);
System.out.println("Query executed successfully!");
@@ -158,6 +157,118 @@ public static QueryResult execute(Operator root, String outputFile) {
return result;
}
+ /**
+ * Drains the plan into an in-memory result set — column metadata, positional rows, and the
+ * same truncation/hint signal {@link #execute} reports, but with no stdout and no file write.
+ * This is the library/REST entry point; the CLI keeps using {@link #execute}. Both share
+ * {@link #drain}, so they cannot diverge in iteration, row count, or truncation semantics.
+ * @param root the root operator of an executable plan (never null; EXPLAIN is handled before here)
+ * @return the fully-materialized result
+ */
+ public static QueryResultSet executeToResultSet(Operator root) {
+ String schemaId = root.propagateSchemaId();
+ List names = root.getContext().getOrderedColumnNames(schemaId);
+ List> rows = new ArrayList<>();
+ try {
+ drain(root, tuple -> rows.add(List.copyOf(tuple.getTuple())));
+ } catch (IOException e) {
+ // The in-memory sink does no I/O; drain only declares IOException for the CSV path.
+ throw new QueryExecutionException(ErrorCode.INTERNAL,
+ "Unexpected I/O while draining query result: " + e.getMessage());
+ }
+ boolean truncated = wasTruncated(root);
+ String hint = truncated ? QueryResult.truncated(rows.size()).hint() : null;
+ List columns = buildColumns(root.getContext(), schemaId, names, rows);
+ // The result is a value handed to library/REST callers; the outer lists are
+ // unmodifiable so a consumer cannot mutate the result after the fact (each inner
+ // row is already immutable via List.copyOf in the drain sink).
+ return new QueryResultSet(Collections.unmodifiableList(columns),
+ Collections.unmodifiableList(rows), truncated, hint);
+ }
+
+ /**
+ * Pulls every tuple from the plan to EOF, handing each to {@code sink}, and returns the row
+ * count. The single drain point for both result paths: truncation can only be read once this
+ * has run to a null tuple, so sharing it keeps the file and in-memory paths consistent.
+ */
+ private static long drain(Operator root, TupleSink sink) throws IOException {
+ long rows = 0;
+ Tuple tuple;
+ while ((tuple = root.getNextTuple()) != null) {
+ sink.accept(tuple);
+ rows++;
+ }
+ return rows;
+ }
+
+ /** A per-tuple action that may fail with an I/O error (the CSV writer's printRecord does). */
+ @FunctionalInterface
+ private interface TupleSink {
+ void accept(Tuple tuple) throws IOException;
+ }
+
+ /**
+ * Whether a LIMIT cut the result short. The planner places LIMIT topmost, so only the root
+ * can report this, and only after the drain reached EOF (the peek past the cap has happened).
+ */
+ private static boolean wasTruncated(Operator root) {
+ return root instanceof LimitOperator limitOp && limitOp.wasTruncated();
+ }
+
+ /**
+ * Builds per-position column metadata for a result set. {@code name} is the bare header
+ * (table prefix stripped, aggregate keys kept whole); {@code qualifiedName} is the dotted,
+ * non-aggregate schema key for that index when one exists (null for single-table scans and
+ * computed columns); {@code type} is inferred from the first row, or null for an empty result.
+ */
+ private static List buildColumns(PlanContext ctx, String schemaId,
+ List names, List> rows) {
+ int width = names.size();
+ String[] qualified = new String[width];
+ Map schema = ctx.getSchema(schemaId);
+ if (schema != null) {
+ // Mirror getOrderedColumnNames' deterministic rule — sorted keys, first non-null
+ // wins per index — so `name` and `qualifiedName` are chosen from a single key
+ // choice and never disagree run-to-run. Skip internal intermediate-schema keys
+ // (temp_.col) outright: a join over a pushed-down source registers BOTH a
+ // base-qualified key (Enrolled.a) and a temp_ key at the same index, and the
+ // temp_ id is not a real origin (and is plan/JVM-unstable).
+ List keys = new ArrayList<>(schema.keySet());
+ Collections.sort(keys);
+ for (String key : keys) {
+ int idx = schema.get(key);
+ if (idx < 0 || idx >= width || qualified[idx] != null
+ || key.startsWith(Constants.INTERMEDIATE_SCHEMA_PREFIX)) {
+ continue;
+ }
+ // A dotted, non-aggregate key (e.g. "Student.a") is the column's qualified
+ // origin. Aggregate keys like "sum(student.c)" also contain '.', so the '('
+ // check excludes them. Lowercased to match the bare name (-> "student.a").
+ if (key.indexOf('.') >= 0 && key.indexOf('(') < 0) {
+ qualified[idx] = key.toLowerCase();
+ }
+ }
+ }
+ List firstRow = rows.isEmpty() ? null : rows.get(0);
+ List columns = new ArrayList<>(width);
+ for (int i = 0; i < width; i++) {
+ ColumnType type = firstRow == null ? null : inferType(firstRow.get(i));
+ columns.add(new ColumnMeta(names.get(i), qualified[i], type));
+ }
+ return columns;
+ }
+
+ /** Maps a runtime value to its column type. Exhaustive over the sealed {@link Value}. */
+ private static ColumnType inferType(Value value) {
+ if (value instanceof IntValue) {
+ return ColumnType.INT;
+ } else if (value instanceof StringValue) {
+ return ColumnType.STRING;
+ }
+ throw new QueryExecutionException(ErrorCode.INTERNAL,
+ "Unknown value type: " + value.getClass().getName());
+ }
+
/** Never leave a truncated file that looks like a complete result. */
private static void deletePartialOutput(File outputFileObj, String outputFile) {
if (outputFileObj.isFile() && !outputFileObj.delete()) {
diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/DBCatalog.java b/engine/src/main/java/com/github/jinba1/cuckoodb/DBCatalog.java
index 0360d24..9ac5549 100644
--- a/engine/src/main/java/com/github/jinba1/cuckoodb/DBCatalog.java
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/DBCatalog.java
@@ -10,6 +10,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Stream;
/**
@@ -29,17 +30,19 @@ public class DBCatalog {
private static DBCatalog instance;
- private final Map dbLocations;
- private final Map> dbSchemata;
- private final Map> dbColumnTypes;
+ /**
+ * One entry per table, each a fully-populated {@link TableMeta} (location + schema +
+ * types). A single map means a table is published with one atomic write and read with
+ * one atomic lookup — no torn state across what used to be three parallel maps. This is
+ * what makes runtime {@link #registerTable} safe against concurrent readers.
+ */
+ private final Map tables;
/**
* Private constructor to ensure singleton design.
*/
private DBCatalog() {
- dbLocations = new java.util.concurrent.ConcurrentHashMap<>();
- dbSchemata = new java.util.concurrent.ConcurrentHashMap<>();
- dbColumnTypes = new java.util.concurrent.ConcurrentHashMap<>();
+ tables = new ConcurrentHashMap<>();
}
/**
@@ -94,7 +97,7 @@ private void loadDBCatalog(String dBDirectory) {
for (Path csv : csvs) {
String fileName = csv.getFileName().toString();
String tableName = fileName.substring(0, fileName.length() - 4);
- loadTable(tableName, csv);
+ tables.put(tableName, parseTable(tableName, csv));
}
} catch (IOException e) {
throw new QueryExecutionException(ErrorCode.DATA_ERROR,
@@ -102,7 +105,40 @@ private void loadDBCatalog(String dBDirectory) {
}
}
- private void loadTable(String tableName, Path csv) throws IOException {
+ /**
+ * Registers a single table at runtime from a CSV file, for callers (the REST gateway)
+ * that add tables after startup. Parses and infers the schema with the same rules as
+ * directory load, then publishes the table with one atomic {@code putIfAbsent}.
+ *
+ * Returns {@code false} (and leaves the existing table untouched) when a table of that
+ * name already exists — the caller's 409 signal. Never {@code containsKey}-then-{@code put},
+ * which would reintroduce the race this design removes.
+ * @param tableName the catalog name to publish under
+ * @param csv the CSV file backing the table; must outlive every query that scans it
+ * @return {@code true} if this call registered the table; {@code false} if the name was taken
+ * @throws QueryExecutionException with {@link ErrorCode#DATA_ERROR} if the CSV is malformed
+ */
+ public boolean registerTable(String tableName, Path csv) {
+ TableMeta meta;
+ try {
+ meta = parseTable(tableName, csv);
+ } catch (IOException e) {
+ throw new QueryExecutionException(ErrorCode.DATA_ERROR,
+ "Could not read table '" + tableName + "' from " + csv + ": " + e.getMessage());
+ }
+ return tables.putIfAbsent(tableName, meta) == null;
+ }
+
+ /**
+ * Parses one CSV into table metadata: column names from the header row, types inferred
+ * from the data rows. Pure (no map writes) so both directory load and {@link #registerTable}
+ * share identical parse-and-infer rules. The returned schema and types are unmodifiable.
+ *
Type inference starts every column as INT and demotes to STRING on the first
+ * non-integer field, so a header-only CSV (no data rows) infers every column as
+ * INT — there is no evidence to refute INT. REST callers uploading a schema-only file get
+ * an all-INT table until rows arrive.
+ */
+ private static TableMeta parseTable(String tableName, Path csv) throws IOException {
CSVFormat format = CSVFormat.RFC4180.builder()
.setIgnoreSurroundingSpaces(true)
.build();
@@ -146,9 +182,9 @@ private void loadTable(String tableName, Path csv) throws IOException {
types.add(isInt[i] ? ColumnType.INT : ColumnType.STRING);
}
- dbSchemata.put(tableName, Collections.unmodifiableMap(columnMap));
- dbColumnTypes.put(tableName, Collections.unmodifiableList(types));
- dbLocations.put(tableName, csv);
+ return new TableMeta(csv,
+ Collections.unmodifiableMap(columnMap),
+ Collections.unmodifiableList(types));
}
}
@@ -162,13 +198,25 @@ private static boolean parsesAsInt(String field) {
}
}
+ /**
+ * Returns the full metadata for a table in one atomic lookup, or null if absent.
+ * Callers that need more than one of location/schema/types (e.g. ScanOperator) use
+ * this so the fields they read are guaranteed to come from the same registration.
+ * @param tableName The name of the table
+ * @return The table's metadata, or null if the table is not in the catalog
+ */
+ public TableMeta getTableMeta(String tableName) {
+ return tables.get(tableName);
+ }
+
/**
* Returns the file path for a specified table.
* @param tableName The name of the table
* @return The Path object representing the table's data file location
*/
public Path getDBLocation(String tableName) {
- return dbLocations.get(tableName);
+ TableMeta meta = tables.get(tableName);
+ return meta == null ? null : meta.path();
}
/**
@@ -178,7 +226,8 @@ public Path getDBLocation(String tableName) {
* @return A map from column names to their positions (indices)
*/
public Map getDBSchemata(String tableName) {
- return dbSchemata.get(tableName);
+ TableMeta meta = tables.get(tableName);
+ return meta == null ? null : meta.schema();
}
/**
@@ -187,7 +236,8 @@ public Map getDBSchemata(String tableName) {
* @return A list of ColumnType values in column order, or null if table not found
*/
public List getColumnTypes(String tableName) {
- return dbColumnTypes.get(tableName);
+ TableMeta meta = tables.get(tableName);
+ return meta == null ? null : meta.types();
}
/**
@@ -196,7 +246,7 @@ public List getColumnTypes(String tableName) {
* @return The sorted list of loaded table names
*/
public List getTableNames() {
- List names = new ArrayList<>(dbLocations.keySet());
+ List names = new ArrayList<>(tables.keySet());
Collections.sort(names);
return names;
}
@@ -220,7 +270,7 @@ public QueryExecutionException unknownTable(String tableName) {
* @return true if the table exists, false otherwise
*/
public boolean tableExists(String tableName) {
- return (dbLocations.containsKey(tableName) && dbSchemata.containsKey(tableName));
+ return tables.containsKey(tableName);
}
/**
@@ -230,9 +280,10 @@ public boolean tableExists(String tableName) {
* @return true if the column exists in the table, false otherwise
*/
public boolean columnExists(String tableName, String columnName) {
- if (!tableExists(tableName)) {
+ TableMeta meta = tables.get(tableName);
+ if (meta == null) {
return false;
}
- return dbSchemata.get(tableName).containsKey(columnName.toLowerCase());
+ return meta.schema().containsKey(columnName.toLowerCase());
}
}
diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/QueryPlanner.java b/engine/src/main/java/com/github/jinba1/cuckoodb/QueryPlanner.java
index fdf51f8..ad1ce3d 100644
--- a/engine/src/main/java/com/github/jinba1/cuckoodb/QueryPlanner.java
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/QueryPlanner.java
@@ -79,8 +79,6 @@ public static PlannedQuery planQuery(String filename) {
* with an {@link ErrorCode} and a message the caller can act on
*/
public static PlannedQuery planQuery(String filename, QueryConfig config) {
- PlanContext ctx = new PlanContext(config);
-
Statement statement;
try {
statement = CCJSqlParserUtil.parse(new FileReader(filename));
@@ -91,6 +89,53 @@ public static PlannedQuery planQuery(String filename, QueryConfig config) {
throw new QueryExecutionException(ErrorCode.DATA_ERROR,
"Could not read query file '" + filename + "': " + e.getMessage());
}
+ return planFrom(statement, config);
+ }
+
+ /**
+ * Plans one query from SQL held in memory — the library/REST entry point. Parses the
+ * text, then runs the identical statement-level pipeline as the file overload, so a
+ * query planned from a string and the same query planned from a file are byte-for-byte
+ * the same plan (and EXPLAIN renders identically).
+ *
+ * Named {@code planSql} rather than overloading {@code planQuery(String, QueryConfig)}
+ * because that signature already exists for the file path; both take a {@code String}, so
+ * a distinct name is the only unambiguous way to mean "this argument is SQL text, not a
+ * path". Read-only-by-construction holds across the parser: a non-SELECT statement is
+ * rejected as {@link ErrorCode#UNSUPPORTED_SQL} and multi-statement input fails to parse
+ * ({@link ErrorCode#PARSE_ERROR}). Unlike the file overload there is no I/O, so no
+ * {@code DATA_ERROR} branch.
+ * @param sql the SQL query text (optionally EXPLAIN-prefixed); not a file path
+ * @param config the per-query planner configuration
+ * @return the planned query; the root is never null
+ * @throws QueryExecutionException with an {@link ErrorCode} the caller can act on
+ */
+ public static PlannedQuery planSql(String sql, QueryConfig config) {
+ if (sql == null || sql.isBlank()) {
+ // Keep the "every failure is a classified QueryExecutionException" invariant:
+ // CCJSqlParserUtil.parse(null) throws a bare NPE that would otherwise escape as
+ // an unclassified RuntimeException (the server would map it to 500, not 400).
+ throw new QueryExecutionException(ErrorCode.PARSE_ERROR,
+ "SQL text must not be null or blank");
+ }
+ Statement statement;
+ try {
+ statement = CCJSqlParserUtil.parse(sql);
+ } catch (JSQLParserException e) {
+ throw new QueryExecutionException(ErrorCode.PARSE_ERROR,
+ "SQL syntax error: " + parserMessage(e));
+ }
+ return planFrom(statement, config);
+ }
+
+ /**
+ * The shared planning core: takes an already-parsed statement and produces the optimized,
+ * executable plan (plus EXPLAIN text when the statement is EXPLAIN-prefixed). Both source
+ * overloads delegate here so source format can never change the resulting plan. EXPLAIN
+ * detection and optimization happen at this statement level, identically for every source.
+ */
+ private static PlannedQuery planFrom(Statement statement, QueryConfig config) {
+ PlanContext ctx = new PlanContext(config);
boolean explain = false;
Select select;
diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/QueryResultSet.java b/engine/src/main/java/com/github/jinba1/cuckoodb/QueryResultSet.java
new file mode 100644
index 0000000..7008442
--- /dev/null
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/QueryResultSet.java
@@ -0,0 +1,21 @@
+package com.github.jinba1.cuckoodb;
+
+import java.util.List;
+
+/**
+ * A fully-materialized query result held in memory, for callers (the REST gateway) that
+ * need the rows as data rather than written to a CSV file. The CLI path keeps writing to
+ * a file via {@link CuckooDB#execute}; both share one drain helper so they cannot diverge
+ * in iteration, row count, or truncation semantics.
+ *
+ *
Rows are positional and aligned with {@code columns} by index — see {@link ColumnMeta}
+ * for why callers must not rely on column names being unique.
+ *
+ * @param columns one entry per output column, in column order
+ * @param rows the result rows; each row's values align with {@code columns} by index
+ * @param truncated true when a LIMIT stopped the query although more rows existed
+ * @param hint how to refine the query when truncated; null otherwise
+ */
+public record QueryResultSet(List columns, List> rows,
+ boolean truncated, String hint) {
+}
diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/TableMeta.java b/engine/src/main/java/com/github/jinba1/cuckoodb/TableMeta.java
new file mode 100644
index 0000000..79154bf
--- /dev/null
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/TableMeta.java
@@ -0,0 +1,20 @@
+package com.github.jinba1.cuckoodb;
+
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Durable metadata for one base table, held as a single value in {@link DBCatalog}.
+ * Merging location, schema, and column types into one record lets the catalog publish
+ * a table with a single atomic map write — a table is either fully absent or fully
+ * present, with no torn read in any direction. This is the precondition for safe
+ * runtime table registration (REST upload) while queries read concurrently.
+ *
+ * @param path the CSV file backing the table; {@link com.github.jinba1.cuckoodb.operator.ScanOperator}
+ * re-opens it on every {@code reset()}, so it must outlive the query
+ * @param schema column name (lowercased) to position, unmodifiable
+ * @param types column types in column order, unmodifiable
+ */
+public record TableMeta(Path path, Map schema, List types) {
+}
diff --git a/engine/src/main/java/com/github/jinba1/cuckoodb/operator/ScanOperator.java b/engine/src/main/java/com/github/jinba1/cuckoodb/operator/ScanOperator.java
index e727594..64cee45 100644
--- a/engine/src/main/java/com/github/jinba1/cuckoodb/operator/ScanOperator.java
+++ b/engine/src/main/java/com/github/jinba1/cuckoodb/operator/ScanOperator.java
@@ -7,6 +7,7 @@
import com.github.jinba1.cuckoodb.PlanContext;
import com.github.jinba1.cuckoodb.QueryExecutionException;
import com.github.jinba1.cuckoodb.StringValue;
+import com.github.jinba1.cuckoodb.TableMeta;
import com.github.jinba1.cuckoodb.Tuple;
import com.github.jinba1.cuckoodb.Value;
@@ -43,11 +44,14 @@ public class ScanOperator extends Operator {
public ScanOperator(PlanContext ctx, String tableName) {
super(ctx);
this.tableName = tableName;
- tablePath = DBCatalog.getInstance().getDBLocation(tableName);
- this.columnTypes = DBCatalog.getInstance().getColumnTypes(tableName);
- if (tablePath == null || columnTypes == null) {
+ // One atomic lookup: path and types come from the same table registration,
+ // so a concurrent runtime upload can never expose one without the other.
+ TableMeta meta = DBCatalog.getInstance().getTableMeta(tableName);
+ if (meta == null) {
throw DBCatalog.getInstance().unknownTable(tableName);
}
+ this.tablePath = meta.path();
+ this.columnTypes = meta.types();
child = null; // Scan cannot have child operator
this.schemaRegistered = true;
diff --git a/engine/src/test/java/com/github/jinba1/cuckoodb/CuckooDBResultSetTest.java b/engine/src/test/java/com/github/jinba1/cuckoodb/CuckooDBResultSetTest.java
new file mode 100644
index 0000000..2e9b6c5
--- /dev/null
+++ b/engine/src/test/java/com/github/jinba1/cuckoodb/CuckooDBResultSetTest.java
@@ -0,0 +1,200 @@
+package com.github.jinba1.cuckoodb;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import com.github.jinba1.cuckoodb.operator.Operator;
+
+/**
+ * E2/E3: in-memory result materialization ({@link CuckooDB#executeToResultSet}) with
+ * positional columns, qualified names for join-duplicate disambiguation, best-effort
+ * runtime type inference, and truncation/hint parity with the CSV-file path.
+ */
+class CuckooDBResultSetTest {
+
+ private static final String SAMPLE_DB_DIR = "samples/db";
+
+ @TempDir
+ Path tempDir;
+
+ @BeforeEach
+ void initCatalog() throws IOException {
+ DBCatalog.resetDBCatalog();
+ DBCatalog.initDBCatalog(SAMPLE_DB_DIR);
+ // A table with one int column and one string column, registered at runtime,
+ // so type inference and the empty-result case are exercised on known types.
+ Path typed = tempDir.resolve("typed.csv");
+ Files.writeString(typed, "n,s\n1,alice\n2,bob\n");
+ DBCatalog.getInstance().registerTable("Typed", typed);
+ }
+
+ @AfterEach
+ void resetCatalog() {
+ DBCatalog.resetDBCatalog();
+ }
+
+ private QueryResultSet run(String sql) {
+ Operator root = QueryPlanner.planSql(sql, QueryConfig.defaults()).root();
+ return CuckooDB.executeToResultSet(root);
+ }
+
+ @Test
+ void joinDuplicateNamesGetDistinctQualifiedNames() {
+ QueryResultSet rs = run("SELECT * FROM Student, Enrolled WHERE Student.A = Enrolled.A");
+
+ List names = rs.columns().stream().map(ColumnMeta::name).toList();
+ assertEquals(List.of("a", "b", "c", "d", "a", "e", "h"), names,
+ "join SELECT * emits duplicate bare 'a' (verified header shape)");
+
+ assertEquals("student.a", rs.columns().get(0).qualifiedName());
+ assertEquals("enrolled.a", rs.columns().get(4).qualifiedName());
+ assertNotEquals(rs.columns().get(0).qualifiedName(), rs.columns().get(4).qualifiedName(),
+ "the two 'a' columns must be distinguishable by qualifiedName");
+ }
+
+ @Test
+ void typeInferredFromFirstRow() {
+ QueryResultSet rs = run("SELECT * FROM Typed");
+
+ assertEquals(2, rs.columns().size());
+ assertEquals(ColumnType.INT, rs.columns().get(0).type(), "n column is all-int");
+ assertEquals(ColumnType.STRING, rs.columns().get(1).type(), "s column is string");
+ assertFalse(rs.truncated());
+ assertNull(rs.hint());
+ assertEquals(2, rs.rows().size());
+ }
+
+ @Test
+ void emptyResultKeepsNamesButHasNullTypes() {
+ QueryResultSet rs = run("SELECT * FROM Typed WHERE Typed.n > 999");
+
+ assertTrue(rs.rows().isEmpty(), "no rows match");
+ List names = rs.columns().stream().map(ColumnMeta::name).toList();
+ assertEquals(List.of("n", "s"), names, "column names survive an empty result");
+ for (ColumnMeta col : rs.columns()) {
+ assertNull(col.type(), "no row means no inferable type");
+ }
+ }
+
+ @Test
+ void truncationAndHintMatchTheFilePath() throws IOException {
+ // Student has 6 rows; LIMIT 2 truncates.
+ QueryResultSet rs = run("SELECT * FROM Student LIMIT 2");
+ assertTrue(rs.truncated());
+ assertEquals(2, rs.rows().size());
+ assertEquals(QueryResult.truncated(2).hint(), rs.hint(),
+ "hint text must reuse the CLI path's wording verbatim");
+ }
+
+ @Test
+ void limitAtOrAboveSizeIsNotTruncated() {
+ QueryResultSet rs = run("SELECT * FROM Student LIMIT 100");
+ assertFalse(rs.truncated());
+ assertNull(rs.hint());
+ assertEquals(6, rs.rows().size());
+ }
+
+ @Test
+ void resultSetRowsMatchTheCsvFileBytesForSameQuery() throws IOException {
+ String sql = "SELECT Student.A, Student.D FROM Student WHERE Student.D > 30";
+
+ // File path
+ Path out = tempDir.resolve("out.csv");
+ Operator fileRoot = QueryPlanner.planSql(sql, QueryConfig.defaults()).root();
+ CuckooDB.execute(fileRoot, out.toString());
+ List fileLines = Files.readAllLines(out);
+ List fileDataLines = fileLines.subList(1, fileLines.size()); // drop header
+
+ // Result-set path
+ QueryResultSet rs = run(sql);
+ List rsDataLines = new ArrayList<>();
+ for (List row : rs.rows()) {
+ List fields = new ArrayList<>();
+ for (Value v : row) {
+ fields.add(v.toString());
+ }
+ rsDataLines.add(String.join(",", fields));
+ }
+
+ assertEquals(fileDataLines, rsDataLines,
+ "the two drain paths must produce identical row data");
+ }
+
+ /**
+ * Regression for the review's HIGH finding: a one-sided WHERE filter makes the optimizer
+ * push a selection under the join, so the join source is an intermediate (temp_) schema
+ * that carries both a base-qualified key (enrolled.a) and an internal temp_.a key at
+ * the same index. qualifiedName must report the real origin, never the internal id.
+ */
+ @Test
+ void qualifiedNameNeverLeaksInternalSchemaIdsAcrossPushdownJoin() {
+ QueryResultSet rs = run(
+ "SELECT * FROM Student, Enrolled WHERE Student.A = Enrolled.A AND Enrolled.H > 0");
+
+ List names = rs.columns().stream().map(ColumnMeta::name).toList();
+ assertEquals(List.of("a", "b", "c", "d", "a", "e", "h"), names);
+
+ List quals = rs.columns().stream().map(ColumnMeta::qualifiedName).toList();
+ for (String q : quals) {
+ assertFalse(q != null && q.startsWith("temp_"),
+ "qualifiedName must be a real table origin, not an internal id: " + quals);
+ }
+ assertEquals("student.a", rs.columns().get(0).qualifiedName());
+ assertEquals("enrolled.a", rs.columns().get(4).qualifiedName(),
+ "the pushed-down side still resolves to its base table origin");
+ }
+
+ @Test
+ void resultSetCollectionsAreUnmodifiable() {
+ // The record is a value handed to the REST layer; callers must not be able to mutate
+ // rows or columns after the fact.
+ QueryResultSet rs = run("SELECT * FROM Student");
+ assertThrows(UnsupportedOperationException.class, () -> rs.rows().add(List.of()));
+ assertThrows(UnsupportedOperationException.class,
+ () -> rs.columns().add(new ColumnMeta("x", null, null)));
+ }
+
+ @Test
+ void limitZeroIsEmptyButTruncated() {
+ // The one state where rows.isEmpty() coincides with truncated=true: LIMIT 0 over a
+ // non-empty source. Names survive, every type is null, and the truncation hint is set.
+ QueryResultSet rs = run("SELECT * FROM Student LIMIT 0");
+
+ assertTrue(rs.rows().isEmpty());
+ assertTrue(rs.truncated());
+ assertEquals(QueryResult.truncated(0).hint(), rs.hint());
+ List names = rs.columns().stream().map(ColumnMeta::name).toList();
+ assertEquals(List.of("a", "b", "c", "d"), names);
+ for (ColumnMeta col : rs.columns()) {
+ assertNull(col.type(), "no row means no inferable type");
+ }
+ }
+
+ @Test
+ void aggregateColumnsHaveNullQualifiedNameAndInferredTypes() {
+ // SUM(Typed.n) / MIN(Typed.s) keys contain a '.', so they pin the '(' exclusion that
+ // keeps aggregate columns out of qualifiedName; a no-group aggregate emits exactly one
+ // row, so first-row inference still types every column.
+ QueryResultSet rs = run("SELECT COUNT(*), SUM(Typed.n), MIN(Typed.s) FROM Typed");
+
+ assertEquals(1, rs.rows().size(), "a no-group aggregate emits exactly one row");
+ List names = rs.columns().stream().map(ColumnMeta::name).toList();
+ assertEquals(List.of("count(*)", "sum(typed.n)", "min(typed.s)"), names);
+ for (ColumnMeta col : rs.columns()) {
+ assertNull(col.qualifiedName(), "aggregate columns have no dotted origin: " + col.name());
+ }
+ assertEquals(ColumnType.INT, rs.columns().get(0).type(), "count(*) is INT");
+ assertEquals(ColumnType.INT, rs.columns().get(1).type(), "sum over int is INT");
+ assertEquals(ColumnType.STRING, rs.columns().get(2).type(), "min over string is STRING");
+ }
+}
diff --git a/engine/src/test/java/com/github/jinba1/cuckoodb/DBCatalogTest.java b/engine/src/test/java/com/github/jinba1/cuckoodb/DBCatalogTest.java
index f6bb940..07e1e76 100644
--- a/engine/src/test/java/com/github/jinba1/cuckoodb/DBCatalogTest.java
+++ b/engine/src/test/java/com/github/jinba1/cuckoodb/DBCatalogTest.java
@@ -247,4 +247,173 @@ public void loadedTableSchemaIsImmutable() throws IOException {
assertThrows(UnsupportedOperationException.class,
() -> DBCatalog.getInstance().getDBSchemata("T").put("c", 9));
}
+
+ // ---- E4: single TableMeta map (getTableMeta) + runtime registration (registerTable) ----
+
+ @Test
+ public void getTableMetaExposesPathSchemaAndTypesTogether() {
+ DBCatalog.initDBCatalog(SAMPLE_DB_DIR);
+ TableMeta meta = DBCatalog.getInstance().getTableMeta("Student");
+ assertNotNull(meta, "loaded table must have meta");
+ assertTrue(meta.path().toString().endsWith("Student.csv"));
+ assertEquals(DBCatalog.getInstance().getDBSchemata("Student"), meta.schema());
+ assertEquals(DBCatalog.getInstance().getColumnTypes("Student"), meta.types());
+ }
+
+ @Test
+ public void getTableMetaReturnsNullForMissing() {
+ DBCatalog.initDBCatalog(SAMPLE_DB_DIR);
+ assertNull(DBCatalog.getInstance().getTableMeta("NoSuchTable"));
+ }
+
+ @Test
+ public void registerTableMakesTableQueryable() throws IOException {
+ DBCatalog.resetDBCatalog();
+ DBCatalog catalog = DBCatalog.getInstance();
+ Path csv = tempDb.resolve("upload.csv");
+ Files.writeString(csv, "id,name\n1,alice\n2,bob\n");
+
+ boolean registered = catalog.registerTable("Uploaded", csv);
+
+ assertTrue(registered, "first registration wins");
+ assertTrue(catalog.tableExists("Uploaded"));
+ assertEquals(Map.of("id", 0, "name", 1), catalog.getDBSchemata("Uploaded"));
+ assertEquals(List.of(ColumnType.INT, ColumnType.STRING), catalog.getColumnTypes("Uploaded"));
+ assertEquals(csv, catalog.getDBLocation("Uploaded"));
+ }
+
+ @Test
+ public void registerTableDuplicateReturnsFalseAndKeepsOriginal() throws IOException {
+ DBCatalog.resetDBCatalog();
+ DBCatalog catalog = DBCatalog.getInstance();
+ Path first = tempDb.resolve("first.csv");
+ Files.writeString(first, "id,name\n1,alice\n");
+ Path second = tempDb.resolve("second.csv");
+ Files.writeString(second, "x,y,z\n1,2,3\n");
+
+ assertTrue(catalog.registerTable("T", first));
+ boolean again = catalog.registerTable("T", second);
+
+ assertFalse(again, "duplicate registration must report not-registered (409 signal)");
+ assertEquals(first, catalog.getDBLocation("T"), "original meta must be untouched");
+ assertEquals(Map.of("id", 0, "name", 1), catalog.getDBSchemata("T"));
+ }
+
+ @Test
+ public void registerTableRejectsBadCsvWithDataError() throws IOException {
+ DBCatalog.resetDBCatalog();
+ DBCatalog catalog = DBCatalog.getInstance();
+ Path csv = tempDb.resolve("ragged.csv");
+ Files.writeString(csv, "a,b\n1,2,3\n");
+
+ QueryExecutionException e = assertThrows(QueryExecutionException.class,
+ () -> catalog.registerTable("Bad", csv));
+ assertEquals(ErrorCode.DATA_ERROR, e.code());
+ assertFalse(catalog.tableExists("Bad"), "failed parse must not leave a partial table");
+ }
+
+ @Test
+ public void registeredTableSchemaIsImmutable() throws IOException {
+ DBCatalog.resetDBCatalog();
+ DBCatalog catalog = DBCatalog.getInstance();
+ Path csv = tempDb.resolve("u.csv");
+ Files.writeString(csv, "a,b\n1,2\n");
+ catalog.registerTable("U", csv);
+ assertThrows(UnsupportedOperationException.class,
+ () -> catalog.getDBSchemata("U").put("c", 9));
+ }
+
+ /**
+ * The D4 invariant under contention: while one thread registers a table, readers
+ * see it either fully absent or fully present — never a torn state (path without
+ * types, schema without path). A single putIfAbsent guarantees this.
+ */
+ @Test
+ public void registerTableIsAtomicAgainstConcurrentReaders() throws Exception {
+ DBCatalog.resetDBCatalog();
+ DBCatalog catalog = DBCatalog.getInstance();
+ Path csv = tempDb.resolve("race.csv");
+ Files.writeString(csv, "a,b\n1,2\n");
+
+ int readers = 12;
+ int iterationsPerReader = 5000;
+ var pool = java.util.concurrent.Executors.newFixedThreadPool(readers + 1);
+ try {
+ var barrier = new java.util.concurrent.CyclicBarrier(readers + 1);
+ var torn = new java.util.concurrent.atomic.AtomicReference();
+ var futures = new java.util.ArrayList>();
+ for (int i = 0; i < readers; i++) {
+ futures.add(pool.submit(() -> {
+ barrier.await();
+ for (int n = 0; n < iterationsPerReader; n++) {
+ TableMeta meta = catalog.getTableMeta("Race");
+ if (meta != null) {
+ // Present => every field must be present and consistent.
+ if (meta.path() == null || meta.schema() == null || meta.types() == null) {
+ torn.compareAndSet(null, "TableMeta exposed with a null field");
+ } else if (meta.schema().size() != meta.types().size()) {
+ torn.compareAndSet(null, "schema/types width mismatch");
+ }
+ }
+ }
+ return null;
+ }));
+ }
+ // Writer thread registers mid-flight.
+ futures.add(pool.submit(() -> {
+ barrier.await();
+ catalog.registerTable("Race", csv);
+ return null;
+ }));
+ for (var f : futures) f.get();
+ assertNull(torn.get(), torn.get());
+ assertTrue(catalog.tableExists("Race"), "table is present after the race");
+ } finally {
+ pool.shutdownNow();
+ }
+ }
+
+ /**
+ * The 409 contract PR 4b's upload path depends on: when N threads register the SAME name
+ * at once, exactly one putIfAbsent wins (returns true), the rest report false, and the
+ * winner's meta is installed intact — never a torn blend of two registrations.
+ */
+ @Test
+ public void registerTableHasExactlyOneWinnerAmongConcurrentSameNameRegisters() throws Exception {
+ DBCatalog.resetDBCatalog();
+ DBCatalog catalog = DBCatalog.getInstance();
+
+ int k = 16;
+ List csvs = new java.util.ArrayList<>();
+ for (int i = 0; i < k; i++) {
+ Path p = tempDb.resolve("w" + i + ".csv");
+ Files.writeString(p, "a,b\n" + i + "," + i + "\n");
+ csvs.add(p);
+ }
+
+ var pool = java.util.concurrent.Executors.newFixedThreadPool(k);
+ try {
+ var barrier = new java.util.concurrent.CyclicBarrier(k);
+ var winners = new java.util.concurrent.atomic.AtomicInteger();
+ var futures = new java.util.ArrayList>();
+ for (int i = 0; i < k; i++) {
+ Path csv = csvs.get(i);
+ futures.add(pool.submit(() -> {
+ barrier.await();
+ if (catalog.registerTable("Solo", csv)) {
+ winners.incrementAndGet();
+ }
+ return null;
+ }));
+ }
+ for (var f : futures) f.get();
+
+ assertEquals(1, winners.get(), "exactly one concurrent register may win");
+ assertTrue(catalog.tableExists("Solo"));
+ assertTrue(csvs.contains(catalog.getDBLocation("Solo")),
+ "the installed meta is one registration's, intact (not a torn blend)");
+ } finally {
+ pool.shutdownNow();
+ }
+ }
}
diff --git a/engine/src/test/java/com/github/jinba1/cuckoodb/QueryPlannerSqlTest.java b/engine/src/test/java/com/github/jinba1/cuckoodb/QueryPlannerSqlTest.java
new file mode 100644
index 0000000..fa16497
--- /dev/null
+++ b/engine/src/test/java/com/github/jinba1/cuckoodb/QueryPlannerSqlTest.java
@@ -0,0 +1,128 @@
+package com.github.jinba1.cuckoodb;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import com.github.jinba1.cuckoodb.operator.Operator;
+
+/**
+ * E1: the String-SQL planning entrypoint ({@link QueryPlanner#planSql}) must behave
+ * identically to the file overload — same plan, same EXPLAIN rendering, same error
+ * classification — because both delegate to one shared parsed-statement core. The
+ * file overload itself stays exercised unchanged by SampleQueryRunner / ExplainEndToEndTest.
+ */
+class QueryPlannerSqlTest {
+
+ private static final String SAMPLE_DB_DIR = "samples/db";
+
+ @TempDir
+ static Path tempDir;
+
+ @BeforeAll
+ static void initCatalog() {
+ DBCatalog.resetDBCatalog();
+ DBCatalog.initDBCatalog(SAMPLE_DB_DIR);
+ }
+
+ @AfterAll
+ static void resetCatalog() {
+ DBCatalog.resetDBCatalog();
+ }
+
+ /** Plans the same SQL from a string and from a file; the rendered trees must match. */
+ private void assertPlanParity(String sql) throws IOException {
+ QueryConfig cfg = QueryConfig.defaults();
+ Path file = tempDir.resolve("q.sql");
+ Files.writeString(file, sql);
+
+ PlannedQuery fromFile = QueryPlanner.planQuery(file.toString(), cfg);
+ PlannedQuery fromSql = QueryPlanner.planSql(sql, cfg);
+
+ assertEquals(PlanPrinter.print(fromFile.root()), PlanPrinter.print(fromSql.root()),
+ "string-source plan must render identically to file-source plan: " + sql);
+ assertEquals(fromFile.explainText(), fromSql.explainText(),
+ "EXPLAIN text must match across sources: " + sql);
+ }
+
+ @Test
+ void selectStarParity() throws IOException {
+ assertPlanParity("SELECT * FROM Student");
+ }
+
+ @Test
+ void projectionAndWhereParity() throws IOException {
+ assertPlanParity("SELECT Student.A, Student.D FROM Student WHERE Student.D > 30");
+ }
+
+ @Test
+ void joinParity() throws IOException {
+ assertPlanParity(
+ "SELECT Student.A, Enrolled.H FROM Student, Enrolled WHERE Student.A = Enrolled.A");
+ }
+
+ @Test
+ void explainParityRendersBeforeAndAfter() throws IOException {
+ PlannedQuery planned = QueryPlanner.planSql(
+ "EXPLAIN SELECT Student.A FROM Student WHERE Student.D > 30", QueryConfig.defaults());
+ assertNotNull(planned.explainText(), "EXPLAIN must populate explainText");
+ assertTrue(planned.explainText().contains("Plan (as written)"));
+ assertTrue(planned.explainText().contains("Plan (optimized)"));
+ }
+
+ @Test
+ void badSqlThrowsParseError() {
+ QueryExecutionException e = assertThrows(QueryExecutionException.class,
+ () -> QueryPlanner.planSql("SELECT FROM WHERE", QueryConfig.defaults()));
+ assertEquals(ErrorCode.PARSE_ERROR, e.code());
+ }
+
+ @Test
+ void nonSelectThrowsUnsupported() {
+ QueryExecutionException e = assertThrows(QueryExecutionException.class,
+ () -> QueryPlanner.planSql("DELETE FROM Student", QueryConfig.defaults()));
+ assertEquals(ErrorCode.UNSUPPORTED_SQL, e.code());
+ }
+
+ @Test
+ void trailingSemicolonIsAccepted() {
+ // A single statement with a trailing ';' is valid SQL — must plan, not error.
+ PlannedQuery planned = QueryPlanner.planSql("SELECT * FROM Student;", QueryConfig.defaults());
+ assertNotNull(planned.root());
+ assertNull(planned.explainText());
+ }
+
+ @Test
+ void nullSqlThrowsParseErrorNotNpe() {
+ // Every planSql failure must be a classified QueryExecutionException, never a raw NPE
+ // that the server would map to 500 instead of 400.
+ QueryExecutionException e = assertThrows(QueryExecutionException.class,
+ () -> QueryPlanner.planSql(null, QueryConfig.defaults()));
+ assertEquals(ErrorCode.PARSE_ERROR, e.code());
+ }
+
+ @Test
+ void blankSqlThrowsParseError() {
+ QueryExecutionException e = assertThrows(QueryExecutionException.class,
+ () -> QueryPlanner.planSql(" \n ", QueryConfig.defaults()));
+ assertEquals(ErrorCode.PARSE_ERROR, e.code());
+ }
+
+ @Test
+ void multipleStatementsAreRejected() {
+ // Read-only-by-construction: a second statement must never sneak through. JSqlParser's
+ // single-statement parse rejects it as a syntax error.
+ QueryExecutionException e = assertThrows(QueryExecutionException.class,
+ () -> QueryPlanner.planSql("SELECT * FROM Student; SELECT * FROM Course;",
+ QueryConfig.defaults()));
+ assertTrue(e.code() == ErrorCode.PARSE_ERROR || e.code() == ErrorCode.UNSUPPORTED_SQL,
+ "multi-statement input must be rejected, got " + e.code());
+ }
+}
diff --git a/engine/src/test/java/com/github/jinba1/cuckoodb/SampleQueryByteIdenticalTest.java b/engine/src/test/java/com/github/jinba1/cuckoodb/SampleQueryByteIdenticalTest.java
new file mode 100644
index 0000000..7449247
--- /dev/null
+++ b/engine/src/test/java/com/github/jinba1/cuckoodb/SampleQueryByteIdenticalTest.java
@@ -0,0 +1,94 @@
+package com.github.jinba1.cuckoodb;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Runs the packaged sample queries through the CLI path and asserts each output is
+ * byte-identical to the committed golden output — row ORDER preserved (no sort), with only
+ * line endings, per-line trailing whitespace, and trailing newlines normalized.
+ *
+ * This is the byte gate {@link SampleQueryRunner} performs as a manual {@code exec:exec}
+ * driver, now wired into {@code mvn test}/CI. The pre-existing {@code CuckooDBTest} sample
+ * check sorts and trims, so it cannot catch a row-ordering or separator regression — exactly
+ * what the {@code execute()}->{@code drain()} refactor must not introduce. This test does
+ * not sort, so it does.
+ */
+class SampleQueryByteIdenticalTest {
+
+ @TempDir
+ Path outDir;
+
+ @Test
+ void allSampleQueriesAreByteIdentical() throws IOException {
+ Path samples = Paths.get(System.getProperty("user.dir"), "samples");
+ Path dbDir = samples.resolve("db");
+ Path inputDir = samples.resolve("input");
+ Path expectedDir = samples.resolve("expected_output");
+
+ File[] queryFiles = inputDir.toFile().listFiles((d, n) -> n.endsWith(".sql"));
+ assertNotNull(queryFiles, "sample input directory must exist: " + inputDir);
+ assertTrue(queryFiles.length >= 20,
+ "expected at least 20 sample queries, found " + queryFiles.length);
+ Arrays.sort(queryFiles);
+
+ List failures = new ArrayList<>();
+ for (File queryFile : queryFiles) {
+ String name = queryFile.getName();
+ String base = name.substring(0, name.length() - 4);
+ Path expectedFile = expectedDir.resolve(base + ".csv");
+ if (!Files.exists(expectedFile)) {
+ continue; // no golden output committed for this query
+ }
+ Path outputFile = outDir.resolve(base + ".csv");
+
+ DBCatalog.resetDBCatalog();
+ DBCatalog.initDBCatalog(dbDir.toString());
+ PlannedQuery planned = QueryPlanner.planQuery(queryFile.getAbsolutePath());
+ if (planned.explainText() != null) {
+ Files.writeString(outputFile, planned.explainText());
+ } else {
+ CuckooDB.execute(planned.root(), outputFile.toString());
+ }
+
+ String actual = normalize(Files.readString(outputFile));
+ String expected = normalize(Files.readString(expectedFile));
+ if (!actual.equals(expected)) {
+ failures.add(base);
+ }
+ }
+ assertTrue(failures.isEmpty(), "byte-identical mismatch against golden output in: " + failures);
+ }
+
+ /**
+ * Mirrors {@link SampleQueryRunner}'s normalization: {@code \n} line endings, per-line
+ * trailing-whitespace strip, no trailing newline. Crucially does NOT sort rows, so a
+ * row-ordering regression in the CLI path fails this comparison.
+ */
+ private static String normalize(String content) {
+ String normalized = content.replace("\r\n", "\n").replace("\r", "\n");
+ String[] lines = normalized.split("\n", -1);
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < lines.length; i++) {
+ if (i > 0) {
+ sb.append("\n");
+ }
+ sb.append(lines[i].stripTrailing());
+ }
+ while (sb.length() > 0 && sb.charAt(sb.length() - 1) == '\n') {
+ sb.deleteCharAt(sb.length() - 1);
+ }
+ return sb.toString();
+ }
+}