@@ -248,7 +248,32 @@ impl TestData {
     }
 }
 
-/// Sets up test context and tables for both single and multiple partition scenarios
+/// ## Test Helper Function Guide
+///
+/// This file provides several helper functions to reduce repetitive test patterns:
+///
+/// ### For tests using TestData:
+/// - `run_complete_snapshot_test()` - Complete test: setup → SQL → assert_snapshot!
+/// - `run_complete_sorted_snapshot_test()` - Same as above, but with sorted output
+/// - `run_snapshot_test()` - Sets up and executes; returns results for custom assertions
+///
+/// ### For custom setups:
+/// - `run_simple_snapshot_test()` - Executes SQL on an existing context; returns results
+///
+/// ### Supporting functions:
+/// - `create_test_dict()` - Creates dictionary arrays with slices (preferred)
+/// - `setup_test_contexts()` - Sets up single- and multi-partition contexts
+/// - `test_query_consistency()` - Executes a query and verifies consistency across partitions
+///
+/// ### Usage examples:
+/// ```rust
+/// // Simple complete test
+/// run_complete_snapshot_test(&test_data, "SELECT * FROM t", "expected output").await?;
+///
+/// // Multiple tests with different data
+/// let results = run_snapshot_test(&test_data, "SELECT * FROM t", false).await?;
+/// assert_snapshot!(batches_to_string(&results), @"expected");
+/// ```
 async fn setup_test_contexts(
     test_data: &TestData,
 ) -> Result<(SessionContext, SessionContext)> {
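The guide above references `create_test_dict()` without showing its body. Below is a minimal sketch of what such a helper could look like: the `(values, keys)` argument order matches the call sites later in this diff, but the use of arrow's `DictionaryArray::new` is an assumption, not the committed implementation.

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, DictionaryArray, StringArray, UInt32Array};
use arrow::datatypes::UInt32Type;

/// Hypothetical sketch: builds a DictionaryArray<UInt32Type> from a values
/// slice and a keys slice, so tests can express null keys (`None` in `keys`)
/// and null values (a `None` entry in `values`) directly.
fn create_test_dict(
    values: &[Option<&str>],
    keys: &[Option<u32>],
) -> DictionaryArray<UInt32Type> {
    let values: ArrayRef = Arc::new(StringArray::from(values.to_vec()));
    let keys = UInt32Array::from(keys.to_vec());
    DictionaryArray::new(keys, values)
}
```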
@@ -762,11 +787,8 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> {
     let test_data_median = TestData::new_for_median();
 
     // Test COUNT null exclusion with basic data
-    let (ctx_single, ctx_multi) = setup_test_contexts(&test_data_basic).await?;
-
     let sql_count = "SELECT dict_null_keys, COUNT(value) as cnt FROM t GROUP BY dict_null_keys ORDER BY dict_null_keys NULLS FIRST";
-    let results_count =
-        test_query_consistency(&ctx_single, &ctx_multi, sql_count).await?;
+    let results_count = run_snapshot_test(&test_data_basic, sql_count, false).await?;
 
     assert_snapshot!(
         batches_to_string(&results_count),
@@ -782,12 +804,8 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> {
     );
 
     // Test SUM null handling with extended data
-    let (ctx_single_sum, ctx_multi_sum) =
-        setup_test_contexts(&test_data_extended).await?;
-
     let sql_sum = "SELECT dict_null_vals, SUM(value) as total FROM t GROUP BY dict_null_vals ORDER BY dict_null_vals NULLS FIRST";
-    let results_sum =
-        test_query_consistency(&ctx_single_sum, &ctx_multi_sum, sql_sum).await?;
+    let results_sum = run_snapshot_test(&test_data_extended, sql_sum, false).await?;
 
     assert_snapshot!(
         batches_to_string(&results_sum),
@@ -804,11 +822,8 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> {
     );
 
     // Test MIN null handling with min/max data
-    let (ctx_single_min, ctx_multi_min) = setup_test_contexts(&test_data_min_max).await?;
-
     let sql_min = "SELECT dict_null_keys, MIN(value) as minimum FROM t GROUP BY dict_null_keys ORDER BY dict_null_keys NULLS FIRST";
-    let results_min =
-        test_query_consistency(&ctx_single_min, &ctx_multi_min, sql_min).await?;
+    let results_min = run_snapshot_test(&test_data_min_max, sql_min, false).await?;
 
     assert_snapshot!(
         batches_to_string(&results_min),
@@ -825,12 +840,8 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> {
     );
 
     // Test MEDIAN null handling with median data
-    let (ctx_single_median, ctx_multi_median) =
-        setup_test_contexts(&test_data_median).await?;
-
     let sql_median = "SELECT dict_null_vals, MEDIAN(value) as median_value FROM t GROUP BY dict_null_vals ORDER BY dict_null_vals NULLS FIRST";
-    let results_median =
-        test_query_consistency(&ctx_single_median, &ctx_multi_median, sql_median).await?;
+    let results_median = run_snapshot_test(&test_data_median, sql_median, false).await?;
 
     assert_snapshot!(
         batches_to_string(&results_median),
@@ -852,12 +863,11 @@ async fn test_aggregates_null_handling_comprehensive() -> Result<()> {
 #[tokio::test]
 async fn test_first_last_val_null_handling() -> Result<()> {
     let test_data = TestData::new_for_first_last();
-    let (ctx_single, ctx_multi) = setup_test_contexts(&test_data).await?;
 
     // Test FIRST_VALUE and LAST_VALUE with window functions over groups
     let sql = "SELECT dict_null_keys, value, FIRST_VALUE(value) OVER (PARTITION BY dict_null_keys ORDER BY value NULLS FIRST) as first_val, LAST_VALUE(value) OVER (PARTITION BY dict_null_keys ORDER BY value NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_val FROM t ORDER BY dict_null_keys NULLS FIRST, value NULLS FIRST";
 
-    let results_single = test_query_consistency(&ctx_single, &ctx_multi, sql).await?;
+    let results_single = run_snapshot_test(&test_data, sql, false).await?;
 
     assert_snapshot!(batches_to_string(&results_single), @r"
     +----------------+-------+-----------+----------+
@@ -1142,7 +1152,6 @@ async fn create_fuzz_context_with_partitions(
 
     Ok(ctx)
 }
-
 /// Splits fuzz test data into multiple batches for partitioning
 fn split_fuzz_data_into_batches(
     test_data: &FuzzTestData,
@@ -1213,161 +1222,11 @@ async fn test_max_with_fuzz_table_dict_nulls() -> Result<()> {
     | 3      |                     | str_e    | str_e |         |
     | 3      | dict_c              | str_f    | str_f | value_6 |
     +--------+---------------------+----------+-------+---------+
-    "
-    );
+    ");
 
     Ok(())
 }
 
-/// Test data structure for fuzz table with timestamp and dictionary columns containing nulls
-struct FuzzTimestampTestData {
-    schema: Arc<Schema>,
-    utf8_low: StringArray,
-    u8_low: UInt8Array,
-    dictionary_utf8_low: DictionaryArray<UInt32Type>,
-    timestamp_us: TimestampMicrosecondArray,
-}
-
-impl FuzzTimestampTestData {
-    fn new() -> Self {
-        // Create dictionary columns with null keys and values
-        let dictionary_utf8_low = create_test_dict(
-            &[Some("dict_x"), None, Some("dict_y"), Some("dict_z")],
-            &[
-                Some(0), // dict_x
-                Some(1), // null value
-                Some(2), // dict_y
-                None,    // null key
-                Some(0), // dict_x
-                Some(1), // null value
-                Some(3), // dict_z
-                None,    // null key
-                Some(2), // dict_y
-            ],
-        );
-
-        let utf8_low = StringArray::from(vec![
-            Some("alpha"),
-            Some("beta"),
-            Some("gamma"),
-            Some("delta"),
-            Some("alpha"),
-            Some("epsilon"),
-            Some("zeta"),
-            Some("delta"),
-            Some("gamma"),
-        ]);
-
-        let u8_low = UInt8Array::from(vec![
-            Some(10),
-            Some(20),
-            Some(30),
-            Some(20),
-            Some(10),
-            Some(40),
-            Some(30),
-            Some(20),
-            Some(30),
-        ]);
-
-        // Create timestamp data with some nulls
-        let timestamp_us = TimestampMicrosecondArray::from(vec![
-            Some(1000000), // 1970-01-01 00:00:01
-            Some(2000000), // 1970-01-01 00:00:02
-            Some(3000000), // 1970-01-01 00:00:03
-            None,          // null timestamp
-            Some(1500000), // 1970-01-01 00:00:01.5
-            Some(4000000), // 1970-01-01 00:00:04
-            Some(2500000), // 1970-01-01 00:00:02.5
-            Some(3500000), // 1970-01-01 00:00:03.5
-            Some(2800000), // 1970-01-01 00:00:02.8
-        ]);
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("utf8_low", DataType::Utf8, true),
-            Field::new("u8_low", DataType::UInt8, true),
-            Field::new("dictionary_utf8_low", string_dict_type(), true),
-            Field::new(
-                "timestamp_us",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-                true,
-            ),
-        ]));
-
-        Self {
-            schema,
-            utf8_low,
-            u8_low,
-            dictionary_utf8_low,
-            timestamp_us,
-        }
-    }
-}
-
-/// Sets up test contexts for fuzz table with timestamps and both single and multiple partitions
-async fn setup_fuzz_timestamp_test_contexts() -> Result<(SessionContext, SessionContext)>
-{
-    let test_data = FuzzTimestampTestData::new();
-
-    // Single partition context
-    let ctx_single = create_fuzz_timestamp_context_with_partitions(&test_data, 1).await?;
-
-    // Multiple partition context
-    let ctx_multi = create_fuzz_timestamp_context_with_partitions(&test_data, 3).await?;
-
-    Ok((ctx_single, ctx_multi))
-}
-
-/// Creates a session context with fuzz timestamp table partitioned into specified number of partitions
-async fn create_fuzz_timestamp_context_with_partitions(
-    test_data: &FuzzTimestampTestData,
-    num_partitions: usize,
-) -> Result<SessionContext> {
-    let ctx = SessionContext::new_with_config(
-        SessionConfig::new().with_target_partitions(num_partitions),
-    );
-
-    let batches = split_fuzz_timestamp_data_into_batches(test_data, num_partitions)?;
-    let provider = MemTable::try_new(test_data.schema.clone(), batches)?;
-    ctx.register_table("fuzz_table", Arc::new(provider))?;
-
-    Ok(ctx)
-}
-
-/// Splits fuzz timestamp test data into multiple batches for partitioning
-fn split_fuzz_timestamp_data_into_batches(
-    test_data: &FuzzTimestampTestData,
-    num_partitions: usize,
-) -> Result<Vec<Vec<RecordBatch>>> {
-    debug_assert!(num_partitions > 0, "num_partitions must be greater than 0");
-    let total_len = test_data.utf8_low.len();
-    let chunk_size = total_len.div_ceil(num_partitions);
-
-    let mut batches = Vec::new();
-    let mut start = 0;
-
-    while start < total_len {
-        let end = min(start + chunk_size, total_len);
-        let len = end - start;
-
-        if len > 0 {
-            let batch = RecordBatch::try_new(
-                test_data.schema.clone(),
-                vec![
-                    Arc::new(test_data.utf8_low.slice(start, len)),
-                    Arc::new(test_data.u8_low.slice(start, len)),
-                    Arc::new(test_data.dictionary_utf8_low.slice(start, len)),
-                    Arc::new(test_data.timestamp_us.slice(start, len)),
-                ],
-            )?;
-            batches.push(vec![batch]);
-        }
-        start = end;
-    }
-
-    Ok(batches)
-}
-
 /// Test MIN with fuzz table containing dictionary columns with null keys and values and timestamp data (single and multiple partitions)
 #[tokio::test]
 async fn test_min_timestamp_with_fuzz_table_dict_nulls() -> Result<()> {
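The removed timestamp-specific helpers duplicated the chunking logic that the surviving `split_fuzz_*_data_into_batches` functions still carry: ceil-division of the row count, then one `Array::slice` per column per partition. A hedged sketch of a generic consolidation of that pattern; the function name and signature are hypothetical, not part of this commit:

```rust
use std::cmp::min;
use std::sync::Arc;

use arrow::array::{Array, ArrayRef};
use arrow::datatypes::Schema;
use arrow::record_batch::RecordBatch;
use datafusion::error::Result;

/// Hypothetical generic splitter: slices every column into
/// ceil(total_len / num_partitions)-sized chunks, one single-batch partition
/// per chunk, mirroring the per-struct split_* helpers in this file.
fn split_arrays_into_batches(
    schema: Arc<Schema>,
    columns: &[ArrayRef],
    num_partitions: usize,
) -> Result<Vec<Vec<RecordBatch>>> {
    debug_assert!(num_partitions > 0, "num_partitions must be greater than 0");
    let total_len = columns.first().map_or(0, |c| c.len());
    let chunk_size = total_len.div_ceil(num_partitions.max(1));

    let mut batches = Vec::new();
    let mut start = 0;
    while start < total_len {
        let len = min(chunk_size, total_len - start);
        // Zero-copy slices of each column for this partition's rows
        let sliced: Vec<ArrayRef> = columns.iter().map(|c| c.slice(start, len)).collect();
        batches.push(vec![RecordBatch::try_new(schema.clone(), sliced)?]);
        start += len;
    }
    Ok(batches)
}
```

With the removed fixture's 9 rows and the 3 target partitions used by its setup function, `chunk_size = ceil(9 / 3) = 3`, so each partition receives exactly one 3-row batch.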
@@ -1547,7 +1406,6 @@ async fn create_fuzz_count_context_with_partitions(
 
     Ok(ctx)
 }
-
 /// Splits fuzz count test data into multiple batches for partitioning
 fn split_fuzz_count_data_into_batches(
     test_data: &FuzzCountTestData,
@@ -1791,7 +1649,6 @@ async fn create_fuzz_median_context_with_partitions(
 
     Ok(ctx)
 }
-
 /// Splits fuzz median test data into multiple batches for partitioning
 fn split_fuzz_median_data_into_batches(
     test_data: &FuzzMedianTestData,
@@ -1869,3 +1726,52 @@ async fn test_median_distinct_with_fuzz_table_dict_nulls() -> Result<()> {
 
     Ok(())
 }
+
+/// Helper function to run snapshot tests with consistent setup, execution, and assertion.
+/// This reduces the repetitive "setup data → SQL → assert_snapshot!" pattern.
+async fn run_snapshot_test(
+    test_data: &TestData,
+    sql: &str,
+    _use_sorted_output: bool, // reserved: callers pick sorted vs. unsorted rendering at the assertion site
+) -> Result<Vec<RecordBatch>> {
+    let (ctx_single, ctx_multi) = setup_test_contexts(test_data).await?;
+    let results = test_query_consistency(&ctx_single, &ctx_multi, sql).await?;
+    Ok(results)
+}
+
+/// Helper function for simpler snapshot tests that only need single-partition execution
+async fn run_simple_snapshot_test(
+    ctx: &SessionContext,
+    sql: &str,
+) -> Result<Vec<RecordBatch>> {
+    let df = ctx.sql(sql).await?;
+    let results = df.collect().await?;
+    Ok(results)
+}
+
+/// Helper function to run a complete snapshot test with TestData.
+/// This fully encapsulates the "setup data → SQL → assert_snapshot!" pattern.
+async fn run_complete_snapshot_test(
+    test_data: &TestData,
+    sql: &str,
+    expected_snapshot: &str,
+) -> Result<()> {
+    let results = run_snapshot_test(test_data, sql, false).await?;
+
+    assert_snapshot!(batches_to_string(&results), expected_snapshot);
+
+    Ok(())
+}
+
+/// Helper function to run a complete snapshot test with sorted output
+async fn run_complete_sorted_snapshot_test(
+    test_data: &TestData,
+    sql: &str,
+    expected_snapshot: &str,
+) -> Result<()> {
+    let results = run_snapshot_test(test_data, sql, true).await?;
+
+    assert_snapshot!(batches_to_sort_string(&results), expected_snapshot);
+
+    Ok(())
+}
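Taken together, a test written against these helpers collapses to a few lines. A hypothetical composition is sketched below; `TestData::new_for_median()` and the helper signatures come from this diff, while the test name, SQL, and assertion are illustrative placeholders:

```rust
/// Hypothetical usage of the new helpers; not part of this commit.
#[tokio::test]
async fn example_count_with_helpers() -> Result<()> {
    let test_data = TestData::new_for_median();
    let sql = "SELECT dict_null_vals, COUNT(value) AS cnt FROM t \
               GROUP BY dict_null_vals ORDER BY dict_null_vals NULLS FIRST";

    // Keep the results around for custom assertions instead of an inline
    // snapshot (run_complete_snapshot_test would assert in one call).
    let results = run_snapshot_test(&test_data, sql, false).await?;
    assert!(!results.is_empty());
    Ok(())
}
```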