feat!: enforce unique headers by default

osiegmar · osiegmar · commit 5ae17f5b2ae4 · 2025-05-26T06:16:42.000+02:00
FastCSV now rejects duplicate headers by default to prevent data misinterpretation. A new `allowDuplicateHeader` option was introduced, allowing users to override this behavior if needed.
diff --git a/docs/src/content/docs/architecture/interpretation.md b/docs/src/content/docs/architecture/interpretation.md
@@ -146,13 +146,14 @@ header_a,header_aCRLF
 value_1,value_2CRLF
 ```
 
-The `NamedCsvRecord` of FastCSV offers several options to handle this case:
+The `NamedCsvRecord` class in FastCSV offers several options to handle this scenario:
 
-- `getField("header_a")`, `findField("header_a")` and `getFieldsAsMap()` returns only the **first** value (`"value_1"`).
-- `findFields("header_a")` and `getFieldsAsMapList()` returns a List containing **all** values (`"value_1"`
-  and `"value_2"`).
+- By default, FastCSV does **not** allow duplicate headers to prevent misinterpretation of data.
+  This behavior can be changed by calling `allowDuplicateHeader(true)` on the `NamedCsvRecordHandlerBuilder`.
+- Methods like `getField("header_a")`, `findField("header_a")`, and `getFieldsAsMap()` return only the **first** value (`"value_1"`).
+- Methods like `findFields("header_a")` and `getFieldsAsMapList()` return a list containing **all** values (`"value_1"` and `"value_2"`).
 
-Regardless of the chosen option, FastCSV always handles the header as case-sensitive.
+Regardless of the option chosen, FastCSV always treats headers as case-sensitive.
 
 ### Spaces within fields
 
diff --git a/docs/src/content/docs/guides/upgrading.md b/docs/src/content/docs/guides/upgrading.md
@@ -13,6 +13,23 @@ For a full list of changes, including new features, see the [changelog](https://
 - The minimum Java version has been raised from 11 to 17
 - This also raised the required Android API level from version 33 (Android 13) to 34 (Android 14)
 
+## Duplicate header handling
+
+FastCSV 4.x rejects duplicate headers by default, ensuring that each header field is unique and preventing misinterpretation.
+
+You can change this behavior by calling `allowDuplicateHeader(true)` on the `NamedCsvRecordHandlerBuilder`.
+
+```java title="Example"
+var rh = NamedCsvRecordHandler.of(c -> c.allowDuplicateHeader(true));
+try (CsvReader<NamedCsvRecord> csv = CsvReader.builder().build(rh, csvFile)) {
+    // ...
+}
+```
+
+:::caution
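+Because the default has changed, input with duplicate header names that was previously read without error now fails with an exception. Review your code and call `allowDuplicateHeader(true)` wherever duplicate headers are expected.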
+:::
+
 ## Ignoring different field counts
 
 FastCSV 4.x no longer ignores different field counts by default, ensuring that data is not misinterpreted.
diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx
@@ -70,6 +70,7 @@ The main features of FastCSV include:
 - Supports single and multi-character field separators
 - Supports trimming of whitespaces around quoted fields
 - Supports optional header records (access fields by name)
+- Supports duplicate header names
 - Supports skipping empty lines
 - Supports skipping non-CSV header (either by a fixed number of lines or by peeking data)
 - Supports commented lines (skipping & reading) with configurable comment character
diff --git a/lib/src/intTest/java/blackbox/reader/NamedCsvReaderTest.java b/lib/src/intTest/java/blackbox/reader/NamedCsvReaderTest.java
@@ -53,7 +53,9 @@ void findFieldByName() {
 
     @Test
     void findFieldsByName() {
-        assertThat(parse("foo,xoo,foo\nbar,moo,baz").stream())
+        final var cbh = NamedCsvRecordHandler
+            .of(c -> c.allowDuplicateHeader(true));
+        assertThat(CsvReader.builder().build(cbh, "foo,xoo,foo\nbar,moo,baz").stream())
             .singleElement(NamedCsvRecordAssert.NAMED_CSV_RECORD)
             .findFields("foo").containsExactly("bar", "baz");
     }
@@ -86,26 +88,30 @@ void findNonExistingFieldByName2() {
 
     @Test
     void headerToString() {
-        assertThat(parse("headerA,headerB,headerA\nfieldA,fieldB,fieldC\n").stream())
+        assertThat(parse("headerA,headerB,headerC\nfieldA,fieldB,fieldC\n").stream())
             .singleElement()
             .asString()
             .isEqualTo("NamedCsvRecord[startingLineNumber=2, "
                 + "fields=[fieldA, fieldB, fieldC], "
                 + "comment=false, "
-                + "header=[headerA, headerB, headerA]]");
+                + "header=[headerA, headerB, headerC]]");
     }
 
     @Test
     void fieldMap() {
-        assertThat(parse("headerA,headerB,headerA\nfieldA,fieldB,fieldC\n").stream())
+        final var cbh = NamedCsvRecordHandler
+            .of(c -> c.allowDuplicateHeader(true));
+        assertThat(CsvReader.builder().build(cbh, "headerA,headerB,headerA\nfieldA,fieldB,fieldC\n").stream())
             .singleElement(NamedCsvRecordAssert.NAMED_CSV_RECORD)
             .fields()
             .containsExactly(entry("headerA", "fieldA"), entry("headerB", "fieldB"));
     }
 
     @Test
     void allFieldsMap() {
-        assertThat(parse("headerA,headerB,headerA\nfieldA,fieldB,fieldC\n").stream())
+        final var cbh = NamedCsvRecordHandler
+            .of(c -> c.allowDuplicateHeader(true));
+        assertThat(CsvReader.builder().build(cbh, "headerA,headerB,headerA\nfieldA,fieldB,fieldC\n").stream())
             .singleElement(NamedCsvRecordAssert.NAMED_CSV_RECORD)
             .allFields()
             .containsOnly(entry("headerA", List.of("fieldA", "fieldC")), entry("headerB", List.of("fieldB")));
diff --git a/lib/src/intTest/java/blackbox/reader/NamedCsvRecordHandlerTest.java b/lib/src/intTest/java/blackbox/reader/NamedCsvRecordHandlerTest.java
@@ -1,11 +1,13 @@
 package blackbox.reader;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import java.util.Map;
 
 import org.junit.jupiter.api.Test;
 
+import de.siegmar.fastcsv.reader.CsvParseException;
 import de.siegmar.fastcsv.reader.CsvReader;
 import de.siegmar.fastcsv.reader.FieldModifiers;
 import de.siegmar.fastcsv.reader.NamedCsvRecordHandler;
@@ -56,4 +58,20 @@ void consumer() {
             .fields().containsExactly(Map.entry("col1", "foo"), Map.entry("col2", "bar"));
     }
 
+    @Test
+    void noDuplicateHeaderInit() {
+        assertThatThrownBy(() -> NamedCsvRecordHandler.of(c -> c.header("col1", "col2", "col1")))
+            .isInstanceOf(IllegalArgumentException.class)
+            .hasMessage("Header contains duplicate fields: [col1]");
+    }
+
+    @Test
+    void noDuplicateHeaderData() {
+        assertThatThrownBy(() -> CsvReader.builder().ofNamedCsvRecord("col1,col2,col1").stream().count())
+            .isInstanceOf(CsvParseException.class)
+            .hasMessage("Exception when reading first record")
+            .hasRootCauseExactlyInstanceOf(IllegalArgumentException.class)
+            .hasRootCauseMessage("Header contains duplicate fields: [col1]");
+    }
+
 }
diff --git a/lib/src/main/java/de/siegmar/fastcsv/reader/NamedCsvRecordHandler.java b/lib/src/main/java/de/siegmar/fastcsv/reader/NamedCsvRecordHandler.java
@@ -1,5 +1,7 @@
 package de.siegmar.fastcsv.reader;
 
+import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.function.Consumer;
@@ -18,11 +20,14 @@
 public final class NamedCsvRecordHandler extends AbstractInternalCsvCallbackHandler<NamedCsvRecord> {
 
     private static final String[] EMPTY_HEADER = new String[0];
+    private final boolean allowDuplicateHeader;
     private String[] header;
 
     private NamedCsvRecordHandler(final int maxFields, final int maxFieldSize, final int maxRecordSize,
-                                  final FieldModifier fieldModifier, final List<String> header) {
+                                  final FieldModifier fieldModifier,
+                                  final boolean allowDuplicateHeader, final List<String> header) {
         super(maxFields, maxFieldSize, maxRecordSize, fieldModifier);
+        this.allowDuplicateHeader = allowDuplicateHeader;
         if (header != null) {
             setHeader(header.toArray(new String[0]));
         }
@@ -50,7 +55,7 @@ public static NamedCsvRecordHandler of() {
     ///
     /// @param configurer the configuration, must not be `null`
     /// @return the new instance
-    /// @throws NullPointerException if `null` is passed
+    /// @throws NullPointerException     if `null` is passed
     /// @throws IllegalArgumentException if argument constraints are violated
     /// @see #builder()
     public static NamedCsvRecordHandler of(final Consumer<NamedCsvRecordHandlerBuilder> configurer) {
@@ -60,14 +65,36 @@ public static NamedCsvRecordHandler of(final Consumer<NamedCsvRecordHandlerBuild
         return builder.build();
     }
 
-    private void setHeader(final String... header) {
+    @SuppressWarnings("PMD.UseVarargs")
+    private void setHeader(final String[] header) {
         Objects.requireNonNull(header, "header must not be null");
         for (final String h : header) {
             Objects.requireNonNull(h, "header element must not be null");
         }
+
+        if (!allowDuplicateHeader) {
+            checkForDuplicates(header);
+        }
+
         this.header = header.clone();
     }
 
+    @SuppressWarnings("PMD.UseVarargs")
+    private static void checkForDuplicates(final String[] header) {
+        final var duplicateHeaders = new LinkedHashSet<String>();
+        final var seen = new HashSet<String>();
+        for (final String h : header) {
+            if (!seen.add(h)) {
+                duplicateHeaders.add(h);
+            }
+        }
+
+        if (!duplicateHeaders.isEmpty()) {
+            throw new IllegalArgumentException("Header contains duplicate fields: "
+                + duplicateHeaders);
+        }
+    }
+
     @Override
     protected NamedCsvRecord buildRecord() {
         if (comment) {
@@ -83,15 +110,29 @@ protected NamedCsvRecord buildRecord() {
     }
 
     /// A builder for [NamedCsvRecordHandler].
-    @SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
+    @SuppressWarnings({"checkstyle:HiddenField", "PMD.AvoidFieldNameMatchingMethodName"})
     public static final class NamedCsvRecordHandlerBuilder
         extends AbstractInternalCsvCallbackHandlerBuilder<NamedCsvRecordHandlerBuilder> {
 
+        private boolean allowDuplicateHeader;
         private List<String> header;
 
         private NamedCsvRecordHandlerBuilder() {
         }
 
+        /// Sets whether duplicate header fields are allowed.
+        ///
+        /// When set to `false`, an [IllegalArgumentException] is thrown if the header contains duplicate fields.
+        /// When set to `true`, duplicate fields are allowed. See [NamedCsvRecord] for details on how duplicate
+        /// headers are handled.
+        ///
+        /// @param allowDuplicateHeader whether duplicate header fields are allowed (default: `false`)
+        /// @return This updated object, allowing additional method calls to be chained together.
+        public NamedCsvRecordHandlerBuilder allowDuplicateHeader(final boolean allowDuplicateHeader) {
+            this.allowDuplicateHeader = allowDuplicateHeader;
+            return this;
+        }
+
         /// Sets a predefined header.
         ///
         /// When not set, the header is taken from the first record (that is not a comment).
@@ -133,7 +174,8 @@ protected NamedCsvRecordHandlerBuilder self() {
         /// @throws IllegalArgumentException if argument constraints are violated
         ///     (see [AbstractInternalCsvCallbackHandler])
         public NamedCsvRecordHandler build() {
-            return new NamedCsvRecordHandler(maxFields, maxFieldSize, maxRecordSize, fieldModifier, header);
+            return new NamedCsvRecordHandler(maxFields, maxFieldSize, maxRecordSize, fieldModifier,
+                allowDuplicateHeader, header);
         }
 
     }