Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ protected void map(LongWritable key, Text value, Context context)
RECORD record = null;
try {
record = getLineParser().parse(value.toString());
} catch (IOException e) {
} catch (IOException | RuntimeException e) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can improve, this was a hack in commons-csv-1.0 https://github.com/apache/commons-csv/blob/4ef853bc13d3548ce9cae02c7c26ddbc1c790669/src/main/java/org/apache/commons/csv/CSVParser.java#L398

We could upgrade to newer one, where there is better error management!

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @NihalJain thanks for suggesting this yeah i agree we have to upgrade from commons-csv.
will work on this !!

context.getCounter(COUNTER_GROUP_NAME, "Parser errors").increment(1L);
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
package org.apache.phoenix.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import org.apache.commons.csv.CSVRecord;
Expand Down Expand Up @@ -49,4 +52,87 @@ public void testCsvLineParserWithQuoting() throws IOException {
assertTrue(parsed.isConsistent());
assertEquals(1, parsed.getRecordNumber());
}

/**
* Test that a normal CSV line parses successfully.
*/
@Test
public void testParseValidLine() throws IOException {
CsvToKeyValueMapper.CsvLineParser parser =
new CsvToKeyValueMapper.CsvLineParser(',', '"', '\\');
CSVRecord record = parser.parse("value1,value2,value3");
assertNotNull("Valid CSV line should parse to a non-null record", record);
}

/**
* Test that an empty line returns null (due to ignoreEmptyLines).
*/
@Test
public void testParseEmptyLine() throws IOException {
CsvToKeyValueMapper.CsvLineParser parser =
new CsvToKeyValueMapper.CsvLineParser(',', '"', '\\');
CSVRecord record = parser.parse("");
assertNull("Empty line should return null", record);
}

/**
* PHOENIX-7267: Verifies that commons-csv throws RuntimeException (not IOException)
* for malformed CSV input with unclosed quotes.
* <p>
* This is the root cause of the bug: the CSVParser iterator wraps IOException in a
* RuntimeException, which was not caught by FormatToBytesWritableMapper.map().
* The fix adds RuntimeException to the catch block.
*/
@Test
public void testParseMalformedLineThrowsRuntimeException() {
CsvToKeyValueMapper.CsvLineParser parser =
new CsvToKeyValueMapper.CsvLineParser(',', '"', null);

// This input has an opening quote that is never closed — triggers
// "EOF reached before encapsulated token finished" in commons-csv
String malformedInput = "value1,\"unclosed value";

try {
parser.parse(malformedInput);
fail("Expected an exception for malformed CSV input with unclosed quote");
} catch (IOException e) {
fail("Got IOException directly — commons-csv wraps this in RuntimeException");
} catch (RuntimeException e) {
// commons-csv's CSVParser iterator wraps IOException in RuntimeException.
// The fix in FormatToBytesWritableMapper.map() now catches this.
assertTrue("RuntimeException should wrap an IOException",
e.getCause() instanceof IOException);
assertTrue("Should contain EOF error message",
e.getMessage().contains("EOF reached before encapsulated token finished"));
}
}

/**
* PHOENIX-7267: Verifies that the mapper's error handling pattern works correctly.
* <p>
* Simulates the catch block in FormatToBytesWritableMapper.map() to confirm that
* both IOException and RuntimeException from malformed CSV are properly caught
* after the fix.
*/
@Test
public void testMapperCatchBlockHandlesMalformedCsv() {
CsvToKeyValueMapper.CsvLineParser parser =
new CsvToKeyValueMapper.CsvLineParser(',', '"', null);

String malformedInput = "val1,\"val2,val3";
boolean errorHandled = false;

// This simulates the fixed catch block in FormatToBytesWritableMapper.map():
// catch (IOException | RuntimeException e) { ... }
try {
parser.parse(malformedInput);
} catch (IOException | RuntimeException e) {
// With the fix, both IOException and RuntimeException are caught here.
// The mapper increments the "Parser errors" counter and continues
// processing the next record instead of failing the entire job.
errorHandled = true;
}

assertTrue("Malformed CSV should be caught by the fixed catch block", errorHandled);
}
}