From 26ce9e86ab73543c9495cc15164d657fdda9cfda Mon Sep 17 00:00:00 2001 From: arnavb Date: Fri, 15 Aug 2025 07:26:21 +0000 Subject: [PATCH 1/2] Add assertions to TestMemPageStore --- .../parquet/column/mem/TestMemPageStore.java | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java index 6628305e84..700050f95e 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java @@ -20,11 +20,14 @@ import static org.apache.parquet.column.Encoding.BIT_PACKED; import static org.apache.parquet.column.Encoding.PLAIN; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import java.io.IOException; import org.apache.parquet.bytes.BytesInput; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.page.DataPage; +import org.apache.parquet.column.page.DataPageV1; import org.apache.parquet.column.page.PageReader; import org.apache.parquet.column.page.PageWriter; import org.apache.parquet.column.page.mem.MemPageStore; @@ -46,19 +49,42 @@ public void test() throws IOException { ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2); LongStatistics stats = new LongStatistics(); PageWriter pageWriter = memPageStore.getPageWriter(col); + pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN); + PageReader pageReader = memPageStore.getPageReader(col); long totalValueCount = 
pageReader.getTotalValueCount(); - LOG.info(String.valueOf(totalValueCount)); + LOG.info("Total value count: " + totalValueCount); + + assertEquals("Expected total value count to be 836 (4 pages * 209 values)", 836, totalValueCount); + int total = 0; + int pageCount = 0; do { DataPage readPage = pageReader.readPage(); + + // Assert page was successfully read + assertNotNull("Page should not be null", readPage); + // Assert page has expected value count + assertEquals("Each page should have 209 values", 209, readPage.getValueCount()); + // Assert encodings when the implementation is DataPageV1 + if (readPage instanceof DataPageV1) { + DataPageV1 v1 = (DataPageV1) readPage; + assertEquals("Page repetition level encoding should be BIT_PACKED", BIT_PACKED, v1.getRlEncoding()); + assertEquals("Page definition level encoding should be BIT_PACKED", BIT_PACKED, v1.getDlEncoding()); + assertEquals("Page value encoding should be PLAIN", PLAIN, v1.getValueEncoding()); + } + total += readPage.getValueCount(); - LOG.info(readPage.toString()); - // TODO: assert + pageCount++; + LOG.info("Page " + pageCount + ": " + readPage.toString()); } while (total < totalValueCount); + + // Assert we read exactly the expected number of pages and values + assertEquals("Should have read 4 pages", 4, pageCount); + assertEquals("Total values read should match totalValueCount", totalValueCount, total); } } From 2389db8091138ad31c1431957b2ea8a79cd170e1 Mon Sep 17 00:00:00 2001 From: arnavb Date: Thu, 28 Aug 2025 06:24:54 +0000 Subject: [PATCH 2/2] Require DataPageV1 in TestMemPageStore and drop per-page logging --- .../java/org/apache/parquet/column/mem/TestMemPageStore.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java index 700050f95e..41fc52d757 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java +++ 
b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java @@ -22,6 +22,7 @@ import static org.apache.parquet.column.Encoding.PLAIN; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import java.io.IOException; import org.apache.parquet.bytes.BytesInput; @@ -71,6 +72,7 @@ public void test() throws IOException { // Assert page has expected value count assertEquals("Each page should have 209 values", 209, readPage.getValueCount()); // Assert encodings when the implementation is DataPageV1 + assertTrue("Page should be an instance of DataPageV1", readPage instanceof DataPageV1); if (readPage instanceof DataPageV1) { DataPageV1 v1 = (DataPageV1) readPage; assertEquals("Page repetition level encoding should be BIT_PACKED", BIT_PACKED, v1.getRlEncoding()); @@ -80,7 +82,6 @@ public void test() throws IOException { total += readPage.getValueCount(); pageCount++; - LOG.info("Page " + pageCount + ": " + readPage.toString()); } while (total < totalValueCount); // Assert we read exactly the expected number of pages and values