Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/maven.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: Java CI
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '11'
- name: Build with Maven
run: mvn -B test
13 changes: 12 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
<target>7</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
</plugin>
</plugins>
</build>

Expand Down Expand Up @@ -46,5 +51,11 @@
<artifactId>avro</artifactId>
<version>1.10.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -23,35 +23,37 @@ public static void main(String[] args) {
DateTime dateTime = new DateTime(2020, 6, 1, 0, 0, 0, 0, DateTimeZone.UTC);

for(int i = 0; i < 2; i++) {
generateParquetFileFor(dateTime.plusDays(i));
try {
generateParquetFileFor(dateTime.plusDays(i));
} catch (Exception e) {
e.printStackTrace(System.out);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using e.printStackTrace() is generally discouraged in application code, especially when writing to System.out. It can be difficult to manage and control log output this way.

Could we consider using a dedicated logging framework (like SLF4J with Logback or Log4j2) here? This would provide more robust and configurable logging.

For example, with SLF4J:

// At class level:
// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(App.class);

// In catch block:
logger.error("Failed to generate Parquet file for date: {}", dateTime.plusDays(i), e);

If adding a full logging framework is out of scope for this PR, at least consider writing to System.err for errors, or re-throwing a custom exception if main shouldn't handle this directly.

                // Consider using a proper logger here, e.g., logger.error("Error generating Parquet file", e);
                e.printStackTrace(System.err); // At a minimum, print to System.err for errors

}
}
}

private static void generateParquetFileFor(DateTime dateTime) {
try {
Schema schema = parseSchema();
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyyMMdd");
Path path = new Path("data_" + dateTime.toString(fmt) + ".parquet");

List<GenericData.Record> recordList = generateRecords(schema, dateTime);

try (ParquetWriter<GenericData.Record> writer = AvroParquetWriter.<GenericData.Record>builder(path)
.withSchema(schema)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withRowGroupSize(ParquetWriter.DEFAULT_BLOCK_SIZE)
.withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)
.withConf(new Configuration())
.withValidation(false)
.withDictionaryEncoding(false)
.build()) {

for (GenericData.Record record : recordList) {
writer.write(record);
}
public static Path generateParquetFileFor(DateTime dateTime) throws Exception {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Declaring throws Exception is quite broad and can make it harder for callers to understand and handle specific error conditions. It's generally better to declare more specific exceptions that the method might throw (e.g., java.io.IOException, or a custom application-specific exception).

Looking at the Parquet writer and file operations, java.io.IOException seems like a primary candidate. Would it be more appropriate to declare throws IOException or a more specific custom exception here?

public static Path generateParquetFileFor(DateTime dateTime) throws java.io.IOException {

Schema schema = parseSchema();
DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyyMMdd");
Path path = new Path("data_" + dateTime.toString(fmt) + ".parquet");

List<GenericData.Record> recordList = generateRecords(schema, dateTime);

try (ParquetWriter<GenericData.Record> writer = AvroParquetWriter.<GenericData.Record>builder(path)
.withSchema(schema)
.withCompressionCodec(CompressionCodecName.SNAPPY)
.withRowGroupSize(ParquetWriter.DEFAULT_BLOCK_SIZE)
.withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)
.withConf(new Configuration())
.withValidation(false)
.withDictionaryEncoding(false)
.build()) {

for (GenericData.Record record : recordList) {
writer.write(record);
}
} catch (Exception ex) {
ex.printStackTrace(System.out);
}

return path;
}

private static Schema parseSchema() {
Expand Down Expand Up @@ -87,4 +89,4 @@ private static List<GenericData.Record> generateRecords(Schema schema, DateTime

return recordList;
}
}
}
55 changes: 55 additions & 0 deletions src/test/java/com/instarsocial/parquet/AppTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package com.instarsocial.parquet;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import static org.junit.Assert.*;

/**
* Tests for {@link App} using the Arrange-Act-Assert pattern.
*/

public class AppTest {

/** Temporary file generated by the test. */
private static File generatedFile;

@BeforeClass
public static void beforeAll() {
generatedFile = null;
}

@AfterClass
public static void afterAll() {
if (generatedFile != null && generatedFile.exists()) {
generatedFile.delete();
}
}

@Test
public void Should_CreateParquetFile_When_CallingGenerateParquetFileFor() throws Exception {
// Arrange
DateTime dateTime = new DateTime(2020, 6, 1, 0, 0, 0, 0, DateTimeZone.UTC);

// Act
org.apache.hadoop.fs.Path actual = App.generateParquetFileFor(dateTime);

// Assert
assertNotNull("Path should not be null", actual);
generatedFile = new File(actual.toString());
assertTrue("Generated file should exist", generatedFile.exists());

try (FileInputStream fis = new FileInputStream(generatedFile)) {
byte[] magic = new byte[4];
int read = fis.read(magic);
assertEquals(4, read);
assertEquals("PAR1", new String(magic, StandardCharsets.US_ASCII));
}
}
}
Loading