Skip to content

Commit 8be0dad

Browse files
authored
GH-2967: Support unified config options for convert parquet-cli (#3283)
1 parent ca5eb46 commit 8be0dad

File tree

4 files changed

+97
-3
lines changed

4 files changed

+97
-3
lines changed

parquet-cli/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,17 @@ Usage: parquet [options] [command] [command options]
134134
See 'parquet help <command>' for more information on a specific command.
135135
```
136136

137+
### Configuration Options
138+
139+
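- `--conf` or `--property`: Set any configuration property in the format `key=value`. The argument is split at the first `=`, and surrounding whitespace is trimmed from both the key and the value; an argument without `=` is rejected. Can be specified multiple times.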
140+
141+
Examples:
142+
```bash
143+
parquet convert input.avro -o output.parquet --conf parquet.avro.write-parquet-uuid=true
144+
145+
parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list-structure=false
146+
147+
# Multiple options
148+
parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false
149+
150+
```

parquet-cli/src/main/java/org/apache/parquet/cli/Main.java

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.beust.jcommander.Parameters;
2626
import com.google.common.annotations.VisibleForTesting;
2727
import com.google.common.collect.ImmutableSet;
28+
import java.util.List;
2829
import java.util.Set;
2930
import org.apache.commons.logging.LogFactory;
3031
import org.apache.hadoop.conf.Configurable;
@@ -67,6 +68,11 @@ public class Main extends Configured implements Tool {
6768
description = "Print extra debugging information")
6869
private boolean debug = false;
6970

71+
@Parameter(
72+
names = {"--conf", "--property"},
73+
description = "Set a configuration property (format: key=value). Can be specified multiple times.")
74+
private List<String> confProperties;
75+
7076
@VisibleForTesting
7177
@Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true)
7278
String programName = DEFAULT_PROGRAM_NAME;
@@ -162,10 +168,25 @@ public int run(String[] args) throws Exception {
162168
return 1;
163169
}
164170

165-
try {
166-
if (command instanceof Configurable) {
167-
((Configurable) command).setConf(getConf());
171+
// Note to developers: this is a generic way to apply configuration properties to the given command.
172+
// If the command does not support a given property, that property is simply ignored.
173+
if (command instanceof Configurable) {
174+
Configuration merged = new Configuration(getConf());
175+
if (confProperties != null) {
176+
for (String prop : confProperties) {
177+
String[] parts = prop.split("=", 2);
178+
if (parts.length != 2) {
179+
throw new IllegalArgumentException(
180+
"Configuration property must be in format key=value: " + prop);
181+
}
182+
merged.set(parts[0].trim(), parts[1].trim());
183+
console.debug("Set configuration property: {}={}", parts[0].trim(), parts[1].trim());
184+
}
168185
}
186+
((Configurable) command).setConf(merged);
187+
}
188+
189+
try {
169190
return command.run();
170191
} catch (IllegalArgumentException e) {
171192
if (debug) {

parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,19 @@ public void testConvertCSVCommandWithDifferentSchemas() throws IOException {
6161
command.setConf(new Configuration());
6262
command.run();
6363
}
64+
65+
@Test
66+
public void testConvertCSVCommandWithGenericConf() throws IOException {
67+
File file = csvFile();
68+
ConvertCSVCommand command = new ConvertCSVCommand(createLogger());
69+
command.targets = Arrays.asList(file.getAbsolutePath());
70+
File output = new File(getTempFolder(), getClass().getSimpleName() + "_with_generic_conf.parquet");
71+
command.outputPath = output.getAbsolutePath();
72+
Configuration conf = new Configuration();
73+
conf.set("parquet.avro.write-parquet-uuid", "true");
74+
conf.set("parquet.avro.write-old-list-structure", "false");
75+
command.setConf(conf);
76+
Assert.assertEquals(0, command.run());
77+
Assert.assertTrue(output.exists());
78+
}
6479
}

parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,48 @@ public void testConvertCommand() throws IOException {
3737
Assert.assertEquals(0, command.run());
3838
Assert.assertTrue(output.exists());
3939
}
40+
41+
@Test
42+
public void testConvertCommandWithGenericConf() throws IOException {
43+
File file = toAvro(parquetFile());
44+
ConvertCommand command = new ConvertCommand(createLogger());
45+
command.targets = Arrays.asList(file.getAbsolutePath());
46+
File output = new File(getTempFolder(), "converted_with_generic_conf.parquet");
47+
command.outputPath = output.getAbsolutePath();
48+
Configuration conf = new Configuration();
49+
conf.set("parquet.avro.write-parquet-uuid", "true");
50+
conf.set("parquet.avro.write-old-list-structure", "false");
51+
conf.set("test.property", "test.value");
52+
command.setConf(conf);
53+
54+
Assert.assertEquals(0, command.run());
55+
Assert.assertTrue(output.exists());
56+
}
57+
58+
@Test
59+
public void testConvertCommandConfigurationValidation() throws IOException {
60+
File file = toAvro(parquetFile());
61+
ConvertCommand command = new ConvertCommand(createLogger());
62+
command.targets = Arrays.asList(file.getAbsolutePath());
63+
File output = new File(getTempFolder(), "converted_with_config_validation.parquet");
64+
command.outputPath = output.getAbsolutePath();
65+
66+
Configuration conf = new Configuration();
67+
conf.set("parquet.avro.write-parquet-uuid", "true");
68+
conf.set("parquet.avro.write-old-list-structure", "false");
69+
command.setConf(conf);
70+
71+
Assert.assertEquals(0, command.run());
72+
Assert.assertTrue(output.exists());
73+
74+
File output2 = new File(getTempFolder(), "converted_with_config_validation2.parquet");
75+
command.outputPath = output2.getAbsolutePath();
76+
Configuration conf2 = new Configuration();
77+
conf2.set("parquet.avro.write-parquet-uuid", "false");
78+
conf2.set("parquet.avro.write-old-list-structure", "true");
79+
command.setConf(conf2);
80+
81+
Assert.assertEquals(0, command.run());
82+
Assert.assertTrue(output2.exists());
83+
}
4084
}

0 commit comments

Comments
 (0)