5353 from datafusion ._internal import DataFrame as DataFrameInternal
5454 from datafusion ._internal import expr as expr_internal
5555
56+ from dataclasses import dataclass
5657from enum import Enum
5758
5859
@@ -114,6 +115,19 @@ def get_default_level(self) -> Optional[int]:
114115 return None
115116
116117
@dataclass
class ParquetWriterOptions:
    """Options controlling how a :py:class:`DataFrame` is written to Parquet.

    Attributes:
        compression: Codec to use, as either a ``Compression`` enum member
            or its string name (e.g. ``"zstd"``).
        compression_level: Codec-specific level, or ``None`` to use the
            codec's default where one applies.
    """

    compression: str | Compression = Compression.ZSTD
    compression_level: int | None = None
124+
125+
@dataclass
class ParquetColumnOptions:
    """Per-column Parquet writer settings.

    Currently a placeholder with no fields; column-specific options will be
    added here as they are supported.
    """
130+
117131class DataFrame :
118132 """Two dimensional table representation of data.
119133
@@ -704,7 +718,7 @@ def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None
704718 def write_parquet (
705719 self ,
706720 path : str | pathlib .Path ,
707- compression : Union [str , Compression ] = Compression .ZSTD ,
721+ compression : Union [str , Compression , ParquetWriterOptions ] = Compression .ZSTD ,
708722 compression_level : int | None = None ,
709723 ) -> None :
710724 """Execute the :py:class:`DataFrame` and write the results to a Parquet file.
@@ -725,7 +739,13 @@ def write_parquet(
725739 recommended range is 1 to 22, with the default being 4. Higher levels
726740 provide better compression but slower speed.
727741 """
728- # Convert string to Compression enum if necessary
742+ if isinstance (compression , ParquetWriterOptions ):
743+ if compression_level is not None :
744+ msg = "compression_level should be None when using ParquetWriterOptions"
745+ raise ValueError (msg )
746+ self .write_parquet_with_options (path , compression )
747+ return
748+
729749 if isinstance (compression , str ):
730750 compression = Compression .from_str (compression )
731751
@@ -737,6 +757,28 @@ def write_parquet(
737757
738758 self .df .write_parquet (str (path ), compression .value , compression_level )
739759
760+ def write_parquet_with_options (
761+ self , path : str | pathlib .Path , options : ParquetWriterOptions
762+ ) -> None :
763+ """Execute the :py:class:`DataFrame` and write the results to Parquet.
764+
765+ Args:
766+ path: Destination path.
767+ options: Parquet writer options.
768+ """
769+ compression = options .compression
770+ if isinstance (compression , str ):
771+ compression = Compression .from_str (compression )
772+
773+ level = options .compression_level
774+ if (
775+ compression in {Compression .GZIP , Compression .BROTLI , Compression .ZSTD }
776+ and level is None
777+ ):
778+ level = compression .get_default_level ()
779+
780+ self .df .write_parquet (str (path ), compression .value , level )
781+
740782 def write_json (self , path : str | pathlib .Path ) -> None :
741783 """Execute the :py:class:`DataFrame` and write the results to a JSON file.
742784
0 commit comments