Skip to content

Commit 4681420

Browse files
committed
Merge branch 'main' into fill-null
2 parents 799b67c + 7d8bcd8 commit 4681420

26 files changed

+2789
-84
lines changed

.asf.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ github:
2929
rebase: false
3030
features:
3131
issues: true
32+
protected_branches:
33+
main:
34+
required_pull_request_reviews:
35+
required_approving_review_count: 1
3236

3337
staging:
3438
whoami: asf-staging

docs/source/user-guide/dataframe.rst

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,17 @@ You can customize how DataFrames are rendered in HTML by configuring the formatt
7575
7676
# Change the default styling
7777
configure_formatter(
78-
max_rows=50, # Maximum number of rows to display
79-
max_width=None, # Maximum width in pixels (None for auto)
80-
theme="light", # Theme: "light" or "dark"
81-
precision=2, # Floating point precision
82-
thousands_separator=",", # Separator for thousands
83-
date_format="%Y-%m-%d", # Date format
84-
truncate_width=20 # Max width for string columns before truncating
78+
max_cell_length=25, # Maximum characters in a cell before truncation
79+
max_width=1000, # Maximum width in pixels
80+
max_height=300, # Maximum height in pixels
81+
max_memory_bytes=2097152, # Maximum memory for rendering (2MB)
82+
min_rows_display=20, # Minimum number of rows to display
83+
repr_rows=10, # Number of rows to display in __repr__
84+
enable_cell_expansion=True, # Allow expanding truncated cells
85+
custom_css=None, # Additional custom CSS
86+
show_truncation_message=True, # Show message when data is truncated
87+
style_provider=None, # Custom styling provider
88+
use_shared_styles=True # Share styles across tables
8589
)
8690
8791
The formatter settings affect all DataFrames displayed after configuration.
@@ -113,6 +117,25 @@ For advanced styling needs, you can create a custom style provider:
113117
# Apply the custom style provider
114118
configure_formatter(style_provider=MyStyleProvider())
115119
120+
Performance Optimization with Shared Styles
121+
-------------------------------------------
122+
The ``use_shared_styles`` parameter (enabled by default) optimizes performance when displaying
123+
multiple DataFrames in notebook environments:
124+
125+
.. code-block:: python
126+
from datafusion.html_formatter import configure_formatter
127+
# Default: Use shared styles (recommended for notebooks)
128+
configure_formatter(use_shared_styles=True)
129+
130+
# Disable shared styles (each DataFrame includes its own styles)
131+
configure_formatter(use_shared_styles=False)
132+
133+
When ``use_shared_styles=True``:
134+
- CSS styles and JavaScript are included only once per notebook session
135+
- This reduces HTML output size and prevents style duplication
136+
- Improves rendering performance with many DataFrames
137+
- Applies consistent styling across all DataFrames
138+
116139
Creating a Custom Formatter
117140
---------------------------
118141

@@ -177,3 +200,18 @@ You can also use a context manager to temporarily change formatting settings:
177200
178201
# Back to default formatting
179202
df.show()
203+
204+
Memory and Display Controls
205+
---------------------------
206+
207+
You can control how much data is displayed and how much memory is used for rendering:
208+
209+
.. code-block:: python
210+
211+
configure_formatter(
212+
max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display
213+
min_rows_display=50, # Always show at least 50 rows
214+
repr_rows=20 # Show 20 rows in __repr__ output
215+
)
216+
217+
These parameters help balance comprehensive data display against performance considerations.

python/datafusion/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@
3333
SqlTable = common_internal.SqlTable
3434
SqlType = common_internal.SqlType
3535
SqlView = common_internal.SqlView
36+
TableType = common_internal.TableType
37+
TableSource = common_internal.TableSource
38+
Constraints = common_internal.Constraints
3639

3740
__all__ = [
41+
"Constraints",
3842
"DFSchema",
3943
"DataType",
4044
"DataTypeMap",
@@ -47,6 +51,8 @@
4751
"SqlTable",
4852
"SqlType",
4953
"SqlView",
54+
"TableSource",
55+
"TableType",
5056
]
5157

5258

python/datafusion/expr.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,29 @@
5454
Case = expr_internal.Case
5555
Cast = expr_internal.Cast
5656
Column = expr_internal.Column
57+
CopyTo = expr_internal.CopyTo
58+
CreateCatalog = expr_internal.CreateCatalog
59+
CreateCatalogSchema = expr_internal.CreateCatalogSchema
60+
CreateExternalTable = expr_internal.CreateExternalTable
61+
CreateFunction = expr_internal.CreateFunction
62+
CreateFunctionBody = expr_internal.CreateFunctionBody
63+
CreateIndex = expr_internal.CreateIndex
5764
CreateMemoryTable = expr_internal.CreateMemoryTable
5865
CreateView = expr_internal.CreateView
66+
Deallocate = expr_internal.Deallocate
67+
DescribeTable = expr_internal.DescribeTable
5968
Distinct = expr_internal.Distinct
69+
DmlStatement = expr_internal.DmlStatement
70+
DropCatalogSchema = expr_internal.DropCatalogSchema
71+
DropFunction = expr_internal.DropFunction
6072
DropTable = expr_internal.DropTable
73+
DropView = expr_internal.DropView
6174
EmptyRelation = expr_internal.EmptyRelation
75+
Execute = expr_internal.Execute
6276
Exists = expr_internal.Exists
6377
Explain = expr_internal.Explain
6478
Extension = expr_internal.Extension
79+
FileType = expr_internal.FileType
6580
Filter = expr_internal.Filter
6681
GroupingSet = expr_internal.GroupingSet
6782
Join = expr_internal.Join
@@ -83,21 +98,31 @@
8398
Literal = expr_internal.Literal
8499
Negative = expr_internal.Negative
85100
Not = expr_internal.Not
101+
OperateFunctionArg = expr_internal.OperateFunctionArg
86102
Partitioning = expr_internal.Partitioning
87103
Placeholder = expr_internal.Placeholder
104+
Prepare = expr_internal.Prepare
88105
Projection = expr_internal.Projection
106+
RecursiveQuery = expr_internal.RecursiveQuery
89107
Repartition = expr_internal.Repartition
90108
ScalarSubquery = expr_internal.ScalarSubquery
91109
ScalarVariable = expr_internal.ScalarVariable
110+
SetVariable = expr_internal.SetVariable
92111
SimilarTo = expr_internal.SimilarTo
93112
Sort = expr_internal.Sort
94113
Subquery = expr_internal.Subquery
95114
SubqueryAlias = expr_internal.SubqueryAlias
96115
TableScan = expr_internal.TableScan
116+
TransactionAccessMode = expr_internal.TransactionAccessMode
117+
TransactionConclusion = expr_internal.TransactionConclusion
118+
TransactionEnd = expr_internal.TransactionEnd
119+
TransactionIsolationLevel = expr_internal.TransactionIsolationLevel
120+
TransactionStart = expr_internal.TransactionStart
97121
TryCast = expr_internal.TryCast
98122
Union = expr_internal.Union
99123
Unnest = expr_internal.Unnest
100124
UnnestExpr = expr_internal.UnnestExpr
125+
Values = expr_internal.Values
101126
WindowExpr = expr_internal.WindowExpr
102127

103128
__all__ = [
@@ -111,15 +136,30 @@
111136
"CaseBuilder",
112137
"Cast",
113138
"Column",
139+
"CopyTo",
140+
"CreateCatalog",
141+
"CreateCatalogSchema",
142+
"CreateExternalTable",
143+
"CreateFunction",
144+
"CreateFunctionBody",
145+
"CreateIndex",
114146
"CreateMemoryTable",
115147
"CreateView",
148+
"Deallocate",
149+
"DescribeTable",
116150
"Distinct",
151+
"DmlStatement",
152+
"DropCatalogSchema",
153+
"DropFunction",
117154
"DropTable",
155+
"DropView",
118156
"EmptyRelation",
157+
"Execute",
119158
"Exists",
120159
"Explain",
121160
"Expr",
122161
"Extension",
162+
"FileType",
123163
"Filter",
124164
"GroupingSet",
125165
"ILike",
@@ -142,22 +182,32 @@
142182
"Literal",
143183
"Negative",
144184
"Not",
185+
"OperateFunctionArg",
145186
"Partitioning",
146187
"Placeholder",
188+
"Prepare",
147189
"Projection",
190+
"RecursiveQuery",
148191
"Repartition",
149192
"ScalarSubquery",
150193
"ScalarVariable",
194+
"SetVariable",
151195
"SimilarTo",
152196
"Sort",
153197
"SortExpr",
154198
"Subquery",
155199
"SubqueryAlias",
156200
"TableScan",
201+
"TransactionAccessMode",
202+
"TransactionConclusion",
203+
"TransactionEnd",
204+
"TransactionIsolationLevel",
205+
"TransactionStart",
157206
"TryCast",
158207
"Union",
159208
"Unnest",
160209
"UnnestExpr",
210+
"Values",
161211
"Window",
162212
"WindowExpr",
163213
"WindowFrame",
@@ -686,8 +736,8 @@ def log10(self) -> Expr:
686736
def initcap(self) -> Expr:
687737
"""Set the initial letter of each word to capital.
688738
689-
Converts the first letter of each word in ``string``
690-
to uppercase and the remaining characters to lowercase.
739+
Converts the first letter of each word in ``string`` to uppercase and the
740+
remaining characters to lowercase.
691741
"""
692742
from . import functions as F
693743

0 commit comments

Comments
 (0)