Skip to content

Commit 14729fc

Browse files
committed
Refactor HTML rendering in DataFrame representation to improve efficiency and clarity
1 parent 5652818 commit 14729fc

File tree

1 file changed

+32
-36
lines changed

1 file changed

+32
-36
lines changed

src/dataframe.rs

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -116,67 +116,63 @@ impl PyDataFrame {
116116
}
117117

118118
fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
119-
let mut html_str = "<table border='1'>\n".to_string();
119+
// Preallocate a fixed 2048-byte buffer up front (a rough guess, not a computed estimate) to limit reallocations
120+
let mut html_str = String::with_capacity(2048);
121+
html_str.push_str("<table border='1'>\n");
120122

121-
// Limit to the first 11 rows
123+
// Limit to the first 11 rows (10 + 1 to check if there are more)
122124
let df = self.df.as_ref().clone().limit(0, Some(11))?;
123125
let batches = wait_for_future(py, df.collect())?;
124126

125-
// If there are no rows, close the table and return
127+
// Early exit if no data
126128
if batches.is_empty() {
127129
html_str.push_str("</table>\n");
128130
return Ok(html_str);
129131
}
130132

131133
// Get schema for headers
132134
let schema = batches[0].schema();
135+
let column_count = schema.fields().len();
136+
html_str.reserve(16 * column_count); // Pre-allocate header space
133137

134-
let mut header = Vec::new();
138+
html_str.push_str("<tr>");
135139
for field in schema.fields() {
136-
header.push(format!("<th>{}</th>", field.name()));
140+
html_str.push_str("<th>");
141+
html_str.push_str(field.name());
142+
html_str.push_str("</th>");
137143
}
138-
let header_str = header.join("");
139-
html_str.push_str(&format!("<tr>{}</tr>\n", header_str));
144+
html_str.push_str("</tr>\n");
140145

141-
// Flatten rows and format them as HTML
146+
// Render at most 10 rows across all batches; total_rows counts the rows emitted so far
142147
let mut total_rows = 0;
143148
for batch in &batches {
144-
total_rows += batch.num_rows();
145-
let formatters = batch
146-
.columns()
147-
.iter()
148-
.map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default()))
149-
.map(|c| {
150-
c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string())))
151-
})
152-
.collect::<Result<Vec<_>, _>>()?;
153-
154-
let num_rows_to_render = if total_rows > 10 {
155-
10
156-
} else {
157-
batch.num_rows()
158-
};
159-
160-
for row in 0..num_rows_to_render {
161-
let mut cells = Vec::new();
162-
for formatter in &formatters {
163-
cells.push(format!("<td>{}</td>", formatter.value(row)));
149+
let rows_remaining = 10 - total_rows;
150+
let rows_in_batch = batch.num_rows().min(rows_remaining);
151+
152+
// Emit one <tr> per row; NOTE(review): a new ArrayFormatter is constructed for every cell here
153+
for row in 0..rows_in_batch {
154+
html_str.push_str("<tr>");
155+
for col in batch.columns() {
156+
let formatter =
157+
ArrayFormatter::try_new(col.as_ref(), &FormatOptions::default())?;
158+
html_str.push_str("<td>");
159+
html_str.push_str(&formatter.value(row).to_string());
160+
html_str.push_str("</td>");
164161
}
165-
let row_str = cells.join("");
166-
html_str.push_str(&format!("<tr>{}</tr>\n", row_str));
162+
html_str.push_str("</tr>\n");
167163
}
168164

169-
if total_rows >= 10 {
170-
break;
171-
}
165+
total_rows += rows_in_batch;
172166
}
173167

174-
if total_rows > 10 {
175-
html_str.push_str("<tr><td colspan=\"100%\">... and additional rows</td></tr>\n");
168+
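// Add ellipsis row once 10 rows have been rendered. NOTE(review): total_rows never exceeds 10, so this also fires when the result has exactly 10 rows and nothing was omitted; confirm this is intended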
169+
if total_rows >= 10 {
170+
html_str.push_str("<tr><td colspan=\"");
171+
html_str.push_str(&column_count.to_string());
172+
html_str.push_str("\">... and additional rows</td></tr>\n");
176173
}
177174

178175
html_str.push_str("</table>\n");
179-
180176
Ok(html_str)
181177
}
182178

0 commit comments

Comments
 (0)