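Handle Python interrupts

Every `wait_for_future` call site now unwraps twice (`??`, or `?` followed by `.map_err(...)?`): the call returns a nested result, with the outer layer presumably carrying the Python-side (interrupt) error and the inner layer carrying the awaited future's own result. The `src/utils.rs` change that defines this is not included in the diff below.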

kosiew · kosiew · commit 51e1267417ec · 2025-06-05T14:09:31.000+08:00
diff --git a/src/catalog.rs b/src/catalog.rs
@@ -97,7 +97,7 @@ impl PyDatabase {
     }
 
     fn table(&self, name: &str, py: Python) -> PyDataFusionResult<PyTable> {
-        if let Some(table) = wait_for_future(py, self.database.table(name))? {
+        if let Some(table) = wait_for_future(py, self.database.table(name))?? {
             Ok(PyTable::new(table))
         } else {
             Err(PyDataFusionError::Common(format!(
diff --git a/src/context.rs b/src/context.rs
@@ -375,7 +375,7 @@ impl PySessionContext {
             None => {
                 let state = self.ctx.state();
                 let schema = options.infer_schema(&state, &table_path);
-                wait_for_future(py, schema)?
+                wait_for_future(py, schema)??
             }
         };
         let config = ListingTableConfig::new(table_path)
@@ -400,7 +400,7 @@ impl PySessionContext {
     /// Returns a PyDataFrame whose plan corresponds to the SQL statement.
     pub fn sql(&mut self, query: &str, py: Python) -> PyDataFusionResult<PyDataFrame> {
         let result = self.ctx.sql(query);
-        let df = wait_for_future(py, result)?;
+        let df = wait_for_future(py, result)??;
         Ok(PyDataFrame::new(df))
     }
 
@@ -417,7 +417,7 @@ impl PySessionContext {
             SQLOptions::new()
         };
         let result = self.ctx.sql_with_options(query, options);
-        let df = wait_for_future(py, result)?;
+        let df = wait_for_future(py, result)??;
         Ok(PyDataFrame::new(df))
     }
 
@@ -451,7 +451,7 @@ impl PySessionContext {
 
         self.ctx.register_table(&*table_name, Arc::new(table))?;
 
-        let table = wait_for_future(py, self._table(&table_name))?;
+        let table = wait_for_future(py, self._table(&table_name))??;
 
         let df = PyDataFrame::new(table);
         Ok(df)
@@ -650,7 +650,7 @@ impl PySessionContext {
             .collect();
 
         let result = self.ctx.register_parquet(name, path, options);
-        wait_for_future(py, result)?;
+        wait_for_future(py, result)??;
         Ok(())
     }
 
@@ -693,11 +693,11 @@ impl PySessionContext {
         if path.is_instance_of::<PyList>() {
             let paths = path.extract::<Vec<String>>()?;
             let result = self.register_csv_from_multiple_paths(name, paths, options);
-            wait_for_future(py, result)?;
+            wait_for_future(py, result)??;
         } else {
             let path = path.extract::<String>()?;
             let result = self.ctx.register_csv(name, &path, options);
-            wait_for_future(py, result)?;
+            wait_for_future(py, result)??;
         }
 
         Ok(())
@@ -734,7 +734,7 @@ impl PySessionContext {
         options.schema = schema.as_ref().map(|x| &x.0);
 
         let result = self.ctx.register_json(name, path, options);
-        wait_for_future(py, result)?;
+        wait_for_future(py, result)??;
 
         Ok(())
     }
@@ -764,7 +764,7 @@ impl PySessionContext {
         options.schema = schema.as_ref().map(|x| &x.0);
 
         let result = self.ctx.register_avro(name, path, options);
-        wait_for_future(py, result)?;
+        wait_for_future(py, result)??;
 
         Ok(())
     }
@@ -826,7 +826,8 @@ impl PySessionContext {
 
     pub fn table(&self, name: &str, py: Python) -> PyResult<PyDataFrame> {
         let x = wait_for_future(py, self.ctx.table(name))
-            .map_err(|e| PyKeyError::new_err(e.to_string()))?;
+            .map_err(|e| PyKeyError::new_err(e.to_string()))?
+            .map_err(py_datafusion_err)?;
         Ok(PyDataFrame::new(x))
     }
 
@@ -865,10 +866,10 @@ impl PySessionContext {
         let df = if let Some(schema) = schema {
             options.schema = Some(&schema.0);
             let result = self.ctx.read_json(path, options);
-            wait_for_future(py, result)?
+            wait_for_future(py, result)??
         } else {
             let result = self.ctx.read_json(path, options);
-            wait_for_future(py, result)?
+            wait_for_future(py, result)??
         };
         Ok(PyDataFrame::new(df))
     }
@@ -915,12 +916,12 @@ impl PySessionContext {
             let paths = path.extract::<Vec<String>>()?;
             let paths = paths.iter().map(|p| p as &str).collect::<Vec<&str>>();
             let result = self.ctx.read_csv(paths, options);
-            let df = PyDataFrame::new(wait_for_future(py, result)?);
+            let df = PyDataFrame::new(wait_for_future(py, result)??);
             Ok(df)
         } else {
             let path = path.extract::<String>()?;
             let result = self.ctx.read_csv(path, options);
-            let df = PyDataFrame::new(wait_for_future(py, result)?);
+            let df = PyDataFrame::new(wait_for_future(py, result)??);
             Ok(df)
         }
     }
@@ -958,7 +959,7 @@ impl PySessionContext {
             .collect();
 
         let result = self.ctx.read_parquet(path, options);
-        let df = PyDataFrame::new(wait_for_future(py, result)?);
+        let df = PyDataFrame::new(wait_for_future(py, result)??);
         Ok(df)
     }
 
@@ -978,10 +979,10 @@ impl PySessionContext {
         let df = if let Some(schema) = schema {
             options.schema = Some(&schema.0);
             let read_future = self.ctx.read_avro(path, options);
-            wait_for_future(py, read_future)?
+            wait_for_future(py, read_future)??
         } else {
             let read_future = self.ctx.read_avro(path, options);
-            wait_for_future(py, read_future)?
+            wait_for_future(py, read_future)??
         };
         Ok(PyDataFrame::new(df))
     }
@@ -1021,8 +1022,8 @@ impl PySessionContext {
         let plan = plan.plan.clone();
         let fut: JoinHandle<datafusion::common::Result<SendableRecordBatchStream>> =
             rt.spawn(async move { plan.execute(part, Arc::new(ctx)) });
-        let stream = wait_for_future(py, fut).map_err(py_datafusion_err)?;
-        Ok(PyRecordBatchStream::new(stream?))
+        let stream = wait_for_future(py, async { fut.await.expect("Tokio task panicked") })??;
+        Ok(PyRecordBatchStream::new(stream))
     }
 }
 
diff --git a/src/dataframe.rs b/src/dataframe.rs
@@ -233,7 +233,7 @@ impl PyDataFrame {
         let (batches, has_more) = wait_for_future(
             py,
             collect_record_batches_to_display(self.df.as_ref().clone(), config),
-        )?;
+        )??;
         if batches.is_empty() {
             // This should not be reached, but do it for safety since we index into the vector below
             return Ok("No data to display".to_string());
@@ -256,7 +256,7 @@ impl PyDataFrame {
         let (batches, has_more) = wait_for_future(
             py,
             collect_record_batches_to_display(self.df.as_ref().clone(), config),
-        )?;
+        )??;
         if batches.is_empty() {
             // This should not be reached, but do it for safety since we index into the vector below
             return Ok("No data to display".to_string());
@@ -288,7 +288,7 @@ impl PyDataFrame {
     /// Calculate summary statistics for a DataFrame
     fn describe(&self, py: Python) -> PyDataFusionResult<Self> {
         let df = self.df.as_ref().clone();
-        let stat_df = wait_for_future(py, df.describe())?;
+        let stat_df = wait_for_future(py, df.describe())??;
         Ok(Self::new(stat_df))
     }
 
@@ -400,16 +400,15 @@ impl PyDataFrame {
 
     /// Cache DataFrame.
     fn cache(&self, py: Python) -> PyDataFusionResult<Self> {
-        let df = wait_for_future(py, self.df.as_ref().clone().cache())?;
+        let df = wait_for_future(py, self.df.as_ref().clone().cache())??;
         Ok(Self::new(df))
     }
 
     /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch
     /// maintaining the input partitioning.
     fn collect_partitioned(&self, py: Python) -> PyResult<Vec<Vec<PyObject>>> {
-        let batches =
-            wait_for_future(py, self.df.as_ref().clone().collect_partitioned())?
-                .map_err(PyDataFusionError::from)?;
+        let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned())?
+            .map_err(PyDataFusionError::from)?;
 
         batches
             .into_iter()
@@ -512,7 +511,7 @@ impl PyDataFrame {
 
     /// Get the execution plan for this `DataFrame`
     fn execution_plan(&self, py: Python) -> PyDataFusionResult<PyExecutionPlan> {
-        let plan = wait_for_future(py, self.df.as_ref().clone().create_physical_plan())?;
+        let plan = wait_for_future(py, self.df.as_ref().clone().create_physical_plan())??;
         Ok(plan.into())
     }
 
@@ -625,7 +624,7 @@ impl PyDataFrame {
                 DataFrameWriteOptions::new(),
                 Some(csv_options),
             ),
-        )?;
+        )??;
         Ok(())
     }
 
@@ -686,7 +685,7 @@ impl PyDataFrame {
                 DataFrameWriteOptions::new(),
                 Option::from(options),
             ),
-        )?;
+        )??;
         Ok(())
     }
 
@@ -698,7 +697,7 @@ impl PyDataFrame {
                 .as_ref()
                 .clone()
                 .write_json(path, DataFrameWriteOptions::new(), None),
-        )?;
+        )??;
         Ok(())
     }
 
@@ -721,7 +720,7 @@ impl PyDataFrame {
         py: Python<'py>,
         requested_schema: Option<Bound<'py, PyCapsule>>,
     ) -> PyDataFusionResult<Bound<'py, PyCapsule>> {
-        let mut batches = wait_for_future(py, self.df.as_ref().clone().collect())?;
+        let mut batches = wait_for_future(py, self.df.as_ref().clone().collect())??;
         let mut schema: Schema = self.df.schema().to_owned().into();
 
         if let Some(schema_capsule) = requested_schema {
@@ -754,8 +753,8 @@ impl PyDataFrame {
         let df = self.df.as_ref().clone();
         let fut: JoinHandle<datafusion::common::Result<SendableRecordBatchStream>> =
             rt.spawn(async move { df.execute_stream().await });
-        let stream = wait_for_future(py, fut).map_err(py_datafusion_err)?;
-        Ok(PyRecordBatchStream::new(stream?))
+        let stream = wait_for_future(py, async { fut.await.expect("Tokio task panicked") })??;
+        Ok(PyRecordBatchStream::new(stream))
     }
 
     fn execute_stream_partitioned(&self, py: Python) -> PyResult<Vec<PyRecordBatchStream>> {
@@ -764,14 +763,10 @@ impl PyDataFrame {
         let df = self.df.as_ref().clone();
         let fut: JoinHandle<datafusion::common::Result<Vec<SendableRecordBatchStream>>> =
             rt.spawn(async move { df.execute_stream_partitioned().await });
-        let stream = wait_for_future(py, fut).map_err(py_datafusion_err)?;
+        let stream = wait_for_future(py, async { fut.await.expect("Tokio task panicked") })?
+            .map_err(py_datafusion_err)?;
 
-        match stream {
-            Ok(batches) => Ok(batches.into_iter().map(PyRecordBatchStream::new).collect()),
-            _ => Err(PyValueError::new_err(
-                "Unable to execute stream partitioned",
-            )),
-        }
+        Ok(stream.into_iter().map(PyRecordBatchStream::new).collect())
     }
 
     /// Convert to pandas dataframe with pyarrow
@@ -816,7 +811,7 @@ impl PyDataFrame {
 
     // Executes this DataFrame to get the total number of rows.
     fn count(&self, py: Python) -> PyDataFusionResult<usize> {
-        Ok(wait_for_future(py, self.df.as_ref().clone().count())?)
+        Ok(wait_for_future(py, self.df.as_ref().clone().count())??)
     }
 
     /// Fill null values with a specified value for specific columns
@@ -842,7 +837,7 @@ impl PyDataFrame {
 /// Print DataFrame
 fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> {
     // Get string representation of record batches
-    let batches = wait_for_future(py, df.collect())?;
+    let batches = wait_for_future(py, df.collect())??;
     let batches_as_string = pretty::pretty_format_batches(&batches);
     let result = match batches_as_string {
         Ok(batch) => format!("DataFrame()\n{batch}"),
diff --git a/src/substrait.rs b/src/substrait.rs
@@ -72,7 +72,7 @@ impl PySubstraitSerializer {
         path: &str,
         py: Python,
     ) -> PyDataFusionResult<()> {
-        wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))?;
+        wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))??;
         Ok(())
     }
 
@@ -94,19 +94,20 @@ impl PySubstraitSerializer {
         ctx: PySessionContext,
         py: Python,
     ) -> PyDataFusionResult<PyObject> {
-        let proto_bytes: Vec<u8> = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?;
+        let proto_bytes: Vec<u8> =
+            wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))??;
         Ok(PyBytes::new(py, &proto_bytes).into())
     }
 
     #[staticmethod]
     pub fn deserialize(path: &str, py: Python) -> PyDataFusionResult<PyPlan> {
-        let plan = wait_for_future(py, serializer::deserialize(path))?;
+        let plan = wait_for_future(py, serializer::deserialize(path))??;
         Ok(PyPlan { plan: *plan })
     }
 
     #[staticmethod]
     pub fn deserialize_bytes(proto_bytes: Vec<u8>, py: Python) -> PyDataFusionResult<PyPlan> {
-        let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))?;
+        let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))??;
         Ok(PyPlan { plan: *plan })
     }
 }
@@ -143,7 +144,7 @@ impl PySubstraitConsumer {
     ) -> PyDataFusionResult<PyLogicalPlan> {
         let session_state = ctx.ctx.state();
         let result = consumer::from_substrait_plan(&session_state, &plan.plan);
-        let logical_plan = wait_for_future(py, result)?;
+        let logical_plan = wait_for_future(py, result)??;
         Ok(PyLogicalPlan::new(logical_plan))
     }
 }
diff --git a/src/utils.rs b/src/utils.rs

src/catalog.rs
Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,7 @@ impl PyDatabase {`
`97`	`97`	`}`
`98`	`98`
`99`	`99`	`fn table(&self, name: &str, py: Python) -> PyDataFusionResult<PyTable> {`
`100`		`- if let Some(table) = wait_for_future(py, self.database.table(name))? {`
	`100`	`+ if let Some(table) = wait_for_future(py, self.database.table(name))?? {`
`101`	`101`	`Ok(PyTable::new(table))`
`102`	`102`	`} else {`
`103`	`103`	`Err(PyDataFusionError::Common(format!(`
src/substrait.rs
Original file line number	Diff line number	Diff line change
`@@ -72,7 +72,7 @@ impl PySubstraitSerializer {`
`72`	`72`	`path: &str,`
`73`	`73`	`py: Python,`
`74`	`74`	`) -> PyDataFusionResult<()> {`
`75`		`- wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))?;`
	`75`	`+ wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))??;`
`76`	`76`	`Ok(())`
`77`	`77`	`}`
`78`	`78`
`@@ -94,19 +94,20 @@ impl PySubstraitSerializer {`
`94`	`94`	`ctx: PySessionContext,`
`95`	`95`	`py: Python,`
`96`	`96`	`) -> PyDataFusionResult<PyObject> {`
`97`		`- let proto_bytes: Vec<u8> = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?;`
	`97`	`+ let proto_bytes: Vec<u8> =`
	`98`	`+ wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))??;`
`98`	`99`	`Ok(PyBytes::new(py, &proto_bytes).into())`
`99`	`100`	`}`
`100`	`101`
`101`	`102`	`#[staticmethod]`
`102`	`103`	`pub fn deserialize(path: &str, py: Python) -> PyDataFusionResult<PyPlan> {`
`103`		`- let plan = wait_for_future(py, serializer::deserialize(path))?;`
	`104`	`+ let plan = wait_for_future(py, serializer::deserialize(path))??;`
`104`	`105`	`Ok(PyPlan { plan: *plan })`
`105`	`106`	`}`
`106`	`107`
`107`	`108`	`#[staticmethod]`
`108`	`109`	`pub fn deserialize_bytes(proto_bytes: Vec<u8>, py: Python) -> PyDataFusionResult<PyPlan> {`
`109`		`- let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))?;`
	`110`	`+ let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))??;`
`110`	`111`	`Ok(PyPlan { plan: *plan })`
`111`	`112`	`}`
`112`	`113`	`}`
`@@ -143,7 +144,7 @@ impl PySubstraitConsumer {`
`143`	`144`	`) -> PyDataFusionResult<PyLogicalPlan> {`
`144`	`145`	`let session_state = ctx.ctx.state();`
`145`	`146`	`let result = consumer::from_substrait_plan(&session_state, &plan.plan);`
`146`		`- let logical_plan = wait_for_future(py, result)?;`
	`147`	`+ let logical_plan = wait_for_future(py, result)??;`
`147`	`148`	`Ok(PyLogicalPlan::new(logical_plan))`
`148`	`149`	`}`
`149`	`150`	`}`