diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 9cbb72636ef..df61300226a 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -875,6 +875,16 @@ def insert_tasks_v1(self, req_dicts: List[Request], num_running_requests: int =
                             self.share_inputs["max_reply_lens"][idx : idx + 1, :] = -1
                             self.share_inputs["limit_think_status"][idx : idx + 1, :] = 0
 
+                if not isinstance(request.prompt_token_ids, (list, np.ndarray)):
+                    logger.error(
+                        f"Request {request.request_id} has invalid prompt_token_ids type: "
+                        f"{type(request.prompt_token_ids)}, expected list or np.ndarray. Aborting request."
+                    )
+                    async_set_value(self.share_inputs["stop_flags"][idx : idx + 1], True)
+                    async_set_value(self.share_inputs["seq_lens_this_time_buffer"][idx : idx + 1], 0)
+                    async_set_value(self.share_inputs["seq_lens_encoder"][idx : idx + 1], 0)
+                    continue
+
                 if isinstance(request.prompt_token_ids, np.ndarray):
                     prompt_token_ids = request.prompt_token_ids.tolist()
                 else: