diff --git a/src/diffusers/pipelines/glm_image/pipeline_glm_image.py b/src/diffusers/pipelines/glm_image/pipeline_glm_image.py index 8cbaf99a2243..589b3be47b2c 100644 --- a/src/diffusers/pipelines/glm_image/pipeline_glm_image.py +++ b/src/diffusers/pipelines/glm_image/pipeline_glm_image.py @@ -407,8 +407,8 @@ def generate_prior_tokens( if len(source_grids) > 0: prior_token_image_embed = self.vision_language_encoder.get_image_features( - inputs["pixel_values"], source_grids, return_dict=False - ) + inputs["pixel_values"], source_grids + ).pooler_output prior_token_image_embed = torch.cat(prior_token_image_embed, dim=0) prior_token_image_ids_d32 = self.vision_language_encoder.get_image_tokens( prior_token_image_embed, source_grids