@@ -44,6 +44,7 @@ def __init__(
         n_ubatch: int = 512,
         pooling_type: int = LLAMA_POOLING_TYPE_UNSPECIFIED,
         n_gpu_layers: int = 0,
+        verbose: bool = True,
         **kwargs):
         """
         Initialize the embedding model with enforced configuration.
@@ -66,6 +67,7 @@ def __init__(
         kwargs["n_ctx"] = n_ctx
         kwargs["n_batch"] = n_batch
         kwargs["n_ubatch"] = n_ubatch
+        kwargs["verbose"] = verbose
 
         # Enable Unified KV Cache (Crucial for Batching)
         # This allows us to assign arbitrary seq_ids in a batch, enabling the parallel /
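For context, the two hunks above simply thread a new `verbose` flag through to the underlying llama.cpp wrapper via `kwargs`. A minimal caller-side sketch follows; the class name, module, and model path are illustrative assumptions, not names taken from this diff.

# Hypothetical usage sketch of the new `verbose` flag (all names below are placeholders).
from embedding_model import EmbeddingModel  # hypothetical module/class

model = EmbeddingModel(
    model_path="models/embedder.gguf",  # placeholder path
    n_ubatch=512,
    n_gpu_layers=0,
    verbose=False,  # forwarded as kwargs["verbose"] to silence llama.cpp logging
)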
@@ -189,16 +191,19 @@ def _decode_batch():
                 for _ in range(seq_len):
                     # Get the vector of the i-th token
                     ptr = llama_cpp.llama_get_embeddings_ith(ctx, curr_token_idx)
-                    data = ptr[:out_dim]
+                    if ptr is None:
+                        # Fallback: append zero vector or skip (here we zero-pad to keep shape)
+                        doc_tokens_embd.append([0.0] * out_dim)
+                    else:
+                        data = ptr[:out_dim]
+                        # Normalization
+                        data = self._normalize_vector(data, normalize)
+                        doc_tokens_embd.append(data)
 
-                    # Normalization
-                    data = self._normalize_vector(data, normalize)
-
-                    doc_tokens_embd.append(data)
                     curr_token_idx += 1
                 results.append(doc_tokens_embd)
 
-            # Branth B: Sequence Level (Mean, Cls, Rank, Unspecified)
+            # Branch B: Sequence Level (Mean, Cls, Rank, Unspecified)
             else:
                 for i in range(len(batch_seq_lens)):
                     # Obtain the vector of the i-th sequence.
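The hunk above guards against `llama_get_embeddings_ith` returning no data for a token position, zero-padding instead of dereferencing a missing pointer. A self-contained sketch of the same guard pattern follows; the helper name and variables are assumptions, and it uses `not ptr` (which also covers a NULL ctypes pointer) where the patch itself checks `ptr is None`.

import llama_cpp

def read_token_embedding(ctx, token_idx: int, out_dim: int) -> list[float]:
    # Sketch of the fallback pattern from the hunk above (helper name is hypothetical).
    ptr = llama_cpp.llama_get_embeddings_ith(ctx, token_idx)
    if not ptr:
        # No embedding stored for this index: zero-pad to keep a fixed shape.
        return [0.0] * out_dim
    # Copy out_dim floats from the returned C float pointer.
    return [float(ptr[i]) for i in range(out_dim)]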