diff --git a/ideeplc/__init__.py b/ideeplc/__init__.py index ae57bfe..7f86cee 100644 --- a/ideeplc/__init__.py +++ b/ideeplc/__init__.py @@ -1,3 +1,3 @@ """iDeepLC: A deep Learning-based retention time predictor for unseen modified peptides with a novel encoding system""" -__version__ = "1.3.1" +__version__ = "1.3.2" diff --git a/ideeplc/data_initialize.py b/ideeplc/data_initialize.py index 02d578a..80b7713 100644 --- a/ideeplc/data_initialize.py +++ b/ideeplc/data_initialize.py @@ -1,14 +1,13 @@ import logging -from typing import Tuple, Union +from typing import Tuple, Union, Iterator import pandas as pd import numpy as np -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import Dataset from ideeplc.utilities import df_to_matrix, reform_seq LOGGER = logging.getLogger(__name__) -# Making the pytorch dataset class MyDataset(Dataset): def __init__(self, sequences: np.ndarray, retention: np.ndarray) -> None: self.sequences = sequences @@ -25,15 +24,14 @@ def data_initialize( csv_path: str, **kwargs ) -> Union[Tuple[MyDataset, np.ndarray], Tuple[MyDataset, np.ndarray]]: """ - Initialize peptides matrices based on a CSV file containing raw peptide sequences. + Initialize peptide matrices based on a CSV file containing raw peptide sequences. :param csv_path: Path to the CSV file containing raw peptide sequences. - :return: DataLoader for prediction. + :return: Dataset for prediction or fine-tuning and x_shape. """ - LOGGER.info(f"Loading peptides from {csv_path}") + try: - # Load peptides from CSV file df = pd.read_csv(csv_path) except FileNotFoundError: LOGGER.error(f"File {csv_path} not found.") @@ -63,22 +61,108 @@ def data_initialize( LOGGER.info( f"Loaded and reformed {len(reformed_peptides)} peptides sequences from the file." 
) + try: - # Convert sequences to matrix format sequences, tr, errors = df_to_matrix(reformed_peptides, df) except Exception as e: LOGGER.error(f"Error converting sequences to matrix format: {e}") raise + if errors: LOGGER.warning(f"Errors encountered during conversion: {errors}") prediction_dataset = MyDataset(sequences, tr) - # Create DataLoader objects - dataloader_pred = DataLoader(prediction_dataset) - # passing the training X shape - for batch in dataloader_pred: - x_shape = batch[0].shape - break + if len(prediction_dataset) == 0: + LOGGER.error("No valid peptide entries were found in the input file.") + raise ValueError("No valid peptide entries were found in the input file.") + + # Keep historical x_shape contract expected by model/tests: (batch, channels, length) + x_shape = (1,) + prediction_dataset[0][0].shape LOGGER.info(f"Dataset initialized with data shape {x_shape}.") return prediction_dataset, x_shape + + +def data_initialize_chunked( + csv_path: str, chunk_size: int = 10000, **kwargs +) -> Iterator[Tuple[pd.DataFrame, MyDataset, np.ndarray]]: + """ + Initialize peptide matrices from a CSV file in chunks. + + :param csv_path: Path to the CSV file containing raw peptide sequences. + :param chunk_size: Number of rows to load per chunk. + :return: Iterator yielding dataframe chunk, dataset chunk, and x_shape. 
+ """ + LOGGER.info(f"Loading peptides from {csv_path} in chunks of {chunk_size}") + + try: + chunk_iter = pd.read_csv(csv_path, chunksize=chunk_size) + except FileNotFoundError: + LOGGER.error(f"File {csv_path} not found.") + raise + except pd.errors.EmptyDataError: + LOGGER.error(f"File {csv_path} is empty.") + raise + except Exception as e: + LOGGER.error(f"Error reading {csv_path}: {e}") + raise + + for chunk_idx, df in enumerate(chunk_iter, start=1): + if "seq" not in df.columns: + LOGGER.error("CSV file must contain a 'seq' column with peptide sequences.") + raise ValueError("Missing 'seq' column in the CSV file.") + if "modifications" not in df.columns: + LOGGER.error( + "CSV file must contain a 'modifications' column with peptide modifications." + ) + raise ValueError("Missing 'modifications' column in the CSV file.") + if "tr" not in df.columns: + LOGGER.error("CSV file must contain a 'tr' column with retention times.") + raise ValueError("Missing 'tr' column in the CSV file.") + + reformed_peptides = [ + reform_seq(seq, mod) for seq, mod in zip(df["seq"], df["modifications"]) + ] + LOGGER.info( + f"Chunk {chunk_idx}: loaded and reformed {len(reformed_peptides)} peptides sequences." 
+ ) + + try: + sequences, tr, errors = df_to_matrix(reformed_peptides, df) + except Exception as e: + LOGGER.error( + f"Error converting sequences to matrix format in chunk {chunk_idx}: {e}" + ) + raise + + if errors: + LOGGER.warning(f"Errors encountered during conversion in chunk {chunk_idx}: {errors}") + + prediction_dataset = MyDataset(sequences, tr) + + if len(prediction_dataset) == 0: + LOGGER.warning(f"Chunk {chunk_idx} contains no valid peptide entries.") + continue + + # Keep historical x_shape contract expected by model/tests: (batch, channels, length) + x_shape = (1,) + prediction_dataset[0][0].shape + LOGGER.info(f"Chunk {chunk_idx} initialized with data shape {x_shape}.") + yield df, prediction_dataset, x_shape + + +def get_input_shape_from_first_chunk(csv_path: str, chunk_size: int = 10000): + """ + Get the input shape from the first valid chunk of a CSV file. + + :param csv_path: Path to the CSV file containing raw peptide sequences. + :param chunk_size: Number of rows to load per chunk. + :return: x_shape for model initialization. + """ + for _, dataset_chunk, x_shape in data_initialize_chunked( + csv_path=csv_path, chunk_size=chunk_size + ): + LOGGER.info(f"Detected input shape from first valid chunk: {x_shape}") + return x_shape + + LOGGER.error("No valid chunks found in the input file.") + raise ValueError("No valid chunks found in the input file.") \ No newline at end of file diff --git a/ideeplc/fine_tuning.py b/ideeplc/fine_tuning.py index 70f0588..dcda916 100644 --- a/ideeplc/fine_tuning.py +++ b/ideeplc/fine_tuning.py @@ -24,6 +24,8 @@ def __init__( validation_data=None, validation_split=0.1, patience=5, + num_workers=0, + pin_memory=False, ): """ Initialize the fine-tuner with the model and data loaders. @@ -38,6 +40,8 @@ def __init__( :param validation_data: Optional validation dataset. :param validation_split: Fraction of training data to use for validation. 
:param patience: Number of epochs with no improvement after which training will be stopped. + :param num_workers: Number of workers for the DataLoader. + :param pin_memory: Whether to pin memory in the DataLoader. """ self.model = model.to(device) self.train_data = train_data @@ -49,6 +53,8 @@ def __init__( self.validation_data = validation_data self.validation_split = validation_split self.patience = patience + self.num_workers = num_workers + self.pin_memory = pin_memory def _freeze_layers(self, layers_to_freeze): """ @@ -71,34 +77,52 @@ def prepare_data(self, data, shuffle=True): :param shuffle: Whether to shuffle the data. :return: DataLoader for the dataset. """ - return DataLoader(data, batch_size=self.batch_size, shuffle=shuffle) + return DataLoader( + data, + batch_size=self.batch_size, + shuffle=shuffle, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + ) def fine_tune(self, layers_to_freeze=None): """ Fine-tune the iDeepLC model on the training dataset. :param layers_to_freeze: List of layer names to freeze during fine-tuning. + :return: Best model based on validation loss. 
""" LOGGER.info("Starting fine-tuning...") + if layers_to_freeze: self._freeze_layers(layers_to_freeze) - optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate) + optimizer = torch.optim.Adam( + filter(lambda p: p.requires_grad, self.model.parameters()), + lr=self.learning_rate, + ) loss_fn = self.loss_function - # Prepare DataLoader - if self.validation_data: - dataloader_train = self.prepare_data(self.train_data) + + if self.validation_data is not None: + dataloader_train = self.prepare_data(self.train_data, shuffle=True) dataloader_val = self.prepare_data(self.validation_data, shuffle=False) else: - # Split the training data into training and validation sets train_size = int((1 - self.validation_split) * len(self.train_data)) val_size = len(self.train_data) - train_size + + if train_size == 0 or val_size == 0: + raise ValueError( + "Training dataset is too small for the requested validation split." + ) + train_dataset, val_dataset = torch.utils.data.random_split( self.train_data, [train_size, val_size] ) - dataloader_train = self.prepare_data(train_dataset) + dataloader_train = self.prepare_data(train_dataset, shuffle=True) dataloader_val = self.prepare_data(val_dataset, shuffle=False) + LOGGER.info(f"Training on {len(dataloader_train.dataset)} samples.") + LOGGER.info(f"Validating on {len(dataloader_val.dataset)} samples.") best_model = copy.deepcopy(self.model) best_loss = float("inf") @@ -107,15 +131,15 @@ def fine_tune(self, layers_to_freeze=None): for epoch in range(self.epochs): self.model.train() running_loss = 0.0 + for batch in dataloader_train: inputs, target = batch - inputs, target = inputs.to(self.device), target.to(self.device) + inputs = inputs.to(self.device, non_blocking=True) + target = target.to(self.device, non_blocking=True) - # Forward pass outputs = self.model(inputs.float()) loss = loss_fn(outputs, target.float().view(-1, 1)) - # Backward pass and optimization optimizer.zero_grad() loss.backward() optimizer.step() @@ 
-125,22 +149,24 @@ def fine_tune(self, layers_to_freeze=None): avg_loss = running_loss / len(dataloader_train.dataset) LOGGER.info(f"Epoch [{epoch + 1}/{self.epochs}], Loss: {avg_loss:.4f}") - # Validate the model after each epoch - if dataloader_val: - val_loss, _, _, _ = validate( - self.model, dataloader_val, loss_fn, self.device + val_loss, _, _, _ = validate( + self.model, dataloader_val, loss_fn, self.device + ) + + if val_loss < best_loss: + best_loss = val_loss + best_model = copy.deepcopy(self.model) + patience_counter = 0 + LOGGER.info(f"New best validation loss: {best_loss:.4f}") + else: + patience_counter += 1 + LOGGER.info( + f"No improvement in validation loss. Patience: {patience_counter}/{self.patience}" ) - if val_loss < best_loss: - best_loss = val_loss - best_model = copy.deepcopy(self.model) - patience_counter = 0 - LOGGER.info(f"New best validation loss: {best_loss:.4f}") - else: - patience_counter += 1 - - if patience_counter >= self.patience: - LOGGER.info("Early stopping triggered.") - break + + if patience_counter >= self.patience: + LOGGER.info("Early stopping triggered.") + break LOGGER.info("Fine-tuning complete.") - return best_model + return best_model \ No newline at end of file diff --git a/ideeplc/ideeplc_core.py b/ideeplc/ideeplc_core.py index 958f2b2..4639183 100644 --- a/ideeplc/ideeplc_core.py +++ b/ideeplc/ideeplc_core.py @@ -4,10 +4,9 @@ from pathlib import Path import sys from torch import nn -from torch.utils.data import DataLoader from ideeplc.model import MyNet from ideeplc.config import get_config -from ideeplc.data_initialize import data_initialize +from ideeplc.data_initialize import data_initialize, get_input_shape_from_first_chunk from ideeplc.predict import predict from ideeplc.figure import make_figures from ideeplc.fine_tuning import iDeepLCFineTuner @@ -39,9 +38,7 @@ def get_model_save_path(): Determines the correct directory and filename for saving the model. 
Appends a timestamp to the filename to prevent overwriting. - - Returns: - tuple: (model_save_path, model_dir) + :return: Tuple containing model_save_path and model_dir. """ timestamp = datetime.datetime.now().strftime("%m%d") model_dir = Path("ideeplc/models") / timestamp @@ -62,19 +59,25 @@ def main(args): """ Main function that executes training/evaluation for the iDeepLC package based on the provided arguments. - Args: - args (argparse.Namespace): Parsed arguments from the CLI. + :param args: Parsed arguments from the CLI. """ - LOGGER.info("Starting iDeepLC prediction...") + try: # Load configuration config = get_config() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + chunk_size = config.get("chunk_size", 10000) + batch_size = config["batch_size"] - # Initialize data + LOGGER.info(f"Using device: {device}") LOGGER.info(f"Loading data from {args.input}") - matrix_input, x_shape = data_initialize(csv_path=args.input) + + # For model initialization, only inspect the first valid chunk + x_shape = get_input_shape_from_first_chunk( + csv_path=args.input, chunk_size=chunk_size + ) + # Initialize model LOGGER.info("Initializing model") model = MyNet(x_shape=x_shape, config=config).to(device) @@ -84,11 +87,12 @@ def main(args): pretrained_model, model_dir = get_model_save_path() if args.model: try: - logging.info(f"Using user-specified model path: {args.model}") + LOGGER.info(f"Using user-specified model path: {args.model}") pretrained_model = Path(args.model) except Exception as e: LOGGER.error(f"Invalid model path provided: {e}") raise e + model.load_state_dict( torch.load(pretrained_model, map_location=device), strict=False ) @@ -96,6 +100,9 @@ def main(args): if args.finetune: LOGGER.info("Fine-tuning the model") + + matrix_input, _ = data_initialize(csv_path=args.input) + fine_tuner = iDeepLCFineTuner( model=model, train_data=matrix_input, @@ -103,28 +110,28 @@ def main(args): device=device, learning_rate=config["learning_rate"], 
epochs=config["epochs"], - batch_size=config["batch_size"], - validation_data=None, # No validation data provided for prediction + batch_size=batch_size, + validation_data=None, validation_split=0.1, - patience=5, + patience=20, ) model = fine_tuner.fine_tune(layers_to_freeze=config["layers_to_freeze"]) + torch.save(model.state_dict(), "finetuned_model_.pth") - dataloader_input = DataLoader( - matrix_input, batch_size=config["batch_size"], shuffle=False - ) # Prediction on provided data LOGGER.info("Starting prediction") pred_loss, pred_cor, pred_results, ground_truth = predict( model=model, - dataloader_input=dataloader_input, loss_fn=loss_function, device=device, calibrate=args.calibrate, input_file=args.input, save_results=args.save, + batch_size=batch_size, + chunk_size=chunk_size, ) LOGGER.info("Prediction completed.") + # Generate Figures make_figures( predictions=pred_results, @@ -137,4 +144,4 @@ def main(args): except Exception as e: LOGGER.error(f"An error occurred during execution: {e}") - raise e + raise e \ No newline at end of file diff --git a/ideeplc/model.py b/ideeplc/model.py index e86c6a5..f3f7d94 100644 --- a/ideeplc/model.py +++ b/ideeplc/model.py @@ -6,6 +6,7 @@ class MyNet(nn.Module): def __init__(self, x_shape, config): self.x_shape = x_shape + self.seq_len = self.x_shape[-1] super(MyNet, self).__init__() # first cnn layers for amino @@ -103,7 +104,7 @@ def __init__(self, x_shape, config): self.l4 = nn.ModuleList() self.l4.append( - nn.Linear(in_features=self.x_shape[2], out_features=config["fc_out"]) + nn.Linear(in_features=self.seq_len, out_features=config["fc_out"]) ) self.l4.append(nn.ReLU()) @@ -157,9 +158,9 @@ def __init__(self, x_shape, config): + config["cnn3_channels"] + config["cnn4_channels"] ) - * self.x_shape[2] + * self.seq_len + config["fc_out"] * 7 - + config["cnn2_channels"] * int(self.x_shape[2] / 2) + + config["cnn2_channels"] * int(self.seq_len / 2) ), out_features=int(config["fc2_out"]), ) @@ -196,7 +197,7 @@ def 
__init__(self, x_shape, config): def forward(self, x): x1 = x[:, :1, :] - x2 = x[:, 1:8, : int(self.x_shape[2] / 2)] + x2 = x[:, 1:8, : int(self.seq_len / 2)] x3 = x[:, 8:14, :] x4 = x[:, 14:21, :] x5 = x[:, 21:, :] diff --git a/ideeplc/predict.py b/ideeplc/predict.py index bc90455..6ca01bb 100644 --- a/ideeplc/predict.py +++ b/ideeplc/predict.py @@ -1,13 +1,14 @@ import os from pathlib import Path -from typing import Tuple import datetime +from typing import Tuple import numpy as np import pandas as pd import torch from torch import nn from torch.utils.data import DataLoader from ideeplc.calibrate import SplineTransformerCalibration +from ideeplc.data_initialize import data_initialize_chunked import logging LOGGER = logging.getLogger(__name__) @@ -18,6 +19,7 @@ def validate( ) -> Tuple[float, float, list, list]: """ Validate the model on a given dataset. + :param model: The trained model. :param dataloader: The DataLoader providing the validation/test data. :param loss_fn: The loss function to use. @@ -31,9 +33,9 @@ def validate( with torch.no_grad(): for inputs, labels in dataloader: - inputs, labels = inputs.to(device, non_blocking=True), labels.to( - device, non_blocking=True - ) + inputs = inputs.to(device, non_blocking=True) + labels = labels.to(device, non_blocking=True) + outputs_batch = model(inputs.float()) loss = loss_fn(outputs_batch, labels.float().view(-1, 1)) @@ -42,7 +44,12 @@ def validate( ground_truth.extend(labels.cpu().numpy().flatten()) avg_loss = total_loss / len(dataloader.dataset) - correlation = np.corrcoef(predictions, ground_truth)[0, 1] + + if len(predictions) > 1 and len(ground_truth) > 1: + correlation = np.corrcoef(predictions, ground_truth)[0, 1] + else: + correlation = np.nan + LOGGER.info( f"Validation complete. 
Loss: {avg_loss:.4f}, Correlation: {correlation:.4f}" ) @@ -52,84 +59,173 @@ def validate( def predict( model: nn.Module, - dataloader_input: DataLoader, loss_fn: nn.Module, device: torch.device, input_file: str, calibrate: bool, save_results: bool, + batch_size: int = None, + chunk_size: int = 10000, + dataloader_input: DataLoader = None, ): """ - Load a trained model and evaluate it on test datasets. + Load a trained model and evaluate it on test datasets in chunks. :param model: The trained model. - :param dataloader_input: Test dataset loader. :param loss_fn: Loss function. :param device: Computation device. :param input_file: Path to the input file containing peptide sequences. :param calibrate: If True, calibrates the results. :param save_results: If True, saves the evaluation results. + :param batch_size: Batch size used during prediction when reading from file. + :param chunk_size: Number of input rows to process per chunk. + :param dataloader_input: Optional prebuilt DataLoader for backward compatibility. :return: Loss, correlation, predictions, and ground truth values. """ - LOGGER.info("Starting prediction process.") + LOGGER.info( + f"Starting prediction process with batch size {batch_size} and chunk size {chunk_size}." 
+ ) + + all_predictions = [] + all_ground_truth = [] + total_loss = 0.0 + total_samples = 0 + + calibrated_preds = None + + timestamp = datetime.datetime.now().strftime("%Y%m%d") + input_file_name = os.path.splitext(os.path.basename(input_file))[0] + output_path = ( + Path("ideeplc_output") / f"{input_file_name}_predictions_{timestamp}.csv" + ) try: - # Validate on the primary test set - loss, correlation, predictions, ground_truth = validate( - model, dataloader_input, loss_fn, device - ) + if dataloader_input is not None: + LOGGER.info("Using provided dataloader_input for prediction.") + loss, correlation, all_predictions, all_ground_truth = validate( + model=model, + dataloader=dataloader_input, + loss_fn=loss_fn, + device=device, + ) + + if calibrate: + LOGGER.info("Fitting calibration model.") + calibration_model = SplineTransformerCalibration() + calibration_model.fit(all_ground_truth, all_predictions) + calibrated_preds = calibration_model.transform(all_predictions) + + if len(calibrated_preds) > 1 and len(all_ground_truth) > 1: + correlation = np.corrcoef(calibrated_preds, all_ground_truth)[0, 1] + else: + correlation = np.nan + + loss_calibrated = loss_fn( + torch.tensor(calibrated_preds).float().view(-1, 1), + torch.tensor(all_ground_truth).float().view(-1, 1), + ) + loss = loss_calibrated.item() + return loss, correlation, calibrated_preds, all_ground_truth + + return loss, correlation, all_predictions, all_ground_truth + + if batch_size is None: + raise ValueError("batch_size must be provided when dataloader_input is not used.") + + if save_results: + output_path.parent.mkdir(parents=True, exist_ok=True) + if output_path.exists(): + output_path.unlink() + + for chunk_idx, (df_chunk, dataset_chunk, x_shape) in enumerate( + data_initialize_chunked(csv_path=input_file, chunk_size=chunk_size), + start=1, + ): + LOGGER.info( + f"Processing chunk {chunk_idx} with {len(dataset_chunk)} entries and shape {x_shape}." 
+ ) + + dataloader_input = DataLoader( + dataset_chunk, + batch_size=batch_size, + shuffle=False, + ) + + chunk_loss, _, chunk_predictions, chunk_ground_truth = validate( + model=model, + dataloader=dataloader_input, + loss_fn=loss_fn, + device=device, + ) + + n_chunk = len(dataset_chunk) + total_loss += chunk_loss * n_chunk + total_samples += n_chunk + + all_predictions.extend(chunk_predictions) + all_ground_truth.extend(chunk_ground_truth) + + if save_results: + result_data = { + "sequences": df_chunk.get("seq", None), + "modifications": df_chunk.get("modifications", None), + "ground_truth": chunk_ground_truth, + "predictions": chunk_predictions, + } + + result_df = pd.DataFrame(result_data) + result_df.to_csv( + output_path, + mode="a", + index=False, + header=not output_path.exists(), + ) + LOGGER.info(f"Chunk {chunk_idx} results appended to {output_path}") + + if total_samples == 0: + LOGGER.error("No valid samples were processed during prediction.") + raise ValueError("No valid samples were processed during prediction.") + + loss = total_loss / total_samples + + if len(all_predictions) > 1 and len(all_ground_truth) > 1: + correlation = np.corrcoef(all_predictions, all_ground_truth)[0, 1] + else: + correlation = np.nan if calibrate: LOGGER.info("Fitting calibration model.") calibration_model = SplineTransformerCalibration() - calibration_model.fit(ground_truth, predictions) - calibrated_preds = calibration_model.transform(predictions) - correlation_preds = np.corrcoef(calibrated_preds, ground_truth)[0, 1] + calibration_model.fit(all_ground_truth, all_predictions) + calibrated_preds = calibration_model.transform(all_predictions) + + if len(calibrated_preds) > 1 and len(all_ground_truth) > 1: + correlation = np.corrcoef(calibrated_preds, all_ground_truth)[0, 1] + else: + correlation = np.nan loss_calibrated = loss_fn( torch.tensor(calibrated_preds).float().view(-1, 1), - torch.tensor(ground_truth).float().view(-1, 1), + 
torch.tensor(all_ground_truth).float().view(-1, 1), ) LOGGER.info(f"Calibration Loss: {loss_calibrated.item():.4f}") loss = loss_calibrated.item() - correlation = correlation_preds - # Save results - if save_results: - input_df = pd.read_csv(input_file) - # Extract sequences and modifications from the input file - sequences = input_df.get("seq", None) - modifications = input_df.get("modifications", None) - - timestamp = datetime.datetime.now().strftime("%Y%m%d") - input_file_name = os.path.splitext(os.path.basename(input_file))[0] - output_path = ( - Path("ideeplc_output") - / f"{input_file_name}_predictions_{timestamp}.csv" - ) - output_path.parent.mkdir( - parents=True, exist_ok=True - ) # Ensure the directory exists + if save_results: + saved_df = pd.read_csv(output_path) + saved_df["calibrated_predictions"] = calibrated_preds + saved_df.to_csv(output_path, index=False) + LOGGER.info(f"Calibrated results saved to {output_path}") - result_data = { - "sequences": sequences, - "modifications": modifications, - "ground_truth": ground_truth, - "predictions": predictions, - } - - if calibrate: - result_data["calibrated_predictions"] = calibrated_preds - - result_df = pd.DataFrame(result_data) - result_df.to_csv(output_path, index=False) - LOGGER.info(f"Results saved to {output_path}") + LOGGER.info( + f"Prediction complete. 
Loss: {loss:.4f}, Correlation: {correlation:.4f}" + ) if calibrate: - return loss, correlation, calibrated_preds, ground_truth + return loss, correlation, calibrated_preds, all_ground_truth else: - return loss, correlation, predictions, ground_truth + return loss, correlation, all_predictions, all_ground_truth except Exception as e: LOGGER.error(f"An error occurred during prediction: {e}") - raise e + raise e \ No newline at end of file diff --git a/ideeplc/utilities.py b/ideeplc/utilities.py index aecc7ea..3f7e08a 100644 --- a/ideeplc/utilities.py +++ b/ideeplc/utilities.py @@ -133,19 +133,51 @@ def encode_sequence_and_modification( if mods: for mod in mods: name = mod.name + + if name not in modifications_dict: + raise KeyError( + f"Modification '{name}' was not found in the modification feature dictionary." + ) + if aa not in modifications_dict[name]: + raise KeyError( + f"Modification '{name}' is not defined for amino acid '{aa}'." + ) + encoded[start_index : start_index + config.num_features, loc + 1] = ( list(modifications_dict[name][aa].values()) ) + if n_term: for mod in n_term: name = f"{mod.name}(N-T)" + + if name not in modifications_dict: + raise KeyError( + f"N-terminal modification '{name}' was not found in the modification feature dictionary." + ) + if sequence[0] not in modifications_dict[name]: + raise KeyError( + f"N-terminal modification '{name}' is not defined for amino acid '{sequence[0]}'." + ) + encoded[start_index : start_index + config.num_features, 1] = list( modifications_dict[name][sequence[0]].values() ) + if c_term: loc = len(parsed_sequence) + 1 for mod in c_term: name = mod.name + + if name not in modifications_dict: + raise KeyError( + f"C-terminal modification '{name}' was not found in the modification feature dictionary." + ) + if sequence[-1] not in modifications_dict[name]: + raise KeyError( + f"C-terminal modification '{name}' is not defined for amino acid '{sequence[-1]}'." 
+ ) + encoded[start_index : start_index + config.num_features, loc] = list( modifications_dict[name][sequence[-1]].values() ) @@ -272,7 +304,7 @@ def encode_sequence_one_hot(sequence: str) -> np.ndarray: def df_to_matrix( seqs: Union[str, List[str]], df: Optional[pd.DataFrame] = None ) -> ( - tuple[ndarray, list[Any], list[list[str | list[str] | int | Exception]]] + tuple[ndarray, list[Any], list[list[str | list[str] | int | Exception]]] | ndarray | Any ): """ Convert a peptide or a list of peptides to their matrix representation. If a DataFrame with 'tr' and 'predictions' columns is also provided, - the function returns a tuple: (matrix, tr_values, prediction_values, errors). + the function returns a tuple: (matrix, tr_values, errors). Otherwise, it returns only the encoded matrix representation. - Parameters - ---------- - seqs : str or List[str] - A single peptide sequence or a list of peptide sequences. - df : pd.DataFrame, optional - DataFrame with 'tr' and 'predictions' columns. Must align with the list of sequences. - - Returns - ------- - If `seqs` is a string: - np.ndarray : matrix representation of the peptide. - If `seqs` is a list and `df` is None: - np.ndarray : stacked matrix representation of all peptides. - If `seqs` is a list and `df` is given: - Tuple[np.ndarray, List[float], List] : - - stacked matrix - - list of tr values - - list of errors (with peptide, index, and exception) + :param seqs: A single peptide sequence or a list of peptide sequences. + :param df: DataFrame with 'tr' column. Must align with the list of sequences. + :return: Encoded matrix representation, and optionally tr values and errors. 
""" - # Normalize to list of sequences single_input = isinstance(seqs, str) if single_input: seqs = [seqs] @@ -317,10 +333,12 @@ def df_to_matrix( for idx, peptide in enumerate(seqs): try: + step = "peptide_parser" parsed_sequence, modifiers, sequence, modifications = peptide_parser( peptide ) + step = "encode_sequence_and_modification" encode_seq_mod = encode_sequence_and_modification( sequence, parsed_sequence, @@ -329,10 +347,13 @@ def df_to_matrix( modifiers["n_term"], modifiers["c_term"], ) + + step = "encode_diamino_sequence_and_modification" encode_di_seq_mod = encode_diamino_sequence_and_modification( encode_seq_mod=encode_seq_mod ) + step = "encode_sequence_and_modification_atomic" encode_seq_mod_atomic = encode_sequence_and_modification_atomic( sequence, parsed_sequence, @@ -340,13 +361,19 @@ def df_to_matrix( modifiers["n_term"], modifiers["c_term"], ) + + step = "encode_diamino_sequence_and_modification_atomic" encode_di_seq_mod_atomic = encode_diamino_sequence_and_modification_atomic( encode_seq_mod_atomic ) + step = "encode_sequence_metadata" encode_seq_meta = encode_sequence_metadata(sequence, encode_seq_mod_atomic) + + step = "encode_sequence_one_hot" seq_hot = encode_sequence_one_hot(sequence) + step = "combine_encoded_features" peptide_encoded = ( encode_seq_mod + encode_di_seq_mod @@ -362,10 +389,28 @@ def df_to_matrix( tr.append(df["tr"].iloc[idx]) except Exception as e: - errors.append([peptide, idx, e]) + errors.append([peptide, idx, step, e]) continue + if len(seqs_encoded) == 0: + if errors: + first_error = errors[0] + raise ValueError( + f"All peptides failed during encoding. First failing peptide: '{first_error[0]}' at index {first_error[1]}. Failing step: {first_error[2]}. Original error: {first_error[3]}" + ) + raise ValueError( + "All peptides failed during encoding. No encoded arrays were created." 
+ ) + seqs_stack = np.stack(seqs_encoded) + + if errors: + print("Encoding errors encountered:") + for peptide, idx, step, err in errors[:10]: + print( + f" - index {idx}, step {step}, peptide '{peptide}': {err}" + ) + if single_input: return seqs_stack[0] elif df is not None: