From 122a59d0852d5f66569f50cb5d8d233b6c519ad6 Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Thu, 21 May 2026 15:37:44 +0800
Subject: [PATCH 01/10] feat: refine session-based recommendation framework
 with models including FPMC, BERT4Rec, SASRec, GPT2Rec, and updated GRU4Rec

---
 cornac/models/__init__.py                |   5 +
 cornac/models/bert4rec/__init__.py       |  16 +
 cornac/models/bert4rec/bert4rec.py       | 117 +++++++
 cornac/models/bert4rec/recom_bert4rec.py | 192 +++++++++++
 cornac/models/fpmc/__init__.py           |  16 +
 cornac/models/fpmc/fpmc.py               |  84 +++++
 cornac/models/fpmc/recom_fpmc.py         | 212 ++++++++++++
 cornac/models/gpt2rec/__init__.py        |  16 +
 cornac/models/gpt2rec/gpt2rec.py         | 112 +++++++
 cornac/models/gpt2rec/recom_gpt2rec.py   | 186 +++++++++++
 cornac/models/gru4rec/gru4rec.py         | 374 ++++++----------------
 cornac/models/gru4rec/recom_gru4rec.py   | 214 +++++++++----
 cornac/models/recommender.py             |  72 +++++
 cornac/models/sasrec/__init__.py         |  16 +
 cornac/models/sasrec/recom_sasrec.py     | 246 ++++++++++++++
 cornac/models/sasrec/sasrec.py           | 186 +++++++++++
 cornac/models/seq_utils/__init__.py      |  69 ++++
 cornac/models/seq_utils/iterators.py     | 390 +++++++++++++++++++++++
 cornac/models/seq_utils/losses.py        | 125 ++++++++
 cornac/models/seq_utils/optim.py         |  93 ++++++
 20 files changed, 2408 insertions(+), 333 deletions(-)
 create mode 100644 cornac/models/bert4rec/__init__.py
 create mode 100644 cornac/models/bert4rec/bert4rec.py
 create mode 100644 cornac/models/bert4rec/recom_bert4rec.py
 create mode 100644 cornac/models/fpmc/__init__.py
 create mode 100644 cornac/models/fpmc/fpmc.py
 create mode 100644 cornac/models/fpmc/recom_fpmc.py
 create mode 100644 cornac/models/gpt2rec/__init__.py
 create mode 100644 cornac/models/gpt2rec/gpt2rec.py
 create mode 100644 cornac/models/gpt2rec/recom_gpt2rec.py
 create mode 100644 cornac/models/sasrec/__init__.py
 create mode 100644 cornac/models/sasrec/recom_sasrec.py
 create mode 100644 cornac/models/sasrec/sasrec.py
 create mode 100644 cornac/models/seq_utils/__init__.py
 create mode 100644 cornac/models/seq_utils/iterators.py
 create mode 100644 cornac/models/seq_utils/losses.py
 create mode 100644 cornac/models/seq_utils/optim.py

diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py
index 3451261ae..643b25258 100644
--- a/cornac/models/__init__.py
+++ b/cornac/models/__init__.py
@@ -16,6 +16,7 @@
 from .recommender import Recommender
 from .recommender import NextBasketRecommender
 from .recommender import NextItemRecommender
+from .recommender import SequentialRecommender
 
 from .amr import AMR
 from .ann import AnnoyANN
@@ -24,6 +25,7 @@
 from .ann import ScaNNANN
 from .baseline_only import BaselineOnly
 from .beacon import Beacon
+from .bert4rec import BERT4Rec
 from .bivaecf import BiVAECF
 from .bpr import BPR
 from .bpr import WBPR
@@ -45,9 +47,11 @@
 from .ease import EASE
 from .efm import EFM
 from .fm import FM
+from .fpmc import FPMC
 from .gcmc import GCMC
 from .global_avg import GlobalAvg
 from .gp_top import GPTop
+from .gpt2rec import GPT2Rec
 from .gru4rec import GRU4Rec
 from .hft import HFT
 from .hpf import HPF
@@ -74,6 +78,7 @@
 from .pmf import PMF
 from .recvae import RecVAE
 from .sansa import SANSA
+from .sasrec import SASRec
 from .sbpr import SBPR
 from .skm import SKMeans
 from .sorec import SoRec
diff --git a/cornac/models/bert4rec/__init__.py b/cornac/models/bert4rec/__init__.py
new file mode 100644
index 000000000..af46ce60a
--- /dev/null
+++ b/cornac/models/bert4rec/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from .recom_bert4rec import BERT4Rec
diff --git a/cornac/models/bert4rec/bert4rec.py b/cornac/models/bert4rec/bert4rec.py
new file mode 100644
index 000000000..5fc1d77c2
--- /dev/null
+++ b/cornac/models/bert4rec/bert4rec.py
@@ -0,0 +1,117 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import torch
+import torch.nn as nn
+
+
+class BERT4RecModel(nn.Module):
+    """Bidirectional transformer encoder that scores next-item candidates.
+
+    Uses HuggingFace's :class:`~transformers.BertModel` as the backbone. The
+    hidden state at the last sequence position is dotted with the candidates'
+    ``item_emb`` rows and a per-item bias is added. ``tie_weights`` is stored
+    but never read inside this class.
+
+    ``forward(_, hist_iids, out_iids)`` returns a ``(B, len(out_iids))`` score
+    matrix, or ``(hidden, item_e, bias)`` when ``return_hidden=True``.
+    """
+
+    def __init__(
+        self,
+        item_num,
+        embedding_dim=100,
+        maxlen=20,
+        n_layers=2,
+        n_heads=1,
+        dropout=0.1,
+        pad_idx=-1,
+        tie_weights=False,
+        init_std=0.02,
+        device="cpu",
+    ):
+        super().__init__()
+        from transformers.models.bert import BertConfig, BertModel
+
+        self.item_num = item_num
+        self.pad_idx = pad_idx if pad_idx >= 0 else item_num  # negative -> last row
+        self.maxlen = maxlen
+        self.dev = device
+        self.init_std = init_std
+        self.tie_weights = tie_weights  # stored only; not read in this class
+        # The feed-forward width is fixed at four times the embedding size.
+        config = BertConfig(
+            vocab_size=item_num + 1,
+            hidden_size=embedding_dim,
+            num_hidden_layers=n_layers,
+            num_attention_heads=n_heads,
+            intermediate_size=embedding_dim * 4,
+            hidden_act="gelu",
+            hidden_dropout_prob=dropout,
+            attention_probs_dropout_prob=dropout,
+            max_position_embeddings=maxlen + 1,
+            initializer_range=init_std,
+            pad_token_id=self.pad_idx,
+            layer_norm_eps=1e-12,
+            use_cache=False,
+        )
+        # Items are embedded here and handed to the backbone as inputs_embeds.
+        self.item_emb = nn.Embedding(
+            num_embeddings=item_num + 1,
+            embedding_dim=embedding_dim,
+            padding_idx=self.pad_idx,
+        )
+        self.transformer_model = BertModel(config)
+        self.item_biases = nn.Embedding(item_num + 1, 1, padding_idx=self.pad_idx)
+        self._init_weights()
+        self.to(device)
+
+    def _init_weights(self):  # normal-init items, zero the pad row and all biases
+        self.item_emb.weight.data.normal_(mean=0.0, std=self.init_std)
+        self.item_emb.weight.data[self.pad_idx].zero_()
+        self.item_biases.weight.data.zero_()
+
+    def _encode(self, hist_iids):  # -> hidden state at the last position
+        attention_mask = (hist_iids != self.pad_idx).long()  # 1 = item, 0 = padding
+        embeds = self.item_emb(hist_iids)
+        out = self.transformer_model(
+            inputs_embeds=embeds, attention_mask=attention_mask
+        )
+        return out.last_hidden_state[:, -1, :]
+
+    def forward(self, user_ids, hist_iids, out_iids, return_hidden=False):
+        hidden = self._encode(hist_iids)  # user_ids is not used by this model
+        item_e = self.item_emb(out_iids)
+        bias = self.item_biases(out_iids)
+        if return_hidden:
+            return hidden, item_e, bias
+        scores = torch.mm(hidden, item_e.T) + bias.T  # bias.T broadcasts over rows
+        return scores
+
+    @torch.no_grad()
+    def predict(self, user_ids, log_seqs, item_indices=None):
+        if item_indices is None:  # default: score ids 0..item_num-1 (no pad row)
+            item_indices = torch.arange(self.item_num, device=self.dev)
+        else:
+            item_indices = torch.as_tensor(
+                item_indices, dtype=torch.long, device=self.dev
+            )
+        if not isinstance(log_seqs, torch.Tensor):
+            log_seqs = torch.as_tensor(log_seqs, dtype=torch.long, device=self.dev)
+        hidden = self._encode(log_seqs)
+        item_e = self.item_emb(item_indices)
+        bias = self.item_biases(item_indices)
+        scores = torch.mm(hidden, item_e.T) + bias.T
+        return scores.squeeze().detach().cpu().numpy()  # drops every size-1 dim
diff --git a/cornac/models/bert4rec/recom_bert4rec.py b/cornac/models/bert4rec/recom_bert4rec.py
new file mode 100644
index 000000000..553c1181f
--- /dev/null
+++ b/cornac/models/bert4rec/recom_bert4rec.py
@@ -0,0 +1,192 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+from tqdm.auto import trange
+
+from ...utils import get_rng
+from ..recommender import SequentialRecommender
+from ..seq_utils import session_seq_iter, user_seq_iter
+
+SUPPORTED_LOSSES = (  # names BERT4Rec.__init__ accepts; others raise ValueError
+    "bce",
+    "ce",
+    "bpr",
+    "bpr-max",
+    "softmax",
+    "cross-entropy",
+    "xe_softmax",
+    "top1",
+)
+
+
+class BERT4Rec(SequentialRecommender):
+    """BERT4Rec: bidirectional transformer for sequential recommendation.
+
+    Parameters mirror :class:`cornac.models.SASRec`; see the SASRec docstring
+    for ``mode``, ``loss`` and the rest. ``loss`` must be in ``SUPPORTED_LOSSES``.
+
+    References
+    ----------
+    Sun, Fei, et al. "BERT4Rec: Sequential recommendation with bidirectional encoder representations from transformer."
+    CIKM 2019.
+    """
+
+    def __init__(
+        self,
+        name="BERT4Rec",
+        embedding_dim=100,
+        mode="session-aware",
+        loss="ce",
+        batch_size=512,
+        learning_rate=0.001,
+        n_sample=2048,
+        sample_alpha=0.5,
+        n_epochs=10,
+        max_len=50,
+        num_blocks=2,
+        num_heads=1,
+        dropout=0.2,
+        l2_reg=0.0,
+        bpreg=1.0,
+        elu_param=0.5,
+        device="cpu",
+        trainable=True,
+        verbose=False,
+        seed=None,
+    ):
+        super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
+        if loss not in SUPPORTED_LOSSES:
+            raise ValueError(
+                f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}"
+            )
+        self.embedding_dim = embedding_dim
+        self.loss = loss
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+        self.n_sample = n_sample
+        self.sample_alpha = sample_alpha
+        self.n_epochs = n_epochs
+        self.max_len = max_len
+        self.num_blocks = num_blocks
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.l2_reg = l2_reg
+        self.bpreg = bpreg
+        self.elu_param = elu_param
+        self.device = device
+        self.seed = seed
+        self.rng = get_rng(seed)  # passed to every batch iterator built below
+
+    def _build_data_iter(self, pad_index):  # builds a new batch iterator
+        kwargs = dict(
+            train_set=self.train_set,
+            pad_index=pad_index,
+            batch_size=self.batch_size,
+            max_len=self.max_len,
+            n_sample=self.n_sample,
+            sample_alpha=self.sample_alpha,
+            rng=self.rng,
+            shuffle=True,
+        )
+        if self.mode == "session-aware":  # any other mode: session iterator
+            return user_seq_iter(**kwargs)
+        return session_seq_iter(**kwargs)
+
+    def fit(self, train_set, val_set=None):
+        super().fit(train_set, val_set)
+        if not self.trainable:
+            return self
+
+        import torch
+
+        from .bert4rec import BERT4RecModel
+        from ..seq_utils.losses import get_loss_function
+        # NOTE: this seeds torch's global RNG; seed=None is treated as seed 0.
+        torch.manual_seed(self.seed if self.seed is not None else 0)
+        # The padding id is total_items, the extra row the model allocates.
+        self.pad_idx = self.total_items
+        self.model = BERT4RecModel(
+            item_num=self.total_items,
+            embedding_dim=self.embedding_dim,
+            maxlen=self.max_len,
+            n_layers=self.num_blocks,
+            n_heads=self.num_heads,
+            dropout=self.dropout,
+            pad_idx=self.pad_idx,
+            device=self.device,
+        )
+
+        loss_fn = get_loss_function(self.loss)
+        loss_kwargs = dict(
+            bpreg=self.bpreg, elu_param=self.elu_param, n_sample=self.n_sample
+        )
+        # Adam with beta2=0.98 rather than torch's default of 0.999.
+        opt = torch.optim.Adam(
+            self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
+        )
+
+        progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
+        for _ in progress_bar:
+            self.model.train()
+            total_loss = 0.0
+            cnt = 0
+            for inc, (in_uids, hist_iids, out_iids) in enumerate(
+                self._build_data_iter(self.pad_idx)
+            ):
+                if len(hist_iids) < 2:  # skip batches with under two rows
+                    continue
+                hist_iids_t = torch.tensor(
+                    hist_iids, dtype=torch.long, device=self.device, requires_grad=False
+                )
+                out_iids_t = torch.tensor(
+                    out_iids, dtype=torch.long, device=self.device, requires_grad=False
+                )
+
+                self.model.zero_grad()
+                item_scores = self.model(
+                    None, hist_iids_t, out_iids_t, return_hidden=False
+                )
+
+                L = loss_fn(
+                    item_scores,
+                    out_iids=out_iids_t,
+                    batch_size=len(hist_iids),
+                    **loss_kwargs,
+                )
+                if self.l2_reg > 0:  # adds l2_reg * (unsquared) norm per tensor
+                    for p in self.model.parameters():
+                        L = L + self.l2_reg * torch.norm(p)
+
+                L.backward()
+                opt.step()
+                # Weight the batch loss by its size for the running epoch average.
+                total_loss += L.cpu().detach().numpy() * len(hist_iids)
+                cnt += len(hist_iids)
+                if inc % 10 == 0 and cnt > 0:
+                    progress_bar.set_postfix(loss=(total_loss / cnt))
+        return self
+
+    def score(self, user_idx, history_items, **kwargs):
+        import torch
+        # Flatten the history, left-pad to max_len, keep the most recent max_len.
+        flat = self._flatten_history(history_items)
+        if len(flat) == 0:  # no history: every item gets the same score
+            return np.ones(self.total_items, dtype="float")
+        log_seq = [self.pad_idx] * (self.max_len - len(flat)) + list(flat)
+        log_seq = log_seq[-self.max_len :]
+        log_seq_t = torch.tensor([log_seq], dtype=torch.long, device=self.device)
+        self.model.eval()
+        return self.model.predict(user_idx, log_seq_t)
diff --git a/cornac/models/fpmc/__init__.py b/cornac/models/fpmc/__init__.py
new file mode 100644
index 000000000..ca661be34
--- /dev/null
+++ b/cornac/models/fpmc/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from .recom_fpmc import FPMC
diff --git a/cornac/models/fpmc/fpmc.py b/cornac/models/fpmc/fpmc.py
new file mode 100644
index 000000000..2acb3ec76
--- /dev/null
+++ b/cornac/models/fpmc/fpmc.py
@@ -0,0 +1,84 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import torch
+import torch.nn as nn
+
+
+class FPMC_Model(nn.Module):
+    """Factorizing Personalized Markov Chains (Rendle et al., 2010).
+
+    Score for user ``u``, last item ``i``, candidate ``j``:
+
+        s(u, i, j) = <UI_u, IU_j> + <IL_j, LI_i> + b_j
+
+    ``forward`` returns a ``(B, len(out_iids))`` matrix. Row ``user_num`` of
+    ``UI_emb`` and row ``pad_idx`` of each item table are zeroed padding rows.
+    """
+
+    def __init__(self, user_num, item_num, factor_num, pad_idx=None, device="cpu"):
+        super().__init__()
+        self.user_num = user_num
+        self.item_num = item_num
+        self.factor_num = factor_num
+        self.device = device
+        # Without an explicit pad_idx, the extra last row (item_num) is padding.
+        self.pad_idx = pad_idx if pad_idx is not None else item_num
+
+        # User-Item (target) embeddings
+        self.UI_emb = nn.Embedding(user_num + 1, factor_num, padding_idx=user_num)
+        # Item-User factor: scoring with UI
+        self.IU_emb = nn.Embedding(item_num + 1, factor_num, padding_idx=self.pad_idx)
+        # Last-Item factor (input)
+        self.LI_emb = nn.Embedding(item_num + 1, factor_num, padding_idx=self.pad_idx)
+        # Item-Last factor: scoring with LI
+        self.IL_emb = nn.Embedding(item_num + 1, factor_num, padding_idx=self.pad_idx)
+        self.item_biases = nn.Embedding(item_num + 1, 1, padding_idx=self.pad_idx)
+
+        factor_tables = (self.UI_emb, self.IU_emb, self.LI_emb, self.IL_emb)
+        for table in factor_tables:
+            nn.init.normal_(table.weight, std=0.01)
+        nn.init.constant_(self.item_biases.weight, 0)
+        # normal_ overwrote the padding rows as well, so zero them again.
+        with torch.no_grad():
+            for table in (*factor_tables, self.item_biases):
+                table.weight[table.padding_idx].zero_()
+        # Like BERT4RecModel/GPT2RecModel: put the weights on ``device`` up front.
+        self.to(device)
+
+    def forward(self, in_uids, in_iids, out_iids):
+        """Score every row of the batch against every id in ``out_iids``.
+
+        ``in_uids`` and ``in_iids`` are 1-D index tensors of equal length ``B``;
+        the result has shape ``(B, len(out_iids))``.
+        """
+        user_emb = self.UI_emb(in_uids)  # (B, D)
+        last_item_emb = self.LI_emb(in_iids)  # (B, D)
+        bias = self.item_biases(out_iids)  # (n_out, 1)
+        mf = user_emb @ self.IU_emb(out_iids).T  # user x candidate
+        fmc = last_item_emb @ self.IL_emb(out_iids).T  # last item x candidate
+        return mf + fmc + bias.T
+
+    @torch.no_grad()
+    def predict(self, user_idx, last_iid, item_indices=None):
+        """Return scores as a NumPy array; defaults to all ``item_num`` items."""
+        if item_indices is None:
+            item_indices = torch.arange(self.item_num, device=self.device)
+        else:
+            item_indices = torch.as_tensor(
+                item_indices, dtype=torch.long, device=self.device
+            )
+        scores = self.forward(user_idx, last_iid, item_indices).squeeze()
+        return scores.detach().cpu().numpy()
diff --git a/cornac/models/fpmc/recom_fpmc.py b/cornac/models/fpmc/recom_fpmc.py
new file mode 100644
index 000000000..a40a3682c
--- /dev/null
+++ b/cornac/models/fpmc/recom_fpmc.py
@@ -0,0 +1,212 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+from tqdm.auto import trange
+
+from ...utils import get_rng
+from ..recommender import SequentialRecommender
+from ..seq_utils import session_seq_iter, user_seq_iter
+
+SUPPORTED_LOSSES = (  # names FPMC.__init__ accepts; others raise ValueError
+    "bpr",
+    "bce",
+    "ce",
+    "bpr-max",
+    "softmax",
+    "cross-entropy",
+    "xe_softmax",
+    "top1",
+)
+
+
+class FPMC(SequentialRecommender):
+    """Factorizing Personalized Markov Chains for next-basket recommendation.
+
+    Operates on (user, last_item) → next_item triples. The training data
+    iterator is the same one used by transformer/seq models, but with
+    ``max_len=1`` so each example contributes a single (last_item, target)
+    pair. In ``session-based`` mode the last-item only comes from the
+    current session; in ``session-aware`` mode it may come from any prior
+    session of the user (i.e. the prior interaction in user-chronological
+    order regardless of session boundaries).
+
+    Parameters
+    ----------
+    name: string, default: 'FPMC'
+
+    embedding_dim: int, optional, default: 100
+        Latent factor dimension.
+
+    mode: str, optional, default: 'session-aware'
+        'session-aware' uses ``user_seq_iter``; any other value ``session_seq_iter``.
+    loss: str, optional, default: 'bpr'
+        Must be one of ``SUPPORTED_LOSSES``, otherwise ``ValueError`` is raised.
+    batch_size, learning_rate, n_sample, sample_alpha, n_epochs:
+        Standard training hyperparameters.
+
+    bpreg, elu_param: only used when ``loss="bpr-max"``.
+
+    momentum: float, optional, default: 0.0
+        Momentum for the IndexedAdagradM optimizer.
+
+    References
+    ----------
+    Rendle, S., Freudenthaler, C., & Schmidt-Thieme, L. (2010).
+    Factorizing personalized Markov chains for next-basket recommendation. WWW.
+    """
+
+    def __init__(
+        self,
+        name="FPMC",
+        embedding_dim=100,
+        mode="session-aware",
+        loss="bpr",
+        batch_size=512,
+        learning_rate=0.05,
+        momentum=0.0,
+        n_sample=2048,
+        sample_alpha=0.5,
+        n_epochs=10,
+        bpreg=1.0,
+        elu_param=0.5,
+        device="cpu",
+        trainable=True,
+        verbose=False,
+        seed=None,
+    ):
+        super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
+        if loss not in SUPPORTED_LOSSES:
+            raise ValueError(
+                f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}"
+            )
+        self.embedding_dim = embedding_dim
+        self.loss = loss
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+        self.momentum = momentum
+        self.n_sample = n_sample
+        self.sample_alpha = sample_alpha
+        self.n_epochs = n_epochs
+        self.bpreg = bpreg
+        self.elu_param = elu_param
+        self.device = device
+        self.seed = seed
+        self.rng = get_rng(seed)  # passed to every batch iterator built below
+
+    def _build_data_iter(self, pad_index):
+        # FPMC only uses the immediately previous item, so set max_len = 1.
+        kwargs = dict(
+            train_set=self.train_set,
+            pad_index=pad_index,
+            batch_size=self.batch_size,
+            max_len=1,
+            n_sample=self.n_sample,
+            sample_alpha=self.sample_alpha,
+            rng=self.rng,
+            shuffle=True,
+        )
+        if self.mode == "session-aware":  # any other mode: session iterator
+            return user_seq_iter(**kwargs)
+        return session_seq_iter(**kwargs)
+
+    def fit(self, train_set, val_set=None):
+        super().fit(train_set, val_set)
+        if not self.trainable:
+            return self
+
+        import torch
+
+        from .fpmc import FPMC_Model
+        from ..seq_utils.losses import get_loss_function
+        from ..seq_utils.optim import IndexedAdagradM
+        # NOTE: this seeds torch's global RNG; seed=None is treated as seed 0.
+        torch.manual_seed(self.seed if self.seed is not None else 0)
+        # The padding id is total_items, the extra row the model allocates.
+        self.pad_idx = self.total_items
+        self.model = FPMC_Model(
+            user_num=self.total_users,
+            item_num=self.total_items,
+            factor_num=self.embedding_dim,
+            pad_idx=self.pad_idx,
+            device=self.device,
+        ).to(self.device)
+
+        loss_fn = get_loss_function(self.loss)
+        loss_kwargs = dict(
+            bpreg=self.bpreg, elu_param=self.elu_param, n_sample=self.n_sample
+        )
+        # Optimizer comes from seq_utils.optim; momentum defaults to 0.0 here.
+        opt = IndexedAdagradM(
+            self.model.parameters(), lr=self.learning_rate, momentum=self.momentum
+        )
+
+        progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
+        for _ in progress_bar:
+            self.model.train()
+            total_loss = 0.0
+            cnt = 0
+            for inc, (in_uids, hist_iids, out_iids) in enumerate(
+                self._build_data_iter(self.pad_idx)
+            ):
+                if len(hist_iids) < 2:  # skip batches with under two rows
+                    continue
+                in_uids_t = torch.tensor(
+                    in_uids, dtype=torch.long, device=self.device, requires_grad=False
+                )
+                # FPMC uses just the most recent item (hist_iids shape (B, 1))
+                last_iid_t = torch.tensor(
+                    hist_iids[:, -1],
+                    dtype=torch.long,
+                    device=self.device,
+                    requires_grad=False,
+                )
+                out_iids_t = torch.tensor(
+                    out_iids, dtype=torch.long, device=self.device, requires_grad=False
+                )
+
+                self.model.zero_grad()
+                item_scores = self.model(in_uids_t, last_iid_t, out_iids_t)
+
+                L = loss_fn(
+                    item_scores,
+                    out_iids=out_iids_t,
+                    batch_size=len(in_uids),
+                    **loss_kwargs,
+                )
+                L.backward()
+                opt.step()
+                # Weight the batch loss by its size for the running epoch average.
+                total_loss += L.cpu().detach().numpy() * len(in_uids)
+                cnt += len(in_uids)
+                if inc % 10 == 0 and cnt > 0:
+                    progress_bar.set_postfix(loss=(total_loss / cnt))
+        return self
+
+    def score(self, user_idx, history_items, **kwargs):
+        import torch
+        # Only the final item of the flattened history feeds the model.
+        flat = self._flatten_history(history_items)
+        if len(flat) == 0:  # no history: every item gets the same score
+            return np.ones(self.total_items, dtype="float")
+        last = int(flat[-1])
+        # Cap user index to known users (cold-start fallback to padding row)
+        u_idx = user_idx if 0 <= user_idx < self.total_users else self.total_users
+        self.model.eval()
+        with torch.no_grad():
+            u_t = torch.tensor([u_idx], dtype=torch.long, device=self.device)
+            i_t = torch.tensor([last], dtype=torch.long, device=self.device)
+            cdds = torch.arange(self.total_items, dtype=torch.long, device=self.device)
+            return self.model.predict(u_t, i_t, cdds)
diff --git a/cornac/models/gpt2rec/__init__.py b/cornac/models/gpt2rec/__init__.py
new file mode 100644
index 000000000..ed6d4676c
--- /dev/null
+++ b/cornac/models/gpt2rec/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from .recom_gpt2rec import GPT2Rec
diff --git a/cornac/models/gpt2rec/gpt2rec.py b/cornac/models/gpt2rec/gpt2rec.py
new file mode 100644
index 000000000..93d4e56e4
--- /dev/null
+++ b/cornac/models/gpt2rec/gpt2rec.py
@@ -0,0 +1,112 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import torch
+import torch.nn as nn
+
+
+class GPT2RecModel(nn.Module):
+    """GPT2-based causal transformer for next-item recommendation.
+
+    Scores candidates from the last position's hidden state, item embeddings and
+    a per-item bias; ``forward`` returns a ``(B, len(out_iids))`` score matrix.
+    """
+
+    def __init__(
+        self,
+        item_num,
+        embedding_dim=100,
+        maxlen=20,
+        n_layers=2,
+        n_heads=1,
+        dropout=0.1,
+        pad_idx=-1,
+        tie_weights=False,
+        init_std=0.02,
+        device="cpu",
+    ):
+        super().__init__()
+        from transformers.models.gpt2 import GPT2Config, GPT2Model
+
+        self.item_num = item_num
+        self.pad_idx = pad_idx if pad_idx >= 0 else item_num  # negative -> last row
+        self.maxlen = maxlen
+        self.dev = device
+        self.init_std = init_std
+        self.tie_weights = tie_weights  # stored only; not read in this class
+        # The feed-forward width is fixed at four times the embedding size.
+        config = GPT2Config(
+            vocab_size=item_num + 1,
+            n_positions=maxlen + 1,
+            n_embd=embedding_dim,
+            n_layer=n_layers,
+            n_head=n_heads,
+            n_inner=embedding_dim * 4,
+            activation_function="gelu_new",
+            resid_pdrop=dropout,
+            embd_pdrop=dropout,
+            attn_pdrop=dropout,
+            initializer_range=init_std,
+            pad_token_id=self.pad_idx,
+            layer_norm_epsilon=1e-12,
+            use_cache=False,
+        )
+        # Items are embedded here and handed to the backbone as inputs_embeds.
+        self.item_emb = nn.Embedding(
+            item_num + 1, embedding_dim, padding_idx=self.pad_idx
+        )
+        self.transformer_model = GPT2Model(config)
+        self.item_biases = nn.Embedding(item_num + 1, 1, padding_idx=self.pad_idx)
+
+        self._init_weights()
+        self.to(device)
+
+    def _init_weights(self):  # normal-init items, zero the pad row and all biases
+        self.item_emb.weight.data.normal_(mean=0.0, std=self.init_std)
+        self.item_emb.weight.data[self.pad_idx].zero_()
+        self.item_biases.weight.data.zero_()
+
+    def _encode(self, hist_iids):  # -> hidden state at the last position
+        attention_mask = (hist_iids != self.pad_idx).long()  # 1 = item, 0 = padding
+        embeds = self.item_emb(hist_iids)
+        out = self.transformer_model(
+            inputs_embeds=embeds, attention_mask=attention_mask
+        )
+        return out.last_hidden_state[:, -1, :]
+
+    def forward(self, user_ids, hist_iids, out_iids, return_hidden=False):
+        hidden = self._encode(hist_iids)  # user_ids is not used by this model
+        item_e = self.item_emb(out_iids)
+        bias = self.item_biases(out_iids)
+        if return_hidden:
+            return hidden, item_e, bias
+        scores = torch.mm(hidden, item_e.T) + bias.T  # bias.T broadcasts over rows
+        return scores
+
+    @torch.no_grad()
+    def predict(self, user_ids, log_seqs, item_indices=None):
+        if item_indices is None:  # default: score ids 0..item_num-1 (no pad row)
+            item_indices = torch.arange(self.item_num, device=self.dev)
+        else:
+            item_indices = torch.as_tensor(
+                item_indices, dtype=torch.long, device=self.dev
+            )
+        if not isinstance(log_seqs, torch.Tensor):
+            log_seqs = torch.as_tensor(log_seqs, dtype=torch.long, device=self.dev)
+        hidden = self._encode(log_seqs)
+        item_e = self.item_emb(item_indices)
+        bias = self.item_biases(item_indices)
+        scores = torch.mm(hidden, item_e.T) + bias.T
+        return scores.squeeze().detach().cpu().numpy()  # drops every size-1 dim
diff --git a/cornac/models/gpt2rec/recom_gpt2rec.py b/cornac/models/gpt2rec/recom_gpt2rec.py
new file mode 100644
index 000000000..0ed2f754a
--- /dev/null
+++ b/cornac/models/gpt2rec/recom_gpt2rec.py
@@ -0,0 +1,186 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+from tqdm.auto import trange
+
+from ...utils import get_rng
+from ..recommender import SequentialRecommender
+from ..seq_utils import session_seq_iter, user_seq_iter
+
+SUPPORTED_LOSSES = (  # loss names GPT2Rec.__init__ accepts; any other raises ValueError
+    "bce",
+    "ce",
+    "bpr",
+    "bpr-max",
+    "softmax",
+    "cross-entropy",
+    "xe_softmax",
+    "top1",
+)
+
+
+class GPT2Rec(SequentialRecommender):
+    """GPT2-based causal transformer for sequential recommendation.
+
+    Parameters mirror :class:`cornac.models.SASRec`.
+    """
+
+    def __init__(
+        self,
+        name="GPT2Rec",
+        embedding_dim=100,
+        mode="session-aware",  # "session-aware" -> user_seq_iter, otherwise session_seq_iter
+        loss="ce",  # must be one of SUPPORTED_LOSSES
+        batch_size=512,
+        learning_rate=0.001,
+        n_sample=2048,  # forwarded to the batch iterator and to the loss function
+        sample_alpha=0.5,  # forwarded to the batch iterator
+        n_epochs=10,
+        max_len=50,  # sequence length for batching and for padding/truncation in score()
+        num_blocks=2,  # passed to GPT2RecModel as n_layers
+        num_heads=1,  # passed to GPT2RecModel as n_heads
+        dropout=0.2,
+        l2_reg=0.0,  # when > 0, fit() adds l2_reg * norm(p) for every parameter
+        bpreg=1.0,  # forwarded to the loss function
+        elu_param=0.5,  # forwarded to the loss function
+        device="cpu",
+        trainable=True,
+        verbose=False,
+        seed=None,  # seeds get_rng() here and torch in fit() (torch uses 0 when None)
+    ):
+        super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
+        if loss not in SUPPORTED_LOSSES:  # fail fast on an unknown loss name
+            raise ValueError(
+                f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}"
+            )
+        self.embedding_dim = embedding_dim
+        self.loss = loss
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+        self.n_sample = n_sample
+        self.sample_alpha = sample_alpha
+        self.n_epochs = n_epochs
+        self.max_len = max_len
+        self.num_blocks = num_blocks
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.l2_reg = l2_reg
+        self.bpreg = bpreg
+        self.elu_param = elu_param
+        self.device = device
+        self.seed = seed
+        self.rng = get_rng(seed)  # handed to the batch iterators in _build_data_iter
+
+    def _build_data_iter(self, pad_index):
+        """Create a shuffled batch iterator matching the configured ``mode``."""
+        # "session-aware" uses user_seq_iter; any other mode uses session_seq_iter.
+        make_iter = user_seq_iter if self.mode == "session-aware" else session_seq_iter
+        return make_iter(
+            train_set=self.train_set,
+            pad_index=pad_index,
+            batch_size=self.batch_size,
+            max_len=self.max_len,
+            n_sample=self.n_sample,
+            sample_alpha=self.sample_alpha,
+            rng=self.rng,
+            shuffle=True,
+        )
+
+    def fit(self, train_set, val_set=None):
+        super().fit(train_set, val_set)
+        if not self.trainable:  # skip training entirely
+            return self
+
+        import torch  # local import (presumably keeps torch optional at module import time)
+
+        from .gpt2rec import GPT2RecModel
+        from ..seq_utils.losses import get_loss_function
+
+        torch.manual_seed(self.seed if self.seed is not None else 0)  # seed 0 when none given
+
+        self.pad_idx = self.total_items  # padding id; the model allocates item_num + 1 rows
+        self.model = GPT2RecModel(
+            item_num=self.total_items,
+            embedding_dim=self.embedding_dim,
+            maxlen=self.max_len,
+            n_layers=self.num_blocks,
+            n_heads=self.num_heads,
+            dropout=self.dropout,
+            pad_idx=self.pad_idx,
+            device=self.device,
+        )
+
+        loss_fn = get_loss_function(self.loss)  # resolve the loss callable by name
+        loss_kwargs = dict(
+            bpreg=self.bpreg, elu_param=self.elu_param, n_sample=self.n_sample
+        )
+
+        opt = torch.optim.Adam(
+            self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
+        )
+
+        progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
+        for _ in progress_bar:  # one pass over the training data per epoch
+            self.model.train()
+            total_loss = 0.0
+            cnt = 0
+            for inc, (in_uids, hist_iids, out_iids) in enumerate(  # in_uids is not used below
+                self._build_data_iter(self.pad_idx)  # a new shuffled iterator every epoch
+            ):
+                if len(hist_iids) < 2:  # skip batches holding fewer than two sequences
+                    continue
+                hist_iids_t = torch.tensor(
+                    hist_iids, dtype=torch.long, device=self.device, requires_grad=False
+                )
+                out_iids_t = torch.tensor(
+                    out_iids, dtype=torch.long, device=self.device, requires_grad=False
+                )
+
+                self.model.zero_grad()
+                item_scores = self.model(
+                    None, hist_iids_t, out_iids_t, return_hidden=False  # user_ids slot is None
+                )
+
+                L = loss_fn(
+                    item_scores,
+                    out_iids=out_iids_t,
+                    batch_size=len(hist_iids),
+                    **loss_kwargs,
+                )
+                if self.l2_reg > 0:
+                    for p in self.model.parameters():
+                        L = L + self.l2_reg * torch.norm(p)  # norm (not squared) per tensor
+
+                L.backward()
+                opt.step()
+
+                total_loss += L.cpu().detach().numpy() * len(hist_iids)  # size-weighted sum
+                cnt += len(hist_iids)
+                if inc % 10 == 0 and cnt > 0:  # refresh the displayed mean loss every 10th batch
+                    progress_bar.set_postfix(loss=(total_loss / cnt))
+        return self
+
+    def score(self, user_idx, history_items, **kwargs):
+        """Score every item for one history; an empty history gets all-ones."""
+        import torch
+
+        recent = list(self._flatten_history(history_items))
+        if not recent:
+            return np.ones(self.total_items, dtype="float")
+        seq = ([self.pad_idx] * (self.max_len - len(recent)) + recent)[-self.max_len :]
+        seq_t = torch.tensor([seq], dtype=torch.long, device=self.device)
+        self.model.eval()
+        return self.model.predict(user_idx, seq_t)
diff --git a/cornac/models/gru4rec/gru4rec.py b/cornac/models/gru4rec/gru4rec.py
index 2ea880b58..476c281e2 100644
--- a/cornac/models/gru4rec/gru4rec.py
+++ b/cornac/models/gru4rec/gru4rec.py
@@ -1,12 +1,25 @@
-from collections import Counter
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
 
 import numpy as np
 import torch
 from torch import nn
 from torch.autograd import Variable
-from torch.optim import Optimizer
 
-from cornac.utils.common import get_rng
+from ..seq_utils.iterators import io_iter  # noqa: F401
+from ..seq_utils.optim import IndexedAdagradM  # noqa: F401
 
 
 def init_parameter_matrix(
@@ -18,78 +31,6 @@ def init_parameter_matrix(
     return nn.init._no_grad_uniform_(tensor, -sigma, sigma)
 
 
-class IndexedAdagradM(Optimizer):
-    def __init__(self, params, lr=0.05, momentum=0.0, eps=1e-6):
-        if lr <= 0.0:
-            raise ValueError("Invalid learning rate: {}".format(lr))
-        if momentum < 0.0:
-            raise ValueError("Invalid momentum value: {}".format(momentum))
-        if eps <= 0.0:
-            raise ValueError("Invalid epsilon value: {}".format(eps))
-
-        defaults = dict(lr=lr, momentum=momentum, eps=eps)
-        super(IndexedAdagradM, self).__init__(params, defaults)
-
-        for group in self.param_groups:
-            for p in group["params"]:
-                state = self.state[p]
-                state["acc"] = torch.full_like(
-                    p, 0, memory_format=torch.preserve_format
-                )
-                if momentum > 0:
-                    state["mom"] = torch.full_like(
-                        p, 0, memory_format=torch.preserve_format
-                    )
-
-    def share_memory(self):
-        for group in self.param_groups:
-            for p in group["params"]:
-                state = self.state[p]
-                state["acc"].share_memory_()
-                if group["momentum"] > 0:
-                    state["mom"].share_memory_()
-
-    @torch.no_grad()
-    def step(self, closure=None):
-        loss = None
-        if closure is not None:
-            with torch.enable_grad():
-                loss = closure()
-        for group in self.param_groups:
-            for p in group["params"]:
-                if p.grad is None:
-                    continue
-                grad = p.grad
-                state = self.state[p]
-                clr = group["lr"]
-                momentum = group["momentum"]
-                if grad.is_sparse:
-                    grad = grad.coalesce()
-                    grad_indices = grad._indices()[0]
-                    grad_values = grad._values()
-                    accs = state["acc"][grad_indices] + grad_values.pow(2)
-                    state["acc"].index_copy_(0, grad_indices, accs)
-                    accs.add_(group["eps"]).sqrt_().mul_(-1 / clr)
-                    if momentum > 0:
-                        moma = state["mom"][grad_indices]
-                        moma.mul_(momentum).add_(grad_values / accs)
-                        state["mom"].index_copy_(0, grad_indices, moma)
-                        p.index_add_(0, grad_indices, moma)
-                    else:
-                        p.index_add_(0, grad_indices, grad_values / accs)
-                else:
-                    state["acc"].add_(grad.pow(2))
-                    accs = state["acc"].add(group["eps"])
-                    accs.sqrt_()
-                    if momentum > 0:
-                        mom = state["mom"]
-                        mom.mul_(momentum).addcdiv_(grad, accs, value=-clr)
-                        p.add_(mom)
-                    else:
-                        p.addcdiv_(grad, accs, value=-clr)
-        return loss
-
-
 class GRUEmbedding(nn.Module):
     def __init__(self, dim_in, dim_out):
         super(GRUEmbedding, self).__init__()
@@ -118,6 +59,21 @@ def forward(self, X, H):
 
 
 class GRU4RecModel(nn.Module):
+    """GRU4Rec PyTorch architecture.
+
+    The model computes a hidden state from the latest item id (and previous
+    hidden state) and scores all candidate items via either:
+
+    - ``constrained_embedding=True``: tied input/output item embeddings.
+    - ``embedding > 0``: separate input embedding of given size.
+    - otherwise: a custom :class:`GRUEmbedding` directly producing the hidden.
+
+    The model returns a ``(B, B+N)`` score matrix where columns 0..B-1 are
+    the in-batch positives and columns B..B+N-1 are shared sampled negatives.
+    Padding row ``n_items`` is included so it is safe to pass ``n_items`` as
+    a fake input id for cold-start fallback at score-time.
+    """
+
     def __init__(
         self,
         n_items,
@@ -146,12 +102,14 @@ def __init__(
         self.elu_param = elu_param
         self.bpreg = bpreg
         self.loss = loss
-        self.set_loss_function(self.loss)
         self.start = 0
         if constrained_embedding:
             n_input = layers[-1]
         elif embedding:
-            self.E = nn.Embedding(n_items, embedding, sparse=True)
+            # +1 for an extra padding row to support cold-start at inference
+            self.E = nn.Embedding(
+                n_items + 1, embedding, sparse=True, padding_idx=n_items
+            )
             n_input = embedding
         else:
             self.GE = GRUEmbedding(n_items, layers[0])
@@ -165,8 +123,11 @@ def __init__(
             self.D.append(nn.Dropout(dropout_p_hidden))
         self.G = nn.ModuleList(self.G)
         self.D = nn.ModuleList(self.D)
-        self.Wy = nn.Embedding(n_items, layers[-1], sparse=True)
-        self.By = nn.Embedding(n_items, 1, sparse=True)
+        # +1 row for padding (index = n_items) so we can pad cold-start inputs
+        self.Wy = nn.Embedding(
+            n_items + 1, layers[-1], sparse=True, padding_idx=n_items
+        )
+        self.By = nn.Embedding(n_items + 1, 1, sparse=True, padding_idx=n_items)
         self.reset_parameters()
 
     @torch.no_grad()
@@ -182,73 +143,30 @@ def reset_parameters(self):
             nn.init.zeros_(self.G[i].bias_hh)
         init_parameter_matrix(self.Wy.weight)
         nn.init.zeros_(self.By.weight)
-
-    def set_loss_function(self, loss):
-        if loss == "cross-entropy":
-            self.loss_function = self.xe_loss_with_softmax
-        elif loss == "bpr-max":
-            self.loss_function = self.bpr_max_loss_with_elu
-        elif loss == "top1":
-            self.loss_function = self.top1
-        else:
-            raise NotImplementedError
-
-    def xe_loss_with_softmax(self, O, Y, M):
-        if self.logq > 0:
-            O = O - self.logq * torch.log(
-                torch.cat([self.P0[Y[:M]], self.P0[Y[M:]] ** self.sample_alpha])
-            )
-        X = torch.exp(O - O.max(dim=1, keepdim=True)[0])
-        X = X / X.sum(dim=1, keepdim=True)
-        return -torch.sum(torch.log(torch.diag(X) + 1e-24))
-
-    def softmax_neg(self, X):
-        hm = 1.0 - torch.eye(*X.shape, out=torch.empty_like(X))
-        X = X * hm
-        e_x = torch.exp(X - X.max(dim=1, keepdim=True)[0]) * hm
-        return e_x / e_x.sum(dim=1, keepdim=True)
-
-    def bpr_max_loss_with_elu(self, O, Y, M):
-        if self.elu_param > 0:
-            O = nn.functional.elu(O, self.elu_param)
-        softmax_scores = self.softmax_neg(O)
-        target_scores = torch.diag(O)
-        target_scores = target_scores.reshape(target_scores.shape[0], -1)
-        return torch.sum(
-            (
-                -torch.log(
-                    torch.sum(torch.sigmoid(target_scores - O) * softmax_scores, dim=1)
-                    + 1e-24
-                )
-                + self.bpreg * torch.sum((O**2) * softmax_scores, dim=1)
-            )
-        )
-
-    def top1(self, O, Y, M):
-        target_scores = torch.diag(O)
-        target_scores = target_scores.reshape(target_scores.shape[0], -1)
-        return torch.sum(
-            (
-                torch.mean(
-                    torch.sigmoid(O - target_scores) + torch.sigmoid(O**2), axis=1
-                )
-                - torch.sigmoid(target_scores**2) / (M + self.n_sample)
-            )
-        )
+        if self.Wy.padding_idx is not None:
+            self.Wy.weight.data[self.Wy.padding_idx].zero_()
+        if self.By.padding_idx is not None:
+            self.By.weight.data[self.By.padding_idx].zero_()
 
     def _init_numpy_weights(self, shape):
-        sigma = np.sqrt(6.0 / (shape[0] + shape[1]))
-        m = np.random.rand(*shape).astype("float32") * 2 * sigma - sigma
+        sigma = float(np.sqrt(6.0 / (shape[0] + shape[1])))
+        m = (np.random.rand(*shape) * 2 * sigma - sigma).astype("float32")
         return m
 
     @torch.no_grad()
     def _reset_weights_to_compatibility_mode(self):
+        """Reset weights using numpy RNG with seed 42 for reproducibility.
+
+        Note: when ``constrained_embedding=False`` and ``embedding > 0`` the
+        ``E`` embedding includes a padding row (``n_items``), and ``Wy``/``By``
+        also include padding rows. We only set the first ``n_items`` rows.
+        """
         np.random.seed(42)
         if self.constrained_embedding:
             n_input = self.layers[-1]
         elif self.embedding:
             n_input = self.embedding
-            self.E.weight.set_(
+            self.E.weight.data[: self.n_items].copy_(
                 torch.tensor(
                     self._init_numpy_weights((self.n_items, n_input)),
                     device=self.E.weight.device,
@@ -256,20 +174,29 @@ def _reset_weights_to_compatibility_mode(self):
             )
         else:
             n_input = self.n_items
-            m = []
-            m.append(self._init_numpy_weights((n_input, self.layers[0])))
-            m.append(self._init_numpy_weights((n_input, self.layers[0])))
-            m.append(self._init_numpy_weights((n_input, self.layers[0])))
+            m = [
+                self._init_numpy_weights((n_input, self.layers[0])),
+                self._init_numpy_weights((n_input, self.layers[0])),
+                self._init_numpy_weights((n_input, self.layers[0])),
+            ]
             self.GE.Wx0.weight.set_(
-                torch.tensor(np.hstack(m), device=self.GE.Wx0.weight.device)
+                torch.tensor(
+                    np.hstack(m), dtype=torch.float32, device=self.GE.Wx0.weight.device
+                )
+            )
+            m2 = [
+                self._init_numpy_weights((self.layers[0], self.layers[0])),
+                self._init_numpy_weights((self.layers[0], self.layers[0])),
+            ]
+            self.GE.Wrz0.set_(
+                torch.tensor(
+                    np.hstack(m2), dtype=torch.float32, device=self.GE.Wrz0.device
+                )
             )
-            m2 = []
-            m2.append(self._init_numpy_weights((self.layers[0], self.layers[0])))
-            m2.append(self._init_numpy_weights((self.layers[0], self.layers[0])))
-            self.GE.Wrz0.set_(torch.tensor(np.hstack(m2), device=self.GE.Wrz0.device))
             self.GE.Wh0.set_(
                 torch.tensor(
                     self._init_numpy_weights((self.layers[0], self.layers[0])),
+                    dtype=torch.float32,
                     device=self.GE.Wh0.device,
                 )
             )
@@ -277,19 +204,27 @@ def _reset_weights_to_compatibility_mode(self):
                 torch.zeros((self.layers[0] * 3,), device=self.GE.Bh0.device)
             )
         for i in range(self.start, len(self.layers)):
-            m = []
-            m.append(self._init_numpy_weights((n_input, self.layers[i])))
-            m.append(self._init_numpy_weights((n_input, self.layers[i])))
-            m.append(self._init_numpy_weights((n_input, self.layers[i])))
+            m = [
+                self._init_numpy_weights((n_input, self.layers[i])),
+                self._init_numpy_weights((n_input, self.layers[i])),
+                self._init_numpy_weights((n_input, self.layers[i])),
+            ]
             self.G[i].weight_ih.set_(
-                torch.tensor(np.vstack(m), device=self.G[i].weight_ih.device)
+                torch.tensor(
+                    np.vstack(m), dtype=torch.float32, device=self.G[i].weight_ih.device
+                )
             )
-            m2 = []
-            m2.append(self._init_numpy_weights((self.layers[i], self.layers[i])))
-            m2.append(self._init_numpy_weights((self.layers[i], self.layers[i])))
-            m2.append(self._init_numpy_weights((self.layers[i], self.layers[i])))
+            m2 = [
+                self._init_numpy_weights((self.layers[i], self.layers[i])),
+                self._init_numpy_weights((self.layers[i], self.layers[i])),
+                self._init_numpy_weights((self.layers[i], self.layers[i])),
+            ]
             self.G[i].weight_hh.set_(
-                torch.tensor(np.vstack(m2), device=self.G[i].weight_hh.device)
+                torch.tensor(
+                    np.vstack(m2),
+                    dtype=torch.float32,
+                    device=self.G[i].weight_hh.device,
+                )
             )
             self.G[i].bias_hh.set_(
                 torch.zeros((self.layers[i] * 3,), device=self.G[i].bias_hh.device)
@@ -297,15 +232,20 @@ def _reset_weights_to_compatibility_mode(self):
             self.G[i].bias_ih.set_(
                 torch.zeros((self.layers[i] * 3,), device=self.G[i].bias_ih.device)
             )
-        self.Wy.weight.set_(
+        self.Wy.weight.data[: self.n_items].copy_(
             torch.tensor(
                 self._init_numpy_weights((self.n_items, self.layers[-1])),
+                dtype=torch.float32,
                 device=self.Wy.weight.device,
             )
         )
-        self.By.weight.set_(
+        self.By.weight.data[: self.n_items].copy_(
             torch.zeros((self.n_items, 1), device=self.By.weight.device)
         )
+        if self.Wy.padding_idx is not None:
+            self.Wy.weight.data[self.Wy.padding_idx].zero_()
+        if self.By.padding_idx is not None:
+            self.By.weight.data[self.By.padding_idx].zero_()
 
     def embed_constrained(self, X, Y=None):
         if Y is not None:
@@ -359,8 +299,8 @@ def hidden_step(self, X, H, training=False):
         return X
 
     def score_items(self, X, O, B):
-        O = torch.mm(X, O.T) + B.T
-        return O
+        out = torch.mm(X, O.T) + B.T
+        return out
 
     def forward(self, X, H, Y, training=False):
         E, O, B = self.embed(X, H, Y)
@@ -373,118 +313,13 @@ def forward(self, X, H, Y, training=False):
         return R
 
 
-def io_iter(
-    s_iter, uir_tuple, n_sample=0, sample_alpha=0, rng=None, batch_size=1, shuffle=False
-):
-    """Paralellize mini-batch of input-output items. Create an iterator over data yielding batch of input item indices, batch of output item indices,
-    batch of start masking, batch of end masking, and batch of valid ids (relative positions of current sequences in the last batch).
-
-    Parameters
-    ----------
-    batch_size: int, optional, default = 1
-
-    shuffle: bool, optional, default: False
-        If `True`, orders of triplets will be randomized. If `False`, default orders kept.
-
-    Returns
-    -------
-    iterator : batch of input item indices, batch of output item indices, batch of starting sequence mask, batch of ending sequence mask, batch of valid ids
+def score(model, layers, device, history_items):
+    """Score all items given a flat ``history_items`` list of integers.
 
+    Returns a numpy array of length ``n_items`` (or ``n_items + 1`` if the
+    output embedding includes a padding row, which the caller must trim),
+    or ``None`` when ``history_items`` is empty.
     """
-    rng = rng if rng is not None else get_rng(None)
-    start_mask = np.zeros(batch_size, dtype="int")
-    end_mask = np.ones(batch_size, dtype="int")
-    input_iids = None
-    output_iids = None
-    l_pool = []
-    c_pool = [None for _ in range(batch_size)]
-    sizes = np.zeros(batch_size, dtype="int")
-    if n_sample > 0:
-        item_count = Counter(uir_tuple[1])
-        item_indices = np.array(
-            [iid for iid, _ in item_count.most_common()], dtype="int"
-        )
-        item_dist = (
-            np.array([cnt for _, cnt in item_count.most_common()], dtype="float")
-            ** sample_alpha
-        )
-        item_dist = item_dist / item_dist.sum()
-    for _, batch_mapped_ids in s_iter(batch_size, shuffle):
-        l_pool += batch_mapped_ids
-        while len(l_pool) > 0:
-            if end_mask.sum() == 0:
-                input_iids = uir_tuple[1][
-                    [mapped_ids[-sizes[idx]] for idx, mapped_ids in enumerate(c_pool)]
-                ]
-                output_iids = uir_tuple[1][
-                    [
-                        mapped_ids[-sizes[idx] + 1]
-                        for idx, mapped_ids in enumerate(c_pool)
-                    ]
-                ]
-                sizes -= 1
-                for idx, size in enumerate(sizes):
-                    if size == 1:
-                        end_mask[idx] = 1
-                if n_sample > 0:
-                    negative_samples = rng.choice(
-                        item_indices, size=n_sample, replace=True, p=item_dist
-                    )
-                    output_iids = np.concatenate([output_iids, negative_samples])
-                yield input_iids, output_iids, start_mask, np.arange(
-                    batch_size, dtype="int"
-                )
-                start_mask.fill(0)  # reset start masking
-            while end_mask.sum() > 0 and len(l_pool) > 0:
-                next_seq = l_pool.pop()
-                if len(next_seq) > 1:
-                    idx = np.nonzero(end_mask)[0][0]
-                    end_mask[idx] = 0
-                    start_mask[idx] = 1
-                    c_pool[idx] = next_seq
-                    sizes[idx] = len(c_pool[idx])
-
-    valid_id = np.ones(batch_size, dtype="int")
-    while True:
-        for idx, size in enumerate(sizes):
-            if size == 1:
-                end_mask[idx] = 1
-                valid_id[idx] = 0
-        input_iids = uir_tuple[1][
-            [
-                mapped_ids[-sizes[idx]]
-                for idx, mapped_ids in enumerate(c_pool)
-                if sizes[idx] > 1
-            ]
-        ]
-        output_iids = uir_tuple[1][
-            [
-                mapped_ids[-sizes[idx] + 1]
-                for idx, mapped_ids in enumerate(c_pool)
-                if sizes[idx] > 1
-            ]
-        ]
-        sizes -= 1
-        for idx, size in enumerate(sizes):
-            if size == 1:
-                end_mask[idx] = 1
-        start_mask = start_mask[np.nonzero(valid_id)[0]]
-        end_mask = end_mask[np.nonzero(valid_id)[0]]
-        sizes = sizes[np.nonzero(valid_id)[0]]
-        c_pool = [_ for _, valid in zip(c_pool, valid_id) if valid > 0]
-        if n_sample > 0:
-            negative_samples = rng.choice(
-                item_indices, size=n_sample, replace=True, p=item_dist
-            )
-            output_iids = np.concatenate([output_iids, negative_samples])
-        yield input_iids, output_iids, start_mask, np.nonzero(valid_id)[0]
-        valid_id = np.ones(len(input_iids), dtype="int")
-        if end_mask.sum() == len(input_iids):
-            break
-        start_mask.fill(0)  # reset start masking
-
-
-def score(model, layers, device, history_items):
     model.eval()
     H = []
     for i in range(len(layers)):
@@ -493,6 +328,7 @@ def score(model, layers, device, history_items):
                 (1, layers[i]), dtype=torch.float32, requires_grad=False, device=device
             )
         )
+    O = None
     for iid in history_items:
         O = model.forward(
             torch.tensor([iid], requires_grad=False, device=device),
@@ -500,4 +336,6 @@ def score(model, layers, device, history_items):
             None,
             training=False,
         )
+    if O is None:
+        return None
     return O.squeeze().cpu().detach().numpy()
diff --git a/cornac/models/gru4rec/recom_gru4rec.py b/cornac/models/gru4rec/recom_gru4rec.py
index 7e161d924..a753b7891 100644
--- a/cornac/models/gru4rec/recom_gru4rec.py
+++ b/cornac/models/gru4rec/recom_gru4rec.py
@@ -1,4 +1,4 @@
-# Copyright 2023 The Cornac Authors. All Rights Reserved.
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,17 +13,41 @@
 # limitations under the License.
 # ============================================================================
 
-import numpy as np
-from tqdm.auto import trange
 from collections import Counter
 
-from cornac.models.recommender import NextItemRecommender
+import numpy as np
+from tqdm.auto import trange
 
 from ...utils import get_rng
-
-
-class GRU4Rec(NextItemRecommender):
-    """Session-based Recommendations with Recurrent Neural Networks
+from ..recommender import SequentialRecommender
+from ..seq_utils import io_iter, user_io_iter
+
+SUPPORTED_LOSSES = (  # loss names GRU4Rec.__init__ accepts; any other raises ValueError
+    "cross-entropy",
+    "xe_softmax",
+    "softmax",
+    "bpr",
+    "bpr-max",
+    "top1",
+    "bce",
+    "ce",
+)
+
+
+class GRU4Rec(SequentialRecommender):
+    """Session-based / Session-aware Recommendations with Recurrent Neural Networks.
+
+    The model is the same GRU-based architecture in either mode; only the
+    way training data is iterated differs:
+
+    - ``mode="session-based"``: items are processed per session and the
+      hidden state is reset at session boundaries. This matches the original
+      GRU4Rec (Hidasi et al., 2015).
+    - ``mode="session-aware"``: per-user chronological item sequences are
+      concatenated across sessions and processed as a single sequence. The
+      hidden state is reset at *user* boundaries instead. User IDs are not
+      used in scoring but the data layout allows the RNN to capture
+      cross-session dependencies.
 
     Parameters
     ----------
@@ -31,35 +55,44 @@ class GRU4Rec(NextItemRecommender):
         The name of the recommender model.
 
     layers: list of int, optional, default: [100]
-        The number of hidden units in each layer
+        The number of hidden units in each layer.
 
     loss: str, optional, default: 'cross-entropy'
-        Select the loss function.
+        Loss function. Supported: 'cross-entropy', 'xe_softmax', 'softmax',
+        'bpr', 'bpr-max', 'top1', 'bce', 'ce'.
+
+    mode: str, optional, default: 'session-based'
+        Training data iteration strategy. One of 'session-based' or
+        'session-aware'.
 
     batch_size: int, optional, default: 512
-        Batch size
+        Batch size.
 
     dropout_p_embed: float, optional, default: 0.0
-        Dropout ratio for embedding layers
+        Dropout ratio for embedding layer.
 
     dropout_p_hidden: float, optional, default: 0.0
-        Dropout ratio for hidden layers
+        Dropout ratio for hidden layers.
 
     learning_rate: float, optional, default: 0.05
-        Learning rate for the optimizer
+        Learning rate for the optimizer.
 
     momentum: float, optional, default: 0.0
-        Momentum for adaptive learning rate
+        Momentum for the adaptive learning rate optimizer.
 
     sample_alpha: float, optional, default: 0.5
-        Tradeoff factor controls the contribution of negative sample towards final loss
+        Tradeoff factor controls the contribution of negative samples
+        towards the final loss (popularity-based sampling exponent).
 
     n_sample: int, optional, default: 2048
-        Number of negative samples
+        Number of additional shared negative samples per mini-batch.
 
     embedding: int, optional, default: 0
+        Size of the separate input embedding. ``0`` means no separate
+        embedding; use ``"layersize"`` to set it to ``layers[0]``.
 
     constrained_embedding: bool, optional, default: True
+        Whether input and output item embeddings are tied.
 
     n_epochs: int, optional, default: 10
 
@@ -67,18 +100,19 @@ class GRU4Rec(NextItemRecommender):
         Regularization coefficient for 'bpr-max' loss.
 
     elu_param: float, optional, default: 0.5
-        Elu param for 'bpr-max' loss
+        ELU parameter for 'bpr-max' loss.
 
-    logq: float, optional, default: 0,
-        LogQ correction  to offset the sampling bias affecting 'cross-entropy' loss.
+    logq: float, optional, default: 0.0
+        LogQ correction strength to offset sampling bias for the
+        cross-entropy loss.
 
     device: str, optional, default: 'cpu'
         Set to 'cuda' for GPU support.
 
-    trainable: boolean, optional, default: True
-        When False, the model will not be re-trained, and input of pre-trained parameters are required.
+    trainable: bool, optional, default: True
+        When False, the model will not be re-trained.
 
-    verbose: boolean, optional, default: True
+    verbose: bool, optional, default: False
         When True, running logs are displayed.
 
     seed: int, optional, default: None
@@ -89,7 +123,6 @@ class GRU4Rec(NextItemRecommender):
     Hidasi, B., Karatzoglou, A., Baltrunas, L., & Tikk, D. (2015).
     Session-based recommendations with recurrent neural networks.
     arXiv preprint arXiv:1511.06939.
-
     """
 
     def __init__(
@@ -97,6 +130,7 @@ def __init__(
         name="GRU4Rec",
         layers=[100],
         loss="cross-entropy",
+        mode="session-based",
         batch_size=512,
         dropout_p_embed=0.0,
         dropout_p_hidden=0.0,
@@ -115,7 +149,11 @@ def __init__(
         verbose=False,
         seed=None,
     ):
-        super().__init__(name, trainable=trainable, verbose=verbose)
+        super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
+        if loss not in SUPPORTED_LOSSES:
+            raise ValueError(
+                f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}"
+            )
         self.layers = layers
         self.loss = loss
         self.batch_size = batch_size
@@ -135,22 +173,42 @@ def __init__(
         self.seed = seed
         self.rng = get_rng(seed)
 
+    def _build_loss_kwargs(self):
+        """Gather the keyword arguments forwarded to the loss function.
+
+        ``batch_size`` starts as ``None``; ``fit`` overwrites it per batch.
+        """
+        kwargs = {"P0": self.P0, "logq": self.logq, "sample_alpha": self.sample_alpha}
+        kwargs["batch_size"] = None
+        for name in ("bpreg", "elu_param", "n_sample"):
+            kwargs[name] = getattr(self, name)
+        return kwargs
+
     def fit(self, train_set, val_set=None):
         super().fit(train_set, val_set)
+        if not self.trainable:
+            return self
+
         import torch
 
-        from .gru4rec import GRU4RecModel, IndexedAdagradM, io_iter
+        from .gru4rec import GRU4RecModel
+        from ..seq_utils.losses import get_loss_function
+        from ..seq_utils.optim import IndexedAdagradM
 
         item_freq = Counter(self.train_set.uir_tuple[1])
-        P0 = torch.tensor(
-            [item_freq[iid] for (_, iid) in self.train_set.iid_map.items()],
-            dtype=torch.float32,
-            device=self.device,
-        ) if self.logq > 0 else None
+        self.P0 = (
+            torch.tensor(
+                [item_freq[iid] for (_, iid) in self.train_set.iid_map.items()],
+                dtype=torch.float32,
+                device=self.device,
+            )
+            if self.logq > 0
+            else None
+        )
 
         self.model = GRU4RecModel(
             n_items=self.total_items,
-            P0=P0,
+            P0=self.P0,
             layers=self.layers,
             dropout_p_embed=self.dropout_p_embed,
             dropout_p_hidden=self.dropout_p_hidden,
@@ -162,60 +220,86 @@ def fit(self, train_set, val_set=None):
             elu_param=self.elu_param,
             loss=self.loss,
         ).to(self.device)
-
         self.model._reset_weights_to_compatibility_mode()
 
+        loss_fn = get_loss_function(self.loss)
+        loss_kwargs = self._build_loss_kwargs()
+
         opt = IndexedAdagradM(
             self.model.parameters(), self.learning_rate, self.momentum
         )
 
         progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
         for _ in progress_bar:
-            H = []
-            for i in range(len(self.layers)):
-                H.append(
-                    torch.zeros(
-                        (self.batch_size, self.layers[i]),
-                        dtype=torch.float32,
-                        requires_grad=False,
-                        device=self.device,
-                    )
+            H = [
+                torch.zeros(
+                    (self.batch_size, self.layers[i]),
+                    dtype=torch.float32,
+                    requires_grad=False,
+                    device=self.device,
                 )
-            total_loss = 0
+                for i in range(len(self.layers))
+            ]
+            total_loss = 0.0
             cnt = 0
-            for inc, (in_iids, out_iids, start_mask, valid_id) in enumerate(
-                io_iter(
-                    s_iter=self.train_set.s_iter,
-                    uir_tuple=self.train_set.uir_tuple,
-                    n_sample=self.n_sample,
-                    sample_alpha=self.sample_alpha,
-                    rng=self.rng,
-                    batch_size=self.batch_size,
-                    shuffle=True,
-                )
-            ):
+            data_iter = self._build_data_iter()
+            for inc, batch in enumerate(data_iter):
+                # Unified 5-tuple from io_iter / user_io_iter:
+                #   (in_uids, in_iids, out_iids, start_mask, valid_id)
+                _, in_iids, out_iids, start_mask, valid_id = batch
                 for i in range(len(H)):
                     H[i][np.nonzero(start_mask)[0], :] = 0
                     H[i].detach_()
                     H[i] = H[i][valid_id]
-                in_iids = torch.tensor(in_iids, requires_grad=False, device=self.device)
-                out_iids = torch.tensor(
-                    out_iids, requires_grad=False, device=self.device
+                in_iids_t = torch.tensor(
+                    in_iids, dtype=torch.long, requires_grad=False, device=self.device
+                )
+                out_iids_t = torch.tensor(
+                    out_iids, dtype=torch.long, requires_grad=False, device=self.device
                 )
                 self.model.zero_grad()
-                R = self.model.forward(in_iids, H, out_iids, training=True)
-                L = self.model.loss_function(R, out_iids, len(in_iids)) / len(in_iids)
+                R = self.model.forward(in_iids_t, H, out_iids_t, training=True)
+                loss_kwargs["batch_size"] = len(in_iids)
+                loss_kwargs["out_iids"] = out_iids_t
+                L = loss_fn(R, **loss_kwargs)
                 L.backward()
                 opt.step()
                 total_loss += L.cpu().detach().numpy() * len(in_iids)
                 cnt += len(in_iids)
-                if inc % 10 == 0:
+                if inc % 10 == 0 and cnt > 0:
                     progress_bar.set_postfix(loss=(total_loss / cnt))
         return self
 
-    def score(self, user_idx, history_items, **kwargs):
-        from .gru4rec import score
+    def _build_data_iter(self):
+        """Create a fresh training batch iterator for the configured mode.
+
+        ``"session-aware"`` uses ``user_io_iter``; any other mode uses ``io_iter``.
+        """
+        sampling = dict(
+            n_sample=self.n_sample,
+            sample_alpha=self.sample_alpha,
+            rng=self.rng,
+            batch_size=self.batch_size,
+            shuffle=True,
+        )
+        if self.mode != "session-aware":
+            return io_iter(
+                s_iter=self.train_set.s_iter,
+                uir_tuple=self.train_set.uir_tuple,
+                **sampling,
+            )
+        return user_io_iter(train_set=self.train_set, **sampling)
 
-        if len(history_items) > 0:
-            return score(self.model, self.layers, self.device, history_items)
-        return np.ones(self.total_items, dtype="float")
+    def score(self, user_idx, history_items, **kwargs):
+        """Return item scores for ``history_items``; all-ones without usable history."""
+        from .gru4rec import score as _score
+
+        flat_history = self._flatten_history(history_items)
+        if len(flat_history) == 0:
+            return np.ones(self.total_items, dtype="float")
+        scores = _score(self.model, self.layers, self.device, flat_history)
+        if scores is None:
+            return np.ones(self.total_items, dtype="float")
+        if scores.shape[-1] == self.total_items + 1:  # trailing padding entry
+            scores = scores[..., : self.total_items]  # trim on the axis just checked
+        return scores
diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py
index 5d8514e95..ab9dccefc 100644
--- a/cornac/models/recommender.py
+++ b/cornac/models/recommender.py
@@ -764,3 +764,75 @@ def score(self, user_idx, history_items, **kwargs):
 
         """
         raise NotImplementedError("The algorithm is not able to make score prediction!")
+
+
+SUPPORTED_MODES = ("session-based", "session-aware")
+
+
+class SequentialRecommender(NextItemRecommender):
+    """Base class for sequential next-item recommenders.
+
+    The interaction history handed to a model is either a flat list of item
+    ids or a list of per-session item lists. ``mode`` selects how much of it
+    is used as context:
+
+    - ``"session-based"``: only the most recent session is used as context;
+      the hidden state / history window resets at session boundaries.
+    - ``"session-aware"``: the user's chronological cross-session item
+      sequence is used as context.
+
+    Subclasses pick their own default ``mode`` in ``__init__`` and pass it
+    on to ``super().__init__``, which rejects unsupported values.
+
+    Parameters
+    ----------
+    name: str, required
+        Name of the recommender model.
+
+    mode: str, optional, default: 'session-based'
+        One of ``'session-based'`` or ``'session-aware'``.
+
+    trainable: boolean, optional, default: True
+        When False, the model is not trainable.
+
+    verbose: boolean, optional, default: False
+        When True, running logs are displayed.
+    """
+
+    def __init__(self, name, mode="session-based", trainable=True, verbose=False):
+        # Reject unknown modes before the base class is initialised.
+        if mode not in SUPPORTED_MODES:
+            message = f"mode='{mode}' not supported; choose from {SUPPORTED_MODES}"
+            raise ValueError(message)
+        super().__init__(name=name, trainable=trainable, verbose=verbose)
+        self.mode = mode
+
+    def _flatten_history(self, history_items):
+        """Return ``history_items`` as a flat list of item ids.
+
+        Two input layouts are recognised, judged by the first element:
+
+        - a flat sequence ``[i0, i1, i2, ...]``, returned as a new list;
+        - a sequence of sessions ``[[i0, i1], [i2, i3], ...]``.
+
+        For a sequence of sessions the result depends on ``self.mode``:
+        ``"session-based"`` keeps only the items of the last non-empty
+        session (``[]`` if every session is empty); any other mode
+        concatenates all sessions in the order given.
+
+        ``None`` and empty input yield ``[]``.
+        """
+        if history_items is None or len(history_items) == 0:
+            return []
+        head = history_items[0]
+        nested = hasattr(head, "__iter__") and not isinstance(head, (str, bytes))
+        if not nested:
+            return list(history_items)
+        if self.mode != "session-based":
+            # Concatenate every session, preserving the given order.
+            return [iid for session in history_items for iid in session]
+        # Walk backwards to find the most recent session holding any item.
+        for session in reversed(history_items):
+            if len(session) > 0:
+                return list(session)
+        return []
diff --git a/cornac/models/sasrec/__init__.py b/cornac/models/sasrec/__init__.py
new file mode 100644
index 000000000..e163b9a7d
--- /dev/null
+++ b/cornac/models/sasrec/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from .recom_sasrec import SASRec
diff --git a/cornac/models/sasrec/recom_sasrec.py b/cornac/models/sasrec/recom_sasrec.py
new file mode 100644
index 000000000..b48a9f0c3
--- /dev/null
+++ b/cornac/models/sasrec/recom_sasrec.py
@@ -0,0 +1,246 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+from tqdm.auto import trange
+
+from ...utils import get_rng
+from ..recommender import SequentialRecommender
+from ..seq_utils import session_seq_iter, user_seq_iter
+
+SUPPORTED_LOSSES = (
+    "bce",
+    "ce",
+    "bpr",
+    "bpr-max",
+    "softmax",
+    "cross-entropy",
+    "xe_softmax",
+    "top1",
+)
+
+
+class SASRec(SequentialRecommender):
+    """SASRec: Self-Attentive Sequential Recommendation.
+
+    Parameters
+    ----------
+    name: string, default: 'SASRec'
+        The name of the recommender model.
+
+    embedding_dim: int, optional, default: 100
+        Size of the item embedding vectors.
+
+    mode: str, optional, default: 'session-aware'
+        'session-based' builds each training sequence from one session,
+        'session-aware' from a user's chronological cross-session history.
+
+    loss: str, optional, default: 'ce'
+        Loss function. Supported: 'bce', 'ce', 'bpr', 'bpr-max', 'softmax'
+        (a.k.a 'cross-entropy' / 'xe_softmax'), 'top1'.
+
+    batch_size: int, optional, default: 512
+
+    learning_rate: float, optional, default: 0.001
+        Learning rate of the Adam optimizer.
+
+    n_sample: int, optional, default: 2048
+        Number of negative samples shared per mini-batch.
+
+    sample_alpha: float, optional, default: 0.5
+        Popularity-based negative sampling exponent.
+
+    n_epochs: int, optional, default: 10
+
+    max_len: int, optional, default: 50
+        Maximum length of the item history fed to the model.
+
+    num_blocks: int, optional, default: 2
+
+    num_heads: int, optional, default: 1
+
+    dropout: float, optional, default: 0.2
+
+    l2_reg: float, optional, default: 0.0
+
+    bpreg: float, optional, default: 1.0
+        Regularization coefficient for the 'bpr-max' loss.
+
+    elu_param: float, optional, default: 0.5
+        ELU parameter for the 'bpr-max' loss.
+
+    device: str, optional, default: 'cpu'
+
+    use_pos_emb: bool, optional, default: True
+
+    trainable, verbose, seed: standard recommender parameters.
+
+    References
+    ----------
+    Kang, W.-C., & McAuley, J. (2018). Self-attentive sequential
+    recommendation. ICDM.
+    """
+
+    def __init__(
+        self,
+        name="SASRec",
+        embedding_dim=100,
+        mode="session-aware",
+        loss="ce",
+        batch_size=512,
+        learning_rate=0.001,
+        n_sample=2048,
+        sample_alpha=0.5,
+        n_epochs=10,
+        max_len=50,
+        num_blocks=2,
+        num_heads=1,
+        dropout=0.2,
+        l2_reg=0.0,
+        bpreg=1.0,
+        elu_param=0.5,
+        device="cpu",
+        use_pos_emb=True,
+        trainable=True,
+        verbose=False,
+        seed=None,
+    ):
+        super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
+        if loss not in SUPPORTED_LOSSES:
+            message = f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}"
+            raise ValueError(message)
+        # Hyper-parameters are stored as given; the network is built in fit().
+        self.embedding_dim = embedding_dim
+        self.loss = loss
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+        self.n_sample = n_sample
+        self.sample_alpha = sample_alpha
+        self.n_epochs = n_epochs
+        self.max_len = max_len
+        self.num_blocks = num_blocks
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.l2_reg = l2_reg
+        self.bpreg = bpreg
+        self.elu_param = elu_param
+        self.device = device
+        self.use_pos_emb = use_pos_emb
+        self.seed = seed
+        self.rng = get_rng(seed)
+
+    def _build_data_iter(self, pad_index):
+        """Return a fresh sequence-batch iterator for ``self.mode``."""
+        session_aware = self.mode == "session-aware"
+        make_iter = user_seq_iter if session_aware else session_seq_iter
+        return make_iter(
+            train_set=self.train_set,
+            pad_index=pad_index,
+            batch_size=self.batch_size,
+            max_len=self.max_len,
+            n_sample=self.n_sample,
+            sample_alpha=self.sample_alpha,
+            rng=self.rng,
+            shuffle=True,
+        )
+
+    def fit(self, train_set, val_set=None):
+        """Train the SASRec network on ``train_set`` and return ``self``.
+
+        When ``trainable`` is False it returns right after the base ``fit``.
+        """
+        super().fit(train_set, val_set)
+        if not self.trainable:
+            return self
+
+        import torch
+
+        from .sasrec import SASRecModel
+        from ..seq_utils.losses import get_loss_function
+
+        torch.manual_seed(0 if self.seed is None else self.seed)
+
+        # The id right after the last real item is reserved for padding.
+        self.pad_idx = self.total_items
+        self.model = SASRecModel(
+            item_num=self.total_items,
+            embedding_dim=self.embedding_dim,
+            maxlen=self.max_len,
+            n_layers=self.num_blocks,
+            n_heads=self.num_heads,
+            use_pos_emb=self.use_pos_emb,
+            dropout=self.dropout,
+            pad_idx=self.pad_idx,
+            device=self.device,
+        )
+
+        loss_fn = get_loss_function(self.loss)
+        loss_kwargs = {
+            "bpreg": self.bpreg,
+            "elu_param": self.elu_param,
+            "n_sample": self.n_sample,
+        }
+
+        optimizer = torch.optim.Adam(
+            self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
+        )
+
+        progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
+        for _ in progress_bar:
+            self.model.train()
+            total_loss = 0.0
+            n_seen = 0
+            batches = self._build_data_iter(self.pad_idx)
+            for step, (_uids, hist_iids, out_iids) in enumerate(batches):
+                n_rows = len(hist_iids)
+                # Batches with fewer than two sequences are skipped.
+                if n_rows < 2:
+                    continue
+                hist_t = torch.tensor(hist_iids, dtype=torch.long, device=self.device)
+                out_t = torch.tensor(out_iids, dtype=torch.long, device=self.device)
+
+                self.model.zero_grad()
+                scores = self.model(None, hist_t, out_t, return_hidden=False)
+                loss = loss_fn(
+                    scores,
+                    out_iids=out_t,
+                    batch_size=n_rows,
+                    **loss_kwargs,
+                )
+                if self.l2_reg > 0:
+                    # Penalise the norm of every parameter tensor.
+                    for param in self.model.parameters():
+                        loss = loss + self.l2_reg * torch.norm(param)
+
+                loss.backward()
+                optimizer.step()
+
+                total_loss += loss.cpu().detach().numpy() * n_rows
+                n_seen += n_rows
+                if step % 10 == 0 and n_seen > 0:
+                    progress_bar.set_postfix(loss=(total_loss / n_seen))
+        return self
+
+    def score(self, user_idx, history_items, **kwargs):
+        """Score items from the last ``max_len`` history items; ones if none."""
+        import torch
+
+        history = self._flatten_history(history_items)
+        if len(history) == 0:
+            return np.ones(self.total_items, dtype="float")
+        window = [self.pad_idx] * (self.max_len - len(history)) + list(history)
+        batch = torch.tensor([window[-self.max_len :]], dtype=torch.long)
+        self.model.eval()
+        return self.model.predict(user_idx, batch.to(self.device))
diff --git a/cornac/models/sasrec/sasrec.py b/cornac/models/sasrec/sasrec.py
new file mode 100644
index 000000000..e785d0f2e
--- /dev/null
+++ b/cornac/models/sasrec/sasrec.py
@@ -0,0 +1,186 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+class PointWiseFeedForward(nn.Module):
+    """Two kernel-size-1 ``Conv1d`` layers with dropout, ReLU and a residual add."""
+    def __init__(self, hidden_units, dropout_rate):
+        super().__init__()
+        self.process = nn.Sequential(
+            nn.Conv1d(hidden_units, hidden_units, kernel_size=1),
+            nn.Dropout(p=dropout_rate),
+            nn.ReLU(),
+            nn.Conv1d(hidden_units, hidden_units, kernel_size=1),
+            nn.Dropout(p=dropout_rate),
+        )
+
+    def forward(self, inputs):
+        outputs = self.process(inputs.transpose(-1, -2)).transpose(-1, -2)
+        return outputs.add_(inputs)  # in-place residual connection
+
+
+class SASRecModel(nn.Module):
+    """Self-attentive sequence encoder of SASRec (Kang & McAuley, 2018).
+
+    Each ``(B, T)`` batch of item-id histories is encoded and summarised by
+    the representation at its last position. Real item ids lie in
+    ``[0, item_num)``; ``pad_idx`` (``item_num`` unless overridden) marks padding.
+
+    ``forward(_, hist_iids, out_iids, return_hidden=False)`` returns a
+    ``(B, B+N)`` score matrix when ``out_iids`` holds the ``B`` in-batch
+    positives followed by ``N`` shared negatives, which is the layout the
+    loss functions in :mod:`cornac.models.seq_utils` expect. ``predict``
+    scores candidate items for inference and returns a numpy array.
+    """
+
+    def __init__(
+        self,
+        item_num,
+        embedding_dim=100,
+        maxlen=20,
+        n_layers=2,
+        n_heads=1,
+        use_pos_emb=True,
+        use_biases=True,
+        dropout=0.2,
+        pad_idx=-1,
+        init_std=0.02,
+        device="cpu",
+    ):
+        super().__init__()
+        self.item_num = item_num
+        self.pad_idx = pad_idx if pad_idx >= 0 else item_num
+        self.maxlen = maxlen
+        self.dev = device
+        self.init_std = init_std
+
+        # One extra embedding row so that pad_idx has an entry of its own.
+        self.item_emb = nn.Embedding(
+            self.item_num + 1, embedding_dim, padding_idx=self.pad_idx
+        )
+        if use_pos_emb:
+            self.pos_emb = nn.Embedding(maxlen + 1, embedding_dim)
+        if use_biases:
+            self.item_biases = nn.Embedding(
+                self.item_num + 1, 1, padding_idx=self.pad_idx
+            )
+        self.emb_dropout = nn.Dropout(p=dropout)
+
+        self.attention_layernorms = nn.ModuleList()
+        self.attention_layers = nn.ModuleList()
+        self.forward_layernorms = nn.ModuleList()
+        self.forward_layers = nn.ModuleList()
+        self.last_layernorm = nn.LayerNorm(embedding_dim, eps=1e-8)
+
+        for _ in range(n_layers):
+            self.attention_layernorms.append(nn.LayerNorm(embedding_dim, eps=1e-8))
+            self.attention_layers.append(
+                nn.MultiheadAttention(embedding_dim, n_heads, dropout)
+            )
+            self.forward_layernorms.append(nn.LayerNorm(embedding_dim, eps=1e-8))
+            self.forward_layers.append(PointWiseFeedForward(embedding_dim, dropout))
+
+        self.apply(self._init_weights)
+        self.to(device)
+
+    def _init_weights(self, module):
+        """Initialise ``module``: N(0, init_std) weights, zeroed biases/padding row."""
+        if isinstance(module, (nn.Linear, nn.Conv1d, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=self.init_std)
+            if isinstance(module, nn.Embedding):
+                if module.padding_idx is not None:
+                    module.weight.data[module.padding_idx].zero_()
+            elif module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+
+    def _score_items(self, hidden, cand_items, biases=None):
+        """Return ``hidden @ cand_items.T``, plus ``biases.T`` when given."""
+        scores = torch.mm(hidden, cand_items.T)
+        # biases hold one row per candidate; transpose so they add per column.
+        return scores if biases is None else scores + biases.T
+
+    def _encode(self, hist_iids):
+        """Encode ``(B, T)`` item-id histories; return the last position, ``(B, D)``."""
+        seqs = self.item_emb(hist_iids) * (self.item_emb.embedding_dim**0.5)
+        batch, length = hist_iids.shape
+        if hasattr(self, "pos_emb"):
+            # Build position ids on-device, and only when they are used.
+            positions = torch.arange(length, dtype=torch.long, device=seqs.device)
+            seqs = seqs + self.pos_emb(positions.unsqueeze(0).expand(batch, -1))
+        seqs = self.emb_dropout(seqs)
+
+        # 1.0 where a position holds a real item, 0.0 at padding; shape (B, T, 1).
+        is_pad = (hist_iids == self.pad_idx).to(dtype=seqs.dtype, device=seqs.device)
+        keep = (1.0 - is_pad).unsqueeze(-1)
+        seqs = seqs * keep
+
+        # True above the diagonal: a position may not attend to later positions.
+        causal_mask = ~torch.tril(
+            torch.ones((length, length), dtype=torch.bool, device=seqs.device)
+        )
+
+        blocks = zip(
+            self.attention_layernorms,
+            self.attention_layers,
+            self.forward_layernorms,
+            self.forward_layers,
+        )
+        for attn_norm, attention, ffn_norm, ffn in blocks:
+            seqs_t = torch.transpose(seqs, 0, 1)  # (T, B, D) for attention
+            query = attn_norm(seqs_t)
+            mha_out, _ = attention(query, seqs_t, seqs_t, attn_mask=causal_mask)
+            # Residual connection around the attention output.
+            seqs = torch.transpose(query + mha_out, 0, 1)
+            seqs = ffn(ffn_norm(seqs)) * keep
+
+        return self.last_layernorm(seqs)[:, -1, :]
+
+    def forward(self, user_ids, hist_iids, out_iids, return_hidden=False):
+        """Score ``out_iids`` for each history, or return the parts if asked."""
+        hidden = self._encode(hist_iids)
+        item_emb = self.item_emb(out_iids)
+        biases = self.item_biases(out_iids) if hasattr(self, "item_biases") else None
+        parts = (hidden, item_emb, biases)
+        return parts if return_hidden else self._score_items(*parts)
+
+    @torch.no_grad()
+    def predict(self, user_ids, log_seqs, item_indices=None):
+        """Score candidate items (default: all real items) for one padded sequence.
+
+        Returns a 1-D numpy array with one score per candidate.
+        """
+        if item_indices is None:
+            item_indices = torch.arange(self.item_num, device=self.dev)
+        else:
+            item_indices = torch.as_tensor(
+                item_indices, dtype=torch.long, device=self.dev
+            )
+        if not isinstance(log_seqs, torch.Tensor):
+            log_seqs = torch.as_tensor(log_seqs, dtype=torch.long, device=self.dev)
+        hidden = self._encode(log_seqs)
+        item_emb = self.item_emb(item_indices)
+        biases = (
+            self.item_biases(item_indices) if hasattr(self, "item_biases") else None
+        )
+        scores = self._score_items(hidden, item_emb, biases)
+        # Drop only the batch axis so a single candidate still yields a 1-D array.
+        return scores.squeeze(0).detach().cpu().numpy()
diff --git a/cornac/models/seq_utils/__init__.py b/cornac/models/seq_utils/__init__.py
new file mode 100644
index 000000000..67a9be9a6
--- /dev/null
+++ b/cornac/models/seq_utils/__init__.py
@@ -0,0 +1,69 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Shared utilities for sequential recommendation models.
+
+The subpackage is organised so that ``cornac`` can still be imported in an
+environment without :mod:`torch`. Only the numpy-only iterators are
+re-exported here; the torch-using helpers must be imported explicitly from
+their dedicated modules inside ``fit()``::
+
+    # at module top — safe, numpy only
+    from ..seq_utils import io_iter, user_io_iter, session_seq_iter, user_seq_iter
+
+    def fit(self, train_set, val_set=None):
+        import torch
+        from ..seq_utils.losses import get_loss_function
+        from ..seq_utils.optim import IndexedAdagradM
+        ...
+
+Unified output contract
+-----------------------
+
+* **Model forward output**: every sequential model returns an
+  ``item_scores`` matrix of shape ``(B, B + N)`` where the diagonal holds the
+  positive target scores and the remaining columns are in-batch + sampled
+  negatives.
+* **Loss functions** (see :mod:`.losses`) all consume that ``(B, B+N)``
+  matrix and return a scalar.
+* **Iterators** (see :mod:`.iterators`) yield uniform tuples regardless of
+  mode:
+
+  =========================  =================================================
+  Iterator                   Yielded tuple
+  =========================  =================================================
+  ``io_iter``                ``(in_uids, in_iids, out_iids, start_mask,
+                               valid_id)`` — per-item RNN, session-based.
+  ``user_io_iter``           ``(in_uids, in_iids, out_iids, start_mask,
+                               valid_id)`` — per-item RNN, session-aware.
+  ``session_seq_iter``       ``(in_uids, hist_iids, out_iids)`` — sequence
+                               models, session-based.
+  ``user_seq_iter``          ``(in_uids, hist_iids, out_iids)`` — sequence
+                               models, session-aware.
+  =========================  =================================================
+"""
+
+from .iterators import (
+    io_iter,
+    user_io_iter,
+    session_seq_iter,
+    user_seq_iter,
+)
+
+__all__ = [
+    "io_iter",
+    "user_io_iter",
+    "session_seq_iter",
+    "user_seq_iter",
+]
diff --git a/cornac/models/seq_utils/iterators.py b/cornac/models/seq_utils/iterators.py
new file mode 100644
index 000000000..73c4f4dfc
--- /dev/null
+++ b/cornac/models/seq_utils/iterators.py
@@ -0,0 +1,390 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Mini-batch iterators for session-based and session-aware training.
+
+This module is intentionally numpy-only so it can be imported from the
+top of any ``recom_*.py`` file without pulling in :mod:`torch`.
+"""
+
+from collections import Counter
+
+import numpy as np
+
+from ...utils.common import get_rng
+
+
+def _build_neg_sampler(uir_tuple, sample_alpha):
+    """Return ``(item_indices, item_dist)``: item ids by descending count and normalised ``count ** sample_alpha``."""
+    # Rank the items once and reuse the ranking for both output arrays.
+    ranked = Counter(uir_tuple[1]).most_common()
+    item_indices = np.array([iid for iid, _ in ranked], dtype="int")
+    item_dist = np.array([cnt for _, cnt in ranked], dtype="float") ** sample_alpha
+    return item_indices, item_dist / item_dist.sum()
+
+
+def io_iter(s_iter, uir_tuple, n_sample=0, sample_alpha=0, rng=None, batch_size=1, shuffle=False):
+    """Session-based per-item iterator (parallel sessions).
+
+    Yields per training step a 5-tuple
+    ``(in_uids, in_iids, out_iids, start_mask, valid_id)`` where:
+
+    - ``in_uids``: user id owning each parallel slot's current session.
+      Shape ``(B',)``.
+    - ``in_iids``: current input item id in each slot. Shape ``(B',)``.
+    - ``out_iids``: target item ids (followed by ``n_sample`` shared
+      negatives). Shape ``(B' + N,)``.
+    - ``start_mask``: 1 in slots that just started a new session (used to
+      reset RNN hidden state). Shape ``(B',)``.
+    - ``valid_id``: indices of the slots that remain valid in the current
+      batch (used to trim the hidden state when sessions end at different
+      times). Shape ``(B',)``.
+
+    ``B'`` equals ``batch_size`` for the main loop and shrinks during the
+    drain phase as sessions are exhausted; slots that were never filled
+    (fewer usable sessions than ``batch_size``) are dropped there as well.
+
+    The 5-tuple matches :func:`user_io_iter`. ``in_uids`` lets user-conditioned
+    models train in session-based mode; other models can simply discard it.
+    """
+    rng = rng if rng is not None else get_rng(None)
+    start_mask = np.zeros(batch_size, dtype="int")
+    end_mask = np.ones(batch_size, dtype="int")  # 1 = slot is free / its session ended
+    input_iids = None
+    output_iids = None
+    l_pool = []  # pending sessions (list of mapped-id lists)
+    c_pool = [None for _ in range(batch_size)]  # session currently held by each slot
+    u_pool = np.zeros(batch_size, dtype="int")  # user id of each slot's session
+    sizes = np.zeros(batch_size, dtype="int")  # remaining items per slot (0 = never filled)
+    if n_sample > 0:
+        item_indices, item_dist = _build_neg_sampler(uir_tuple, sample_alpha)
+
+    def _user_of(mapped_ids):
+        # Any row in the session shares the same user id.
+        return int(uir_tuple[0][mapped_ids[0]])
+
+    for _, batch_mapped_ids in s_iter(batch_size, shuffle):
+        l_pool += batch_mapped_ids
+        while len(l_pool) > 0:
+            if end_mask.sum() == 0:
+                input_iids = uir_tuple[1][[mapped_ids[-sizes[idx]] for idx, mapped_ids in enumerate(c_pool)]]
+                output_iids = uir_tuple[1][[mapped_ids[-sizes[idx] + 1] for idx, mapped_ids in enumerate(c_pool)]]
+                sizes -= 1
+                for idx, size in enumerate(sizes):
+                    if size == 1:
+                        end_mask[idx] = 1
+                if n_sample > 0:
+                    negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
+                    output_iids = np.concatenate([output_iids, negatives])
+                yield (
+                    u_pool.copy(),
+                    input_iids,
+                    output_iids,
+                    start_mask.copy(),
+                    np.arange(batch_size, dtype="int"),
+                )
+                start_mask.fill(0)
+            while end_mask.sum() > 0 and len(l_pool) > 0:
+                next_seq = l_pool.pop()
+                if len(next_seq) > 1:
+                    idx = np.nonzero(end_mask)[0][0]
+                    end_mask[idx] = 0
+                    start_mask[idx] = 1
+                    c_pool[idx] = next_seq
+                    u_pool[idx] = _user_of(next_seq)
+                    sizes[idx] = len(c_pool[idx])
+
+    valid_id = np.ones(batch_size, dtype="int")
+    while True:
+        for idx, size in enumerate(sizes):
+            if size <= 1:  # finished (1) or never-filled (0) slot: drop it from the batch
+                end_mask[idx] = 1
+                valid_id[idx] = 0
+        keep = [idx for idx in range(len(c_pool)) if sizes[idx] > 1]
+        if not keep:
+            break
+        input_iids = uir_tuple[1][[c_pool[idx][-sizes[idx]] for idx in keep]]
+        output_iids = uir_tuple[1][[c_pool[idx][-sizes[idx] + 1] for idx in keep]]
+        sizes -= 1
+        for idx, size in enumerate(sizes):
+            if size == 1:
+                end_mask[idx] = 1
+        keep_mask = np.nonzero(valid_id)[0]
+        start_mask = start_mask[keep_mask]
+        end_mask = end_mask[keep_mask]
+        sizes = sizes[keep_mask]
+        c_pool = [_ for _, valid in zip(c_pool, valid_id) if valid > 0]
+        u_pool = u_pool[keep_mask]
+        if n_sample > 0:
+            negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
+            output_iids = np.concatenate([output_iids, negatives])
+        yield u_pool.copy(), input_iids, output_iids, start_mask.copy(), np.nonzero(valid_id)[0]
+        valid_id = np.ones(len(input_iids), dtype="int")
+        if end_mask.sum() == len(input_iids):
+            break
+        start_mask.fill(0)
+
+
+# NOTE: A per-RNN-step session-aware iterator with a max_len history window
+# (the "uio_iter" from CoVE_models) is intentionally not provided here: for
+# sequence/transformer models the cleaner ``session_seq_iter`` /
+# ``user_seq_iter`` below are used instead. For RNN-style GRU4Rec see
+# ``user_io_iter`` for session-aware per-item iteration over user sequences.
+
+
+def _user_chrono_sequences(train_set):
+    """Collect one cross-session item sequence per user.
+
+    A user's sessions are ordered by the timestamp of each session's first
+    interaction when ``train_set.timestamps`` is not ``None`` (otherwise the
+    stored session order is kept) and their items are concatenated in
+    in-session order. Returns a list of ``(user_idx, items)`` tuples; users
+    with fewer than two items are omitted.
+    """
+    item_ids = train_set.uir_tuple[1]
+    session_rows = train_set.sessions
+    stamps = train_set.timestamps
+
+    def _first_stamp(sid):
+        return stamps[session_rows[sid][0]]
+
+    result = []
+    for user, session_ids in train_set.user_session_data.items():
+        ordered = list(session_ids) if stamps is None else sorted(session_ids, key=_first_stamp)
+        # Flatten the ordered sessions into one plain-int item list.
+        seq = [int(item) for sid in ordered for item in item_ids[session_rows[sid]]]
+        if len(seq) > 1:
+            result.append((int(user), seq))
+    return result
+
+
+def user_io_iter(train_set, n_sample=0, sample_alpha=0, rng=None, batch_size=1, shuffle=False):
+    """Session-aware per-item iterator (parallel users).
+
+    Like :func:`io_iter` but the parallel "slots" hold *user* chronological
+    item sequences (concatenated across sessions) instead of single sessions,
+    so ``start_mask`` marks *user* boundaries rather than *session* boundaries.
+    With fewer usable users than ``batch_size`` only the drain phase yields.
+    """
+    rng = rng if rng is not None else get_rng(None)
+    sequences = _user_chrono_sequences(train_set)
+    if shuffle:
+        rng.shuffle(sequences)
+    uir_tuple = train_set.uir_tuple
+    if n_sample > 0:
+        item_indices, item_dist = _build_neg_sampler(uir_tuple, sample_alpha)
+
+    start_mask = np.zeros(batch_size, dtype="int")
+    end_mask = np.ones(batch_size, dtype="int")  # 1 = slot is free / its sequence ended
+    c_pool = [None for _ in range(batch_size)]  # item sequence held by each slot
+    u_pool = [None for _ in range(batch_size)]  # user id of each slot
+    sizes = np.zeros(batch_size, dtype="int")  # remaining items per slot (0 = never filled)
+    pool_iter = iter(sequences)
+
+    def _refill():
+        nonlocal end_mask
+        while end_mask.sum() > 0:
+            try:
+                uid, seq = next(pool_iter)
+            except StopIteration:
+                return False  # pool exhausted; some slots may still be free
+            if len(seq) <= 1:
+                continue
+            idx = np.nonzero(end_mask)[0][0]
+            end_mask[idx] = 0
+            start_mask[idx] = 1
+            u_pool[idx] = uid
+            c_pool[idx] = seq
+            sizes[idx] = len(seq)
+        return True
+
+    # No up-front refill/early return: the loop below refills on its first pass,
+    # and when fewer than ``batch_size`` users exist the partially filled pool
+    # is handed to the drain phase instead of being dropped.
+
+    while True:
+        if end_mask.sum() == 0:
+            input_uids = np.array([u_pool[idx] for idx in range(batch_size)], dtype="int")
+            input_iids = np.array([c_pool[idx][-sizes[idx]] for idx in range(batch_size)], dtype="int")
+            output_iids = np.array([c_pool[idx][-sizes[idx] + 1] for idx in range(batch_size)], dtype="int")
+            sizes -= 1
+            for idx, size in enumerate(sizes):
+                if size == 1:
+                    end_mask[idx] = 1
+            if n_sample > 0:
+                negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
+                output_iids = np.concatenate([output_iids, negatives])
+            yield input_uids, input_iids, output_iids, start_mask.copy(), np.arange(batch_size, dtype="int")
+            start_mask.fill(0)
+        if end_mask.sum() > 0:
+            if not _refill():
+                break
+
+    # drain remainder
+    valid_id = np.ones(batch_size, dtype="int")
+    while True:
+        for idx, size in enumerate(sizes):
+            if size <= 1:  # finished (1) or never-filled (0) slot: drop it from the batch
+                end_mask[idx] = 1
+                valid_id[idx] = 0
+        keep = [idx for idx in range(len(c_pool)) if sizes[idx] > 1]
+        if not keep:
+            break
+        input_uids = np.array([u_pool[idx] for idx in keep], dtype="int")
+        input_iids = np.array([c_pool[idx][-sizes[idx]] for idx in keep], dtype="int")
+        output_iids = np.array([c_pool[idx][-sizes[idx] + 1] for idx in keep], dtype="int")
+        sizes -= 1
+        for idx, size in enumerate(sizes):
+            if size == 1:
+                end_mask[idx] = 1
+        start_mask = start_mask[np.nonzero(valid_id)[0]]
+        end_mask = end_mask[np.nonzero(valid_id)[0]]
+        sizes = sizes[np.nonzero(valid_id)[0]]
+        c_pool = [_ for _, v in zip(c_pool, valid_id) if v > 0]
+        u_pool = [_ for _, v in zip(u_pool, valid_id) if v > 0]
+        if n_sample > 0:
+            negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
+            output_iids = np.concatenate([output_iids, negatives])
+        yield input_uids, input_iids, output_iids, start_mask.copy(), np.nonzero(valid_id)[0]
+        valid_id = np.ones(len(input_iids), dtype="int")
+        if end_mask.sum() == len(input_iids):
+            break
+        start_mask.fill(0)
+
+
+def session_seq_iter(
+    train_set,
+    pad_index,
+    batch_size=64,
+    max_len=20,
+    n_sample=2048,
+    sample_alpha=0.5,
+    rng=None,
+    shuffle=True,
+):
+    """Yield fixed-length history batches, one training sequence per session.
+
+    For a session ``[i0, ..., iT]`` every prefix ``[i0 .. i(t-1)]`` (truncated
+    to its last ``max_len`` items and left-padded with ``pad_index``) is paired
+    with the next item ``it``. Sessions with fewer than two items are skipped.
+
+    Parameters
+    ----------
+    train_set: dataset exposing ``uir_tuple`` and a ``sessions`` mapping (id -> row indices).
+    pad_index: value used to left-pad histories shorter than ``max_len``.
+    batch_size, max_len: samples per batch and history length.
+    n_sample, sample_alpha: negatives appended per batch and the exponent
+        applied to item counts when sampling them.
+    rng, shuffle: random generator and whether session order is shuffled.
+
+    Yields
+    ------
+    ``(in_uids, hist_iids, out_iids)`` with shapes ``(B,)``, ``(B, max_len)``
+    and ``(B + n_sample,)``. A final partial batch is emitted only when it
+    holds more than one sample.
+    """
+    rng = get_rng(None) if rng is None else rng
+    uir_tuple = train_set.uir_tuple
+    sessions = train_set.sessions
+    session_ids = list(sessions.keys())
+    if shuffle:
+        rng.shuffle(session_ids)
+    if n_sample > 0:
+        item_indices, item_dist = _build_neg_sampler(uir_tuple, sample_alpha)
+
+    def _pack(uids, hists, targets):
+        out_iids = np.array(targets, dtype="int")
+        if n_sample > 0:
+            negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
+            out_iids = np.concatenate([out_iids, negatives])
+        return np.array(uids, dtype="int"), np.array(hists, dtype="int"), out_iids
+
+    pend_uids, pend_hists, pend_targets = [], [], []
+    for sid in session_ids:
+        rows = sessions[sid]
+        items = list(uir_tuple[1][rows])
+        if len(items) < 2:
+            continue
+        uid = int(uir_tuple[0][rows[0]])
+        for t, target in enumerate(items[1:], start=1):
+            window = items[:t][-max_len:]
+            pend_uids.append(uid)
+            pend_hists.append([pad_index] * (max_len - len(window)) + list(window))
+            pend_targets.append(target)
+            if len(pend_uids) == batch_size:
+                yield _pack(pend_uids, pend_hists, pend_targets)
+                pend_uids, pend_hists, pend_targets = [], [], []
+    if len(pend_uids) > 1:
+        yield _pack(pend_uids, pend_hists, pend_targets)
+
+
+def user_seq_iter(
+    train_set,
+    pad_index,
+    batch_size=64,
+    max_len=20,
+    n_sample=2048,
+    sample_alpha=0.5,
+    rng=None,
+    shuffle=True,
+):
+    """Yield fixed-length history batches from per-user cross-session sequences.
+
+    Every prefix of a user's sequence from :func:`_user_chrono_sequences` is
+    truncated to its last ``max_len`` items, left-padded with ``pad_index`` and
+    paired with the next item, so a history can span several sessions.
+
+    Parameters
+    ----------
+    train_set: dataset handed to :func:`_user_chrono_sequences` (``uir_tuple`` feeds the sampler).
+    pad_index: value used to left-pad histories shorter than ``max_len``.
+    batch_size, max_len: samples per batch and history length.
+    n_sample, sample_alpha: negatives appended per batch and the exponent
+        applied to item counts when sampling them.
+    rng, shuffle: random generator and whether user order is shuffled.
+
+    Yields
+    ------
+    ``(in_uids, hist_iids, out_iids)``; a final partial batch is emitted only
+    when it holds more than one sample.
+    """
+    rng = get_rng(None) if rng is None else rng
+    sequences = _user_chrono_sequences(train_set)
+    if shuffle:
+        rng.shuffle(sequences)
+    uir_tuple = train_set.uir_tuple
+    if n_sample > 0:
+        item_indices, item_dist = _build_neg_sampler(uir_tuple, sample_alpha)
+
+    def _pack(uids, hists, targets):
+        out_iids = np.array(targets, dtype="int")
+        if n_sample > 0:
+            negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
+            out_iids = np.concatenate([out_iids, negatives])
+        return np.array(uids, dtype="int"), np.array(hists, dtype="int"), out_iids
+
+    pend_uids, pend_hists, pend_targets = [], [], []
+    for uid, items in sequences:
+        if len(items) < 2:
+            continue
+        for t, target in enumerate(items[1:], start=1):
+            window = items[:t][-max_len:]
+            pend_uids.append(uid)
+            pend_hists.append([pad_index] * (max_len - len(window)) + list(window))
+            pend_targets.append(target)
+            if len(pend_uids) == batch_size:
+                yield _pack(pend_uids, pend_hists, pend_targets)
+                pend_uids, pend_hists, pend_targets = [], [], []
+    if len(pend_uids) > 1:
+        yield _pack(pend_uids, pend_hists, pend_targets)
diff --git a/cornac/models/seq_utils/losses.py b/cornac/models/seq_utils/losses.py
new file mode 100644
index 000000000..cad911463
--- /dev/null
+++ b/cornac/models/seq_utils/losses.py
@@ -0,0 +1,125 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Loss functions for sequential recommendation models.
+
+Importing this module requires :mod:`torch`. To respect Cornac's optional-
+dependency convention, only import it from inside a model's ``fit()`` method
+(or another scope guaranteed to be reached only when torch is available).
+"""
+
+import torch
+import torch.nn.functional as F
+
+
+def softmax_neg(X):
+    """Row-wise softmax over off-diagonal entries; diagonal weights are 0 (left unnormalised when X has one row)."""
+    off_diag = 1.0 - torch.eye(*X.shape, out=torch.empty_like(X))
+    masked = X * off_diag
+    row_max = masked.max(dim=1, keepdim=True).values
+    weights = torch.exp(masked - row_max) * off_diag
+    single_row = weights.size(0) == 1
+    return weights if single_row else weights / (weights.sum(dim=1, keepdim=True) + 1e-24)
+
+
+def bpr_loss(item_scores, **kwargs):
+    """Mean BPR loss ``-log sigmoid(pos - neg)`` over all off-diagonal columns.
+
+    Parameters
+    ----------
+    item_scores: torch.Tensor of shape (B, B+N)
+        Score matrix with positives on the diagonal. Extra keyword
+        arguments are accepted and ignored.
+    """
+    diag = torch.diag(item_scores)
+    pairwise = F.logsigmoid(diag.reshape(diag.shape[0], -1) - item_scores)
+    off_diag = 1.0 - torch.eye(*pairwise.shape, out=torch.empty_like(pairwise))
+    n_rows, n_negatives = pairwise.size(0), max(pairwise.size(1) - 1, 1)
+    return -torch.sum(pairwise * off_diag) / n_rows / n_negatives
+
+
+def top1_loss(item_scores, n_sample=0, **kwargs):
+    """TOP1 ranking loss (Hidasi et al., 2015) with positives on the diagonal of ``item_scores``."""
+    n_rows = item_scores.size(0)
+    diag = torch.diag(item_scores)
+    target = diag.reshape(diag.shape[0], -1)
+    ranking = torch.sigmoid(item_scores - target) + torch.sigmoid(item_scores**2)
+    row_term = torch.mean(ranking, dim=1)
+    # NOTE(review): ``row_term`` is (B,) while the target term is (B, 1), so the
+    # subtraction broadcasts to (B, B) before the sum; kept as-is, confirm intended.
+    target_term = torch.sigmoid(target**2) / (n_rows + n_sample)
+    return torch.sum(row_term - target_term) / n_rows
+
+
+def xe_softmax_loss(item_scores, out_iids=None, P0=None, logq=0.0, sample_alpha=0.5, batch_size=None, **kwargs):
+    """Softmax cross-entropy with the diagonal of ``item_scores`` as each row's target column.
+
+    When ``logq > 0`` and ``P0``, ``out_iids`` and ``batch_size`` are all given,
+    ``logq * log(p)`` is subtracted from every column first, with ``p`` taken
+    from ``P0`` for the first ``batch_size`` ids and ``P0 ** sample_alpha`` for the rest.
+    """
+    if logq > 0 and P0 is not None and out_iids is not None and batch_size is not None:
+        prior = torch.cat([P0[out_iids[:batch_size]], P0[out_iids[batch_size:]] ** sample_alpha])
+        item_scores = item_scores - logq * torch.log(prior)
+    probs = torch.exp(item_scores - item_scores.max(dim=1, keepdim=True).values)
+    probs = probs / (probs.sum(dim=1, keepdim=True) + 1e-24)
+    return -torch.sum(torch.log(torch.diag(probs) + 1e-24)) / item_scores.size(0)
+
+
+def bpr_max_loss(item_scores, bpreg=1.0, elu_param=0.5, **kwargs):
+    """BPR-max: softmax-weighted BPR over negatives plus a ``bpreg``-scaled penalty on squared scores."""
+    scores = F.elu(item_scores, elu_param) if elu_param > 0 else item_scores
+    neg_weights = softmax_neg(scores)
+    diag = torch.diag(scores)
+    target = diag.reshape(diag.shape[0], -1)
+    # Softmax-weighted probability that the positive outranks the negatives, per row.
+    ranked = torch.sum(torch.sigmoid(target - scores) * neg_weights, dim=1)
+    # Score regulariser: squared scores weighted by the same softmax.
+    penalty = bpreg * torch.sum((scores**2) * neg_weights, dim=1)
+    return torch.sum(-torch.log(ranked + 1e-24) + penalty) / scores.size(0)
+
+
+def bce_loss(item_scores, **kwargs):
+    """Mean binary cross-entropy with logits: diagonal entries are labelled 1,
+    every other column (in-batch + sampled negatives) is labelled 0.
+    """
+    targets = torch.zeros_like(item_scores)
+    diag_idx = torch.arange(item_scores.size(0), device=item_scores.device)  # index lives with the scores
+    targets[diag_idx, diag_idx] = 1.0
+    return F.binary_cross_entropy_with_logits(item_scores, targets)
+
+
+def ce_loss(item_scores, **kwargs):
+    """Cross-entropy over each row's columns, where row ``i``'s target class is column ``i``."""
+    diag_classes = torch.arange(item_scores.size(0), dtype=torch.long, device=item_scores.device)
+    return F.cross_entropy(item_scores, diag_classes)
+
+
+LOSS_FUNCTIONS = {  # registry: loss name -> loss callable, looked up by get_loss_function()
+    "bpr": bpr_loss,
+    "top1": top1_loss,
+    "cross-entropy": xe_softmax_loss,  # three names map to the same softmax loss
+    "xe_softmax": xe_softmax_loss,
+    "softmax": xe_softmax_loss,
+    "bpr-max": bpr_max_loss,
+    "bce": bce_loss,
+    "ce": ce_loss,  # F.cross_entropy variant, distinct from "cross-entropy" above
+}
+
+
+def get_loss_function(name):
+    """Return the loss callable registered under ``name``; raise ``ValueError`` for unknown names."""
+    if name in LOSS_FUNCTIONS:
+        return LOSS_FUNCTIONS[name]
+    raise ValueError(f"Unknown loss '{name}'. Supported: {sorted(set(LOSS_FUNCTIONS))}")
diff --git a/cornac/models/seq_utils/optim.py b/cornac/models/seq_utils/optim.py
new file mode 100644
index 000000000..df98c5a1a
--- /dev/null
+++ b/cornac/models/seq_utils/optim.py
@@ -0,0 +1,93 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Custom optimizer(s) for sequential recommendation models.
+
+Importing this module requires :mod:`torch`. Import only from inside a
+model's ``fit()`` method (or another scope guaranteed to be reached only
+when torch is available).
+"""
+
+import torch
+from torch.optim import Optimizer
+
+
+class IndexedAdagradM(Optimizer):
+    """Sparse-aware Adagrad with momentum, used by GRU4Rec and FPMC."""
+
+    def __init__(self, params, lr=0.05, momentum=0.0, eps=1e-6):
+        if lr <= 0.0:
+            raise ValueError("Invalid learning rate: {}".format(lr))
+        if momentum < 0.0:
+            raise ValueError("Invalid momentum value: {}".format(momentum))
+        if eps <= 0.0:
+            raise ValueError("Invalid epsilon value: {}".format(eps))
+
+        defaults = dict(lr=lr, momentum=momentum, eps=eps)
+        super(IndexedAdagradM, self).__init__(params, defaults)
+
+        for group in self.param_groups:  # state is allocated eagerly, not lazily in step()
+            for p in group["params"]:
+                state = self.state[p]
+                state["acc"] = torch.full_like(p, 0, memory_format=torch.preserve_format)  # running sum of squared grads
+                if momentum > 0:  # momentum buffer exists only when momentum is enabled
+                    state["mom"] = torch.full_like(p, 0, memory_format=torch.preserve_format)
+
+    def share_memory(self):  # calls share_memory_() on every state buffer
+        for group in self.param_groups:
+            for p in group["params"]:
+                state = self.state[p]
+                state["acc"].share_memory_()
+                if group["momentum"] > 0:
+                    state["mom"].share_memory_()
+
+    @torch.no_grad()
+    def step(self, closure=None):  # returns the closure's loss, or None without a closure
+        loss = None
+        if closure is not None:
+            with torch.enable_grad():  # closure may need autograd although step() runs under no_grad
+                loss = closure()
+        for group in self.param_groups:
+            for p in group["params"]:
+                if p.grad is None:  # parameters without a gradient are left untouched
+                    continue
+                grad = p.grad
+                state = self.state[p]
+                clr = group["lr"]
+                momentum = group["momentum"]
+                if grad.is_sparse:  # update only the rows present in the sparse gradient
+                    grad = grad.coalesce()
+                    grad_indices = grad._indices()[0]  # only the first index row is used
+                    grad_values = grad._values()
+                    accs = state["acc"][grad_indices] + grad_values.pow(2)
+                    state["acc"].index_copy_(0, grad_indices, accs)
+                    accs.add_(group["eps"]).sqrt_().mul_(-1 / clr)  # accs becomes -sqrt(acc + eps) / lr
+                    if momentum > 0:
+                        moma = state["mom"][grad_indices]
+                        moma.mul_(momentum).add_(grad_values / accs)  # grad / accs == -lr * grad / sqrt(acc + eps)
+                        state["mom"].index_copy_(0, grad_indices, moma)
+                        p.index_add_(0, grad_indices, moma)
+                    else:
+                        p.index_add_(0, grad_indices, grad_values / accs)
+                else:  # dense gradient: same rule applied to the whole tensor
+                    state["acc"].add_(grad.pow(2))
+                    accs = state["acc"].add(group["eps"])  # out-of-place add: eps is not stored in the accumulator
+                    accs.sqrt_()
+                    if momentum > 0:
+                        mom = state["mom"]
+                        mom.mul_(momentum).addcdiv_(grad, accs, value=-clr)  # mom = momentum * mom - lr * grad / accs
+                        p.add_(mom)
+                    else:
+                        p.addcdiv_(grad, accs, value=-clr)
+        return loss

From 979601e3b163199a6366a102d71b18070724cd3f Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 17:00:57 +0800
Subject: [PATCH 02/10] feat: model selection

---
 cornac/models/bert4rec/recom_bert4rec.py | 39 +++++++++++++++++++++++-
 cornac/models/gpt2rec/recom_gpt2rec.py   | 39 +++++++++++++++++++++++-
 cornac/models/gru4rec/recom_gru4rec.py   | 39 +++++++++++++++++++++++-
 cornac/models/sasrec/recom_sasrec.py     | 39 +++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/cornac/models/bert4rec/recom_bert4rec.py b/cornac/models/bert4rec/recom_bert4rec.py
index 553c1181f..3c4ba4f58 100644
--- a/cornac/models/bert4rec/recom_bert4rec.py
+++ b/cornac/models/bert4rec/recom_bert4rec.py
@@ -66,6 +66,10 @@ def __init__(
         trainable=True,
         verbose=False,
         seed=None,
+        model_selection="last",
+        val_eval_every=5,
+        val_k=20,
+        val_metric="recall",
     ):
         super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
         if loss not in SUPPORTED_LOSSES:
@@ -89,6 +93,14 @@ def __init__(
         self.device = device
         self.seed = seed
         self.rng = get_rng(seed)
+        if model_selection not in ("last", "best"):
+            raise ValueError(
+                f"model_selection='{model_selection}' not supported; choose 'last' or 'best'"
+            )
+        self.model_selection = model_selection
+        self.val_eval_every = val_eval_every
+        self.val_k = val_k
+        self.val_metric = val_metric
 
     def _build_data_iter(self, pad_index):
         kwargs = dict(
@@ -138,8 +150,10 @@ def fit(self, train_set, val_set=None):
             self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
         )
 
+        best_val = -float("inf")
+        best_state = None
         progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
-        for _ in progress_bar:
+        for epoch_id in progress_bar:
             self.model.train()
             total_loss = 0.0
             cnt = 0
@@ -177,6 +191,29 @@ def fit(self, train_set, val_set=None):
                 cnt += len(hist_iids)
                 if inc % 10 == 0 and cnt > 0:
                     progress_bar.set_postfix(loss=(total_loss / cnt))
+
+            if (
+                self.model_selection == "best"
+                and val_set is not None
+                and epoch_id % self.val_eval_every == 0
+            ):
+                val_score = self._val_score(
+                    val_set, metric=self.val_metric, k=self.val_k
+                )
+                if val_score is not None and val_score > best_val:
+                    best_val = val_score
+                    best_state = {
+                        n: p.detach().clone() for n, p in self.model.state_dict().items()
+                    }
+                if self.verbose:
+                    progress_bar.set_postfix(
+                        loss=(total_loss / max(cnt, 1)),
+                        val_recall=val_score,
+                        best=best_val,
+                    )
+
+        if self.model_selection == "best" and best_state is not None:
+            self.model.load_state_dict(best_state)
         return self
 
     def score(self, user_idx, history_items, **kwargs):
diff --git a/cornac/models/gpt2rec/recom_gpt2rec.py b/cornac/models/gpt2rec/recom_gpt2rec.py
index 0ed2f754a..6a76bbede 100644
--- a/cornac/models/gpt2rec/recom_gpt2rec.py
+++ b/cornac/models/gpt2rec/recom_gpt2rec.py
@@ -60,6 +60,10 @@ def __init__(
         trainable=True,
         verbose=False,
         seed=None,
+        model_selection="last",
+        val_eval_every=5,
+        val_k=20,
+        val_metric="recall",
     ):
         super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
         if loss not in SUPPORTED_LOSSES:
@@ -83,6 +87,14 @@ def __init__(
         self.device = device
         self.seed = seed
         self.rng = get_rng(seed)
+        if model_selection not in ("last", "best"):
+            raise ValueError(
+                f"model_selection='{model_selection}' not supported; choose 'last' or 'best'"
+            )
+        self.model_selection = model_selection
+        self.val_eval_every = val_eval_every
+        self.val_k = val_k
+        self.val_metric = val_metric
 
     def _build_data_iter(self, pad_index):
         kwargs = dict(
@@ -132,8 +144,10 @@ def fit(self, train_set, val_set=None):
             self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
         )
 
+        best_val = -float("inf")
+        best_state = None
         progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
-        for _ in progress_bar:
+        for epoch_id in progress_bar:
             self.model.train()
             total_loss = 0.0
             cnt = 0
@@ -171,6 +185,29 @@ def fit(self, train_set, val_set=None):
                 cnt += len(hist_iids)
                 if inc % 10 == 0 and cnt > 0:
                     progress_bar.set_postfix(loss=(total_loss / cnt))
+
+            if (
+                self.model_selection == "best"
+                and val_set is not None
+                and epoch_id % self.val_eval_every == 0
+            ):
+                val_score = self._val_score(
+                    val_set, metric=self.val_metric, k=self.val_k
+                )
+                if val_score is not None and val_score > best_val:
+                    best_val = val_score
+                    best_state = {
+                        n: p.detach().clone() for n, p in self.model.state_dict().items()
+                    }
+                if self.verbose:
+                    progress_bar.set_postfix(
+                        loss=(total_loss / max(cnt, 1)),
+                        val_recall=val_score,
+                        best=best_val,
+                    )
+
+        if self.model_selection == "best" and best_state is not None:
+            self.model.load_state_dict(best_state)
         return self
 
     def score(self, user_idx, history_items, **kwargs):
diff --git a/cornac/models/gru4rec/recom_gru4rec.py b/cornac/models/gru4rec/recom_gru4rec.py
index a753b7891..4942abb49 100644
--- a/cornac/models/gru4rec/recom_gru4rec.py
+++ b/cornac/models/gru4rec/recom_gru4rec.py
@@ -148,6 +148,10 @@ def __init__(
         trainable=True,
         verbose=False,
         seed=None,
+        model_selection="last",
+        val_eval_every=5,
+        val_k=20,
+        val_metric="recall",
     ):
         super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
         if loss not in SUPPORTED_LOSSES:
@@ -172,6 +176,14 @@ def __init__(
         self.device = device
         self.seed = seed
         self.rng = get_rng(seed)
+        if model_selection not in ("last", "best"):
+            raise ValueError(
+                f"model_selection='{model_selection}' not supported; choose 'last' or 'best'"
+            )
+        self.model_selection = model_selection
+        self.val_eval_every = val_eval_every
+        self.val_k = val_k
+        self.val_metric = val_metric
 
     def _build_loss_kwargs(self):
         return dict(
@@ -229,8 +241,10 @@ def fit(self, train_set, val_set=None):
             self.model.parameters(), self.learning_rate, self.momentum
         )
 
+        best_val = -float("inf")
+        best_state = None
         progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
-        for _ in progress_bar:
+        for epoch_id in progress_bar:
             H = [
                 torch.zeros(
                     (self.batch_size, self.layers[i]),
@@ -268,6 +282,29 @@ def fit(self, train_set, val_set=None):
                 cnt += len(in_iids)
                 if inc % 10 == 0 and cnt > 0:
                     progress_bar.set_postfix(loss=(total_loss / cnt))
+
+            if (
+                self.model_selection == "best"
+                and val_set is not None
+                and epoch_id % self.val_eval_every == 0
+            ):
+                val_score = self._val_score(
+                    val_set, metric=self.val_metric, k=self.val_k
+                )
+                if val_score is not None and val_score > best_val:
+                    best_val = val_score
+                    best_state = {
+                        n: p.detach().clone() for n, p in self.model.state_dict().items()
+                    }
+                if self.verbose:
+                    progress_bar.set_postfix(
+                        loss=(total_loss / max(cnt, 1)),
+                        **{f"val_{self.val_metric}": val_score},  # label tracks val_metric
+                        best=best_val,
+                    )
+
+        if self.model_selection == "best" and best_state is not None:
+            self.model.load_state_dict(best_state)
         return self
 
     def _build_data_iter(self):
diff --git a/cornac/models/sasrec/recom_sasrec.py b/cornac/models/sasrec/recom_sasrec.py
index b48a9f0c3..30cb473bb 100644
--- a/cornac/models/sasrec/recom_sasrec.py
+++ b/cornac/models/sasrec/recom_sasrec.py
@@ -116,6 +116,10 @@ def __init__(
         trainable=True,
         verbose=False,
         seed=None,
+        model_selection="last",
+        val_eval_every=5,
+        val_k=20,
+        val_metric="recall",
     ):
         super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
         if loss not in SUPPORTED_LOSSES:
@@ -140,6 +144,14 @@ def __init__(
         self.use_pos_emb = use_pos_emb
         self.seed = seed
         self.rng = get_rng(seed)
+        if model_selection not in ("last", "best"):
+            raise ValueError(
+                f"model_selection='{model_selection}' not supported; choose 'last' or 'best'"
+            )
+        self.model_selection = model_selection
+        self.val_eval_every = val_eval_every
+        self.val_k = val_k
+        self.val_metric = val_metric
 
     def _build_data_iter(self, pad_index):
         kwargs = dict(
@@ -192,8 +204,10 @@ def fit(self, train_set, val_set=None):
             self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
         )
 
+        best_val = -float("inf")
+        best_state = None
         progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
-        for _ in progress_bar:
+        for epoch_id in progress_bar:
             self.model.train()
             total_loss = 0.0
             cnt = 0
@@ -231,6 +245,29 @@ def fit(self, train_set, val_set=None):
                 cnt += len(hist_iids)
                 if inc % 10 == 0 and cnt > 0:
                     progress_bar.set_postfix(loss=(total_loss / cnt))
+
+            if (
+                self.model_selection == "best"
+                and val_set is not None
+                and epoch_id % self.val_eval_every == 0
+            ):
+                val_score = self._val_score(
+                    val_set, metric=self.val_metric, k=self.val_k
+                )
+                if val_score is not None and val_score > best_val:
+                    best_val = val_score
+                    best_state = {
+                        n: p.detach().clone() for n, p in self.model.state_dict().items()
+                    }
+                if self.verbose:
+                    progress_bar.set_postfix(
+                        loss=(total_loss / max(cnt, 1)),
+                        **{f"val_{self.val_metric}": val_score},  # label tracks val_metric
+                        best=best_val,
+                    )
+
+        if self.model_selection == "best" and best_state is not None:
+            self.model.load_state_dict(best_state)
         return self
 
     def score(self, user_idx, history_items, **kwargs):

From 77470780ae538592d2702377c6de6913bb7b6392 Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 21:59:24 +0800
Subject: [PATCH 03/10] migrate NextItemRecommender to SequentialRecommender

---
 cornac/eval_methods/__init__.py               |   4 +-
 ...evaluation.py => sequential_evaluation.py} | 317 ++++++------------
 cornac/models/__init__.py                     |   1 -
 cornac/models/recommender.py                  | 172 ++++++----
 cornac/models/seq_utils/__init__.py           |   9 +-
 cornac/models/seq_utils/iterators.py          | 102 +++++-
 cornac/models/spop/recom_spop.py              |  20 +-
 examples/gru4rec_yoochoose.py                 |   6 +-
 examples/spop_yoochoose.py                    |   8 +-
 .../eval_methods/test_next_item_evaluation.py |  67 ----
 .../test_sequential_evaluation.py             |  89 +++++
 tests/cornac/models/test_recommender.py       |  19 +-
 12 files changed, 440 insertions(+), 374 deletions(-)
 rename cornac/eval_methods/{next_item_evaluation.py => sequential_evaluation.py} (51%)
 delete mode 100644 tests/cornac/eval_methods/test_next_item_evaluation.py
 create mode 100644 tests/cornac/eval_methods/test_sequential_evaluation.py

diff --git a/cornac/eval_methods/__init__.py b/cornac/eval_methods/__init__.py
index 74979c804..b32bfffb9 100644
--- a/cornac/eval_methods/__init__.py
+++ b/cornac/eval_methods/__init__.py
@@ -22,7 +22,7 @@
 from .timestamp_split import TimestampSplit
 from .cross_validation import CrossValidation
 from .next_basket_evaluation import NextBasketEvaluation
-from .next_item_evaluation import NextItemEvaluation
+from .sequential_evaluation import SequentialEvaluation
 from .propensity_stratified_evaluation import PropensityStratifiedEvaluation
 
 __all__ = [
@@ -32,6 +32,6 @@
     "TimestampSplit",
     "CrossValidation",
     "NextBasketEvaluation",
-    "NextItemEvaluation",
+    "SequentialEvaluation",
     "PropensityStratifiedEvaluation",
 ]
diff --git a/cornac/eval_methods/next_item_evaluation.py b/cornac/eval_methods/sequential_evaluation.py
similarity index 51%
rename from cornac/eval_methods/next_item_evaluation.py
rename to cornac/eval_methods/sequential_evaluation.py
index 50a22244f..6a338cd1f 100644
--- a/cornac/eval_methods/next_item_evaluation.py
+++ b/cornac/eval_methods/sequential_evaluation.py
@@ -1,4 +1,4 @@
-# Copyright 2023 The Cornac Authors. All Rights Reserved.
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,173 +22,138 @@
 
 from ..data import SequentialDataset
 from ..experiment.result import Result
-from ..models import NextItemRecommender
+from ..models import SequentialRecommender
 from . import BaseMethod
 
-EVALUATION_MODES = frozenset([
-    "last",
-    "next",
-])
+EVALUATION_MODES = frozenset(["any", "first", "last"])
+
 
 def ranking_eval(
     model,
     metrics,
     train_set,
     test_set,
-    user_based=False,
+    user_based=True,
     exclude_unknowns=True,
-    mode="last",
+    mode="any",
     verbose=False,
 ):
-    """Evaluate model on provided ranking metrics.
-
-    Parameters
-    ----------
-    model: :obj:`cornac.models.NextItemRecommender`, required
-        NextItemRecommender model to be evaluated.
-
-    metrics: :obj:`iterable`, required
-        List of rating metrics :obj:`cornac.metrics.RankingMetric`.
-
-    train_set: :obj:`cornac.data.SequentialDataset`, required
-        SequentialDataset to be used for model training. This will be used to exclude
-        observations already appeared during training.
-
-    test_set: :obj:`cornac.data.SequentialDataset`, required
-        SequentialDataset to be used for evaluation.
-
-    exclude_unknowns: bool, optional, default: True
-        Ignore unknown users and items during evaluation.
-
-    verbose: bool, optional, default: False
-        Output evaluation progress.
-
-    Returns
-    -------
-    res: (List, List)
-        Tuple of two lists:
-         - average result for each of the metrics
-         - average result per user for each of the metrics
-
+    """Session-aware ranking evaluation.
+
+    Iterates the ``test_set`` by user (``usi_iter``) so that a model sees a
+    user's *prior* sessions as history when scoring the held-out session.
+    The session list passed as ``history_items`` is nested
+    (``list[list[int]]``), and is consumed by
+    :meth:`cornac.models.SequentialRecommender._flatten_history` according
+    to the model's ``mode``.
+
+    Modes
+    -----
+    - ``"any"``:   all items in the last session are positives.
+    - ``"first"``: only the first item of the last session is the positive.
+    - ``"last"``:  only the last item of the last session is the positive;
+                   the rest of that session is appended to ``history_items``.
     """
-
     if len(metrics) == 0:
         return [], []
 
     avg_results = []
-    session_results = [defaultdict(list) for _ in enumerate(metrics)]
     user_results = [defaultdict(list) for _ in enumerate(metrics)]
 
     user_sessions = defaultdict(list)
-    session_ids = []
-    for [sid], [mapped_ids], [session_items] in tqdm(
-        test_set.si_iter(batch_size=1, shuffle=False),
-        total=len(test_set.sessions),
+    for [user_idx], [sids], [mapped_ids], [session_items] in tqdm(
+        test_set.usi_iter(batch_size=1, shuffle=False),
+        total=test_set.num_users,
         desc="Ranking",
         disable=not verbose,
         miniters=100,
     ):
-        if len(session_items) < 2:  # exclude all session with size smaller than 2
+        if len(session_items) == 0 or len(session_items[-1]) == 0:
             continue
-        user_idx = test_set.uir_tuple[0][mapped_ids[0]]
-        if user_based:
-            user_sessions[user_idx].append(sid)
-        session_ids.append(sid)
-
-        start_pos = 1 if mode == "next" else len(session_items) - 1
-        for test_pos in range(start_pos, len(session_items), 1):
-            test_pos_items = session_items[test_pos]
-
-            # binary mask for ground-truth positive items
-            u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int")
-            u_gt_pos_mask[test_pos_items] = 1
-
-            # binary mask for ground-truth negative items, removing all positive items
-            u_gt_neg_mask = np.ones(test_set.num_items, dtype="int")
-            u_gt_neg_mask[test_pos_items] = 0
-
-            # filter items being considered for evaluation
-            if exclude_unknowns:
-                u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items]
-                u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items]
-
-            u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]
-            u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]
-            item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]
-
-            item_rank, item_scores = model.rank(
-                user_idx,
-                item_indices,
-                history_items=session_items[:test_pos],
-                history_mapped_ids=mapped_ids[:test_pos],
-                sessions=test_set.sessions,
-                session_indices=test_set.session_indices,
-                extra_data=test_set.extra_data,
-            )
+        user_sessions[user_idx].append(sids[-1])
 
-            for i, mt in enumerate(metrics):
-                mt_score = mt.compute(
-                    gt_pos=u_gt_pos_items,
-                    gt_neg=u_gt_neg_items,
-                    pd_rank=item_rank,
-                    pd_scores=item_scores,
-                    item_indices=item_indices,
-                )
-                if user_based:
-                    user_results[i][user_idx].append(mt_score)
-                else:
-                    session_results[i][sid].append(mt_score)
-
-    # avg results of ranking metrics
-    for i, mt in enumerate(metrics):
-        if user_based:
-            user_ids = list(user_sessions.keys())
-            user_avg_results = [np.mean(user_results[i][user_idx]) for user_idx in user_ids]
-            avg_results.append(np.mean(user_avg_results))
-        else:
-            session_result = [score for sid in session_ids for score in session_results[i][sid]]
-            avg_results.append(np.mean(session_result))
-    return avg_results, user_results
+        if mode == "any":
+            test_pos_items = session_items[-1]
+        elif mode == "first":
+            test_pos_items = session_items[-1][0:1]
+        else:  # "last"
+            test_pos_items = session_items[-1][-1:]
 
+        # ground-truth masks over the test_set item space
+        u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int")
+        u_gt_pos_mask[test_pos_items] = 1
 
-class NextItemEvaluation(BaseMethod):
-    """Next Item Recommendation Evaluation method
+        u_gt_neg_mask = np.ones(test_set.num_items, dtype="int")
+        u_gt_neg_mask[test_pos_items] = 0
 
-    Parameters
-    ----------
-    data: list, required
-        Raw preference data in the tuple format [(user_id, sessions)].
+        if exclude_unknowns:
+            u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items]
+            u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items]
 
-    test_size: float, optional, default: 0.2
-        The proportion of the test set, \
-        if > 1 then it is treated as the size of the test set.
+        u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]
+        u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]
+        item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]
 
-    val_size: float, optional, default: 0.0
-        The proportion of the validation set, \
-        if > 1 then it is treated as the size of the validation set.
+        if mode == "last":
+            history_items = list(session_items[:-1]) + [session_items[-1][:-1]]
+        else:
+            history_items = list(session_items[:-1])
+
+        item_rank, item_scores = model.rank(
+            user_idx,
+            item_indices,
+            history_items=history_items,
+            history_mapped_ids=mapped_ids[:-1],
+            sessions=test_set.sessions,
+            session_indices=test_set.session_indices,
+            extra_data=test_set.extra_data,
+            mode=mode,
+        )
 
-    fmt: str, default: 'SIT'
-        Format of the input data. Currently, we are supporting:
+        for i, mt in enumerate(metrics):
+            mt_score = mt.compute(
+                gt_pos=u_gt_pos_items,
+                gt_neg=u_gt_neg_items,
+                pd_rank=item_rank,
+                pd_scores=item_scores,
+                item_indices=item_indices,
+            )
+            user_results[i][user_idx].append(mt_score)
 
-        'SIT': Session, Item, Timestamp
-        'USIT': User, Session, Item, Timestamp
-        'SITJson': Session, Item, Timestamp, Json
-        'USITJson': User, Session, Item, Timestamp, Json
+    # average across users (user-based); the user list is metric-independent
+    user_ids = list(user_sessions.keys())
+    for i in range(len(metrics)):
+        if not user_ids:
+            avg_results.append(0.0)
+            continue
+        per_user = [np.mean(user_results[i][uid]) for uid in user_ids]
+        avg_results.append(np.mean(per_user))
 
-    seed: int, optional, default: None
-        Random seed for reproducibility.
+    return avg_results, user_results
 
-    mode: str, optional, default: 'last'
-        Evaluation mode is either 'next' or 'last'.
-        If 'last', only evaluate the last item.
-        If 'next', evaluate every next item in the sequence,
 
-    exclude_unknowns: bool, optional, default: True
-        If `True`, unknown items will be ignored during model evaluation.
+class SequentialEvaluation(BaseMethod):
+    """Next Session Recommendation Evaluation method.
 
-    verbose: bool, optional, default: False
-        Output running log.
+    Iterates the test set by user and feeds the model the user's prior
+    sessions as history when ranking items in the held-out last session.
 
+    Parameters
+    ----------
+    data: list, optional
+        Raw preference data in tuple format. Format defined by ``fmt``.
+
+    test_size: float, default: 0.2
+    val_size: float, default: 0.0
+    fmt: str, default: 'USIT'
+        One of 'SIT', 'USIT', 'SITJson', 'USITJson'. User information is
+        required for ``mode="session-aware"`` on the model side; pure SIT
+        formats only support session-based models.
+    seed: int, optional
+    mode: str, default: 'any'
+        One of 'any', 'first', 'last'.
+    exclude_unknowns: bool, default: True
+    verbose: bool, default: False
     """
 
     def __init__(
@@ -196,9 +161,9 @@ def __init__(
         data=None,
         test_size=0.2,
         val_size=0.0,
-        fmt="SIT",
+        fmt="USIT",
         seed=None,
-        mode="last",
+        mode="any",
         exclude_unknowns=True,
         verbose=False,
         **kwargs,
@@ -215,10 +180,10 @@ def __init__(
             mode=mode,
             **kwargs,
         )
-        
+
         if mode not in EVALUATION_MODES:
             raise ValueError(f"{mode} is not supported. ({EVALUATION_MODES})")
-            
+
         self.mode = mode
         self.global_sid_map = kwargs.get("global_sid_map", OrderedDict())
 
@@ -263,6 +228,7 @@ def _build_datasets(self, train_data, test_data, val_data=None):
                 fmt=self.fmt,
                 global_uid_map=self.global_uid_map,
                 global_iid_map=self.global_iid_map,
+                global_sid_map=self.global_sid_map,
                 seed=self.seed,
                 exclude_unknowns=self.exclude_unknowns,
             )
@@ -288,9 +254,9 @@ def eval(
         test_set,
         exclude_unknowns,
         ranking_metrics,
-        user_based=False,
+        user_based=True,
         verbose=False,
-        mode="last",
+        mode="any",
         **kwargs,
     ):
         metric_avg_results = OrderedDict()
@@ -314,29 +280,8 @@ def eval(
         return Result(model.name, metric_avg_results, metric_user_results)
 
     def evaluate(self, model, metrics, user_based, show_validation=True):
-        """Evaluate given models according to given metrics. Supposed to be called by Experiment.
-
-        Parameters
-        ----------
-        model: :obj:`cornac.models.NextItemRecommender`
-            NextItemRecommender model to be evaluated.
-
-        metrics: :obj:`iterable`
-            List of metrics.
-
-        user_based: bool, required
-            Evaluation strategy for the rating metrics. Whether results
-            are averaging based on number of users or number of ratings.
-
-        show_validation: bool, optional, default: True
-            Whether to show the results on validation set (if exists).
-
-        Returns
-        -------
-        res: :obj:`cornac.experiment.Result`
-        """
-        if not isinstance(model, NextItemRecommender):
-            raise ValueError("model must be a NextItemRecommender but '%s' is provided" % type(model))
+        if not isinstance(model, SequentialRecommender):
+            raise ValueError("model must be a SequentialRecommender but '%s' is provided" % type(model))
 
         if self.train_set is None:
             raise ValueError("train_set is required but None!")
@@ -345,9 +290,6 @@ def evaluate(self, model, metrics, user_based, show_validation=True):
 
         self._reset()
 
-        ###########
-        # FITTING #
-        ###########
         if self.verbose:
             print("\n[{}] Training started!".format(model.name))
 
@@ -355,15 +297,15 @@ def evaluate(self, model, metrics, user_based, show_validation=True):
         model.fit(self.train_set, self.val_set)
         train_time = time.time() - start
 
-        ##############
-        # EVALUATION #
-        ##############
         if self.verbose:
             print("\n[{}] Evaluation started!".format(model.name))
 
         rating_metrics, ranking_metrics = self.organize_metrics(metrics)
         if len(rating_metrics) > 0:
-            warnings.warn("NextItemEvaluation only supports ranking metrics. The given rating metrics {} will be ignored!".format([mt.name for mt in rating_metrics]))
+            warnings.warn(
+                "SequentialEvaluation only supports ranking metrics. "
+                "The given rating metrics {} will be ignored!".format([mt.name for mt in rating_metrics])
+            )
 
         start = time.time()
         model.transform(self.test_set)
@@ -408,51 +350,12 @@ def from_splits(
         train_data,
         test_data,
         val_data=None,
-        fmt="SIT",
+        fmt="USIT",
         exclude_unknowns=False,
         seed=None,
         verbose=False,
         **kwargs,
     ):
-        """Constructing evaluation method given data.
-
-        Parameters
-        ----------
-        train_data: array-like
-            Training data
-
-        test_data: array-like
-            Test data
-
-        val_data: array-like, optional, default: None
-            Validation data
-
-        fmt: str, default: 'SIT'
-            Format of the input data. Currently, we are supporting:
-
-            'SIT': Session, Item, Timestamp
-            'USIT': User, Session, Item, Timestamp
-            'SITJson': Session, Item, Timestamp, Json
-            'USITJson': User, Session, Item, Timestamp, Json
-
-        rating_threshold: float, default: 1.0
-            Threshold to decide positive or negative preferences.
-
-        exclude_unknowns: bool, default: False
-            Whether to exclude unknown users/items in evaluation.
-
-        seed: int, optional, default: None
-            Random seed for reproduce the splitting.
-
-        verbose: bool, default: False
-            The verbosity flag.
-
-        Returns
-        -------
-        method: :obj:`<cornac.eval_methods.NextItemEvaluation>`
-            Evaluation method object.
-
-        """
         method = cls(
             fmt=fmt,
             exclude_unknowns=exclude_unknowns,
diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py
index 643b25258..085eea298 100644
--- a/cornac/models/__init__.py
+++ b/cornac/models/__init__.py
@@ -15,7 +15,6 @@
 
 from .recommender import Recommender
 from .recommender import NextBasketRecommender
-from .recommender import NextItemRecommender
 from .recommender import SequentialRecommender
 
 from .amr import AMR
diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py
index ab9dccefc..ea86e060d 100644
--- a/cornac/models/recommender.py
+++ b/cornac/models/recommender.py
@@ -709,80 +709,38 @@ def score(self, user_idx, history_baskets, **kwargs):
         raise NotImplementedError("The algorithm is not able to make score prediction!")
 
 
-class NextItemRecommender(Recommender):
-    """Generic class for a next item recommender model. All next item recommendation models should inherit from this class.
-
-    Parameters
-    ----------------
-    name: str, required
-        Name of the recommender model.
-
-    trainable: boolean, optional, default: True
-        When False, the model is not trainable.
-
-    verbose: boolean, optional, default: False
-        When True, running logs are displayed.
-
-    Attributes
-    ----------
-    num_users: int
-        Number of users in training data.
-
-    num_items: int
-        Number of items in training data.
-
-    total_users: int
-        Number of users in training, validation, and test data.
-        In other words, this includes unknown/unseen users.
-
-    total_items: int
-        Number of items in training, validation, and test data.
-        In other words, this includes unknown/unseen items.
-
-    uid_map: int
-        Global mapping of user ID-index.
-
-    iid_map: int
-        Global mapping of item ID-index.
-    """
-
-    def __init__(self, name, trainable=True, verbose=False):
-        super().__init__(name=name, trainable=trainable, verbose=verbose)
-
-    def score(self, user_idx, history_items, **kwargs):
-        """Predict the scores for all items based on input history items
-
-        Parameters
-        ----------
-        history_items: list of lists
-            The list of history items in sequential manner for next-item prediction.
+SUPPORTED_MODES = ("session-based", "session-aware")
+SUPPORTED_INPUT_FORMATS = ("flat", "hierarchical")
 
-        Returns
-        -------
-        res : a Numpy array
-            Relative scores of all known items
 
-        """
-        raise NotImplementedError("The algorithm is not able to make score prediction!")
+class SequentialRecommender(Recommender):
+    """Generic class for a sequential next-item recommender model.
 
+    Sequential recommenders are characterised by two orthogonal properties:
 
-SUPPORTED_MODES = ("session-based", "session-aware")
+    1. **The user's choice of evaluation/training mode** (``self.mode``):
 
+       - ``"session-based"``: only the most recent session is used as
+         context; the hidden state / history window resets at session
+         boundaries.
+       - ``"session-aware"``: the user's chronological cross-session item
+         sequence is used as context.
 
-class SequentialRecommender(NextItemRecommender):
-    """Generic class for a sequential next-item recommender model.
+    2. **The model's input format** (class attribute ``INPUT_FORMAT``):
 
-    Sequential recommenders consume a user's interaction history (either a
-    flat list of item ids or a list of session item lists) and operate in one
-    of two modes:
+       - ``"flat"``: the model consumes a flat sequence of item ids
+         (GRU4Rec, SASRec, BERT4Rec, GPT2Rec, FPMC).
+       - ``"hierarchical"``: the model consumes a nested ``(prior_sessions,
+         current_prefix, target)`` structure (HGRU4Rec, SHAN — none in the
+         current codebase but the dispatch is ready).
 
-    - ``"session-based"``: only the most recent session is used as context;
-      the hidden state / history window resets at session boundaries.
-    - ``"session-aware"``: the user's chronological cross-session item
-      sequence is used as context.
+    The base class wires ``mode`` + ``INPUT_FORMAT`` to the right iterator
+    via :meth:`_build_data_iter`. Subclasses normally don't need to override
+    that method; they just set ``INPUT_FORMAT`` and pick up the right
+    iterator automatically.
 
-    Subclasses should set the desired default in their own ``__init__`` and
-    forward ``mode`` to ``super().__init__``.
+    Subclasses should set their default ``mode`` in their own ``__init__``
+    and forward ``mode`` to ``super().__init__``.
 
     Parameters
     ----------
@@ -799,23 +757,46 @@ class SequentialRecommender(NextItemRecommender):
         When True, running logs are displayed.
     """
 
+    INPUT_FORMAT = "flat"
+
     def __init__(self, name, mode="session-based", trainable=True, verbose=False):
         if mode not in SUPPORTED_MODES:
             raise ValueError(
                 f"mode='{mode}' not supported; choose from {SUPPORTED_MODES}"
             )
+        if self.INPUT_FORMAT not in SUPPORTED_INPUT_FORMATS:
+            raise ValueError(
+                f"{type(self).__name__}.INPUT_FORMAT='{self.INPUT_FORMAT}' "
+                f"not supported; choose from {SUPPORTED_INPUT_FORMATS}"
+            )
         super().__init__(name=name, trainable=trainable, verbose=verbose)
         self.mode = mode
 
+    def fit(self, train_set, val_set=None):
+        """Validate data/mode compatibility before subclass training.
+
+        ``mode="session-aware"`` requires per-user identity (USIT-like
+        formats). Pure SIT datasets collapse every row to a single
+        anonymous user, which makes session-aware iteration meaningless.
+        We catch this early with a clear error rather than producing a
+        silently broken iterator.
+        """
+        if self.mode == "session-aware" and getattr(train_set, "num_users", 0) <= 1:
+            raise ValueError(
+                f"{type(self).__name__}: mode='session-aware' requires a "
+                f"dataset with user identity (e.g. fmt='USIT'); "
+                f"train_set.num_users={getattr(train_set, 'num_users', 'N/A')}."
+            )
+        return super().fit(train_set, val_set)
+
     def _flatten_history(self, history_items):
         """Coerce ``history_items`` into a flat list of item ids.
 
         Accepts both:
 
-        - ``[i0, i1, i2, ...]`` (flat list, as produced by
-          :class:`cornac.eval_methods.NextItemEvaluation`).
+        - ``[i0, i1, i2, ...]`` (flat list of item ids).
         - ``[[i0, i1], [i2, i3], ...]`` (list of session item lists, as
-          produced by the session-aware ``NextSessionEvaluation`` flow).
+          produced by :class:`cornac.eval_methods.SequentialEvaluation`).
 
         For ``mode == "session-based"`` the list of sessions is collapsed to
         just the items of the *last non-empty* session. For
@@ -836,3 +817,58 @@ def _flatten_history(self, history_items):
                 flat.extend(list(sess))
             return flat
         return list(history_items)
+
+    def _val_score(self, val_set, metric="recall", k=20, mode="last"):
+        """Compute a session-aware ranking metric on ``val_set`` for
+        best-on-val selection during training.
+
+        Delegates to
+        :func:`cornac.eval_methods.sequential_evaluation.ranking_eval`;
+        the score mirrors the test protocol only when ``mode`` matches it.
+
+        Parameters
+        ----------
+        metric : str
+            One of ``"recall"``, ``"ndcg"``, ``"auc"``, ``"mrr"``
+            (case-insensitive). ``k`` is ignored for ``auc`` and ``mrr``.
+
+        Returns ``None`` if ``val_set`` is ``None``.
+        """
+        if val_set is None:
+            return None
+        from ..eval_methods.sequential_evaluation import ranking_eval
+        from ..metrics import AUC, MRR, NDCG, Recall
+
+        name = metric.lower()
+        if name == "recall":
+            m = Recall(k=k)
+        elif name == "ndcg":
+            m = NDCG(k=k)
+        elif name == "auc":
+            m = AUC()
+        elif name == "mrr":
+            m = MRR()
+        else:
+            raise ValueError(
+                f"val_metric='{metric}' not supported; "
+                f"choose from recall/ndcg/auc/mrr"
+            )
+
+        inner = getattr(self, "model", None)
+        was_training = bool(getattr(inner, "training", False))
+        if inner is not None and hasattr(inner, "eval"):
+            inner.eval()
+        try:
+            avg, _ = ranking_eval(
+                model=self,
+                metrics=[m],
+                train_set=self.train_set,
+                test_set=val_set,
+                mode=mode,
+                exclude_unknowns=True,
+                verbose=False,
+            )
+        finally:
+            if inner is not None and was_training and hasattr(inner, "train"):
+                inner.train()
+        return float(avg[0]) if avg else 0.0
diff --git a/cornac/models/seq_utils/__init__.py b/cornac/models/seq_utils/__init__.py
index 67a9be9a6..ba9a3945f 100644
--- a/cornac/models/seq_utils/__init__.py
+++ b/cornac/models/seq_utils/__init__.py
@@ -50,7 +50,12 @@ def fit(self, train_set, val_set=None):
   ``session_seq_iter``       ``(in_uids, hist_iids, out_iids)`` — sequence
                                models, session-based.
   ``user_seq_iter``          ``(in_uids, hist_iids, out_iids)`` — sequence
-                               models, session-aware.
+                               models, session-aware (flat history, skips
+                               cross-session target tuples).
+  ``user_hier_seq_iter``     ``(uid, prior_sessions, current_prefix, target)``
+                               — per-row hierarchical iterator for future
+                               session-aware models (HGRU4Rec, SHAN). No
+                               current consumer.
   =========================  =================================================
 """
 
@@ -59,6 +64,7 @@ def fit(self, train_set, val_set=None):
     user_io_iter,
     session_seq_iter,
     user_seq_iter,
+    user_hier_seq_iter,
 )
 
 __all__ = [
@@ -66,4 +72,5 @@ def fit(self, train_set, val_set=None):
     "user_io_iter",
     "session_seq_iter",
     "user_seq_iter",
+    "user_hier_seq_iter",
 ]
diff --git a/cornac/models/seq_utils/iterators.py b/cornac/models/seq_utils/iterators.py
index 73c4f4dfc..918603e3d 100644
--- a/cornac/models/seq_utils/iterators.py
+++ b/cornac/models/seq_utils/iterators.py
@@ -146,9 +146,15 @@ def _user_of(mapped_ids):
 def _user_chrono_sequences(train_set):
     """Build per-user chronological item sequences across sessions.
 
-    Returns a list of (user_idx, item_sequence) tuples. The order within a
-    user follows session order (by first-timestamp in session, if available)
-    and then in-session interaction order.
+    Returns a list of ``(user_idx, item_sequence, session_starts)`` tuples.
+    The order within a user follows session order (by first-timestamp in
+    session, if available) and then in-session interaction order.
+
+    ``session_starts`` is a sorted list of indices into ``item_sequence``
+    marking the first item of each session. The first entry is always 0.
+    For example, sessions ``[[a,b,c],[d,e,f]]`` produce
+    ``items=[a,b,c,d,e,f]`` and ``session_starts=[0, 3]``. Targets at these
+    indices (except 0) are cross-session boundary predictions.
     """
     uir_tuple = train_set.uir_tuple
     sessions = train_set.sessions
@@ -162,10 +168,12 @@ def _user_chrono_sequences(train_set):
         else:
             order = list(sids)
         items = []
+        session_starts = []
         for sid in order:
+            session_starts.append(len(items))
             items.extend(int(i) for i in uir_tuple[1][sessions[sid]])
         if len(items) > 1:
-            sequences.append((int(uid), items))
+            sequences.append((int(uid), items, session_starts))
     return sequences
 
 
@@ -176,6 +184,13 @@ def user_io_iter(train_set, n_sample=0, sample_alpha=0, rng=None, batch_size=1,
     item sequences (concatenated across sessions) instead of single sessions.
     The hidden state is therefore reset at *user* boundaries rather than
     *session* boundaries.
+
+    Note
+    ----
+    Unlike :func:`user_seq_iter`, this iterator does NOT mask cross-session
+    target transitions. Per-row loss masking would require changes to every
+    loss function in :mod:`losses`; for now GRU4Rec session-aware training
+    accepts the cross-session targets as a small amount of label noise.
     """
     rng = rng if rng is not None else get_rng(None)
     sequences = _user_chrono_sequences(train_set)
@@ -196,7 +211,7 @@ def _refill():
         nonlocal end_mask
         while end_mask.sum() > 0:
             try:
-                uid, seq = next(pool_iter)
+                uid, seq, _session_starts = next(pool_iter)
             except StopIteration:
                 return False
             if len(seq) <= 1:
@@ -338,12 +353,19 @@ def user_seq_iter(
     sample_alpha=0.5,
     rng=None,
     shuffle=True,
+    skip_cross_session_targets=True,
 ):
     """Session-aware sequence iterator for transformer/seq models.
 
     Iterates over users; for each user constructs the chronological
     cross-session item sequence and yields per-step ``(uid, hist[max_len],
     target)`` triples. The history therefore spans across sessions.
+
+    When ``skip_cross_session_targets`` is True (default), training tuples
+    whose target is the first item of a new session are dropped, so the
+    model is not trained to predict across session boundaries. Items from
+    earlier sessions (including the skipped boundary items) still appear as
+    *history* in subsequent within-session steps.
     """
     rng = rng if rng is not None else get_rng(None)
     sequences = _user_chrono_sequences(train_set)
@@ -354,10 +376,15 @@ def user_seq_iter(
         item_indices, item_dist = _build_neg_sampler(uir_tuple, sample_alpha)
 
     buffer_uids, buffer_hist, buffer_target = [], [], []
-    for uid, items in sequences:
+    for uid, items, session_starts in sequences:
         if len(items) < 2:
             continue
+        boundary_targets = (
+            set(session_starts[1:]) if skip_cross_session_targets else set()
+        )
         for t in range(1, len(items)):
+            if t in boundary_targets:
+                continue
             hist = items[:t][-max_len:]
             hist = [pad_index] * (max_len - len(hist)) + list(hist)
             buffer_uids.append(uid)
@@ -388,3 +415,75 @@ def user_seq_iter(
             np.array(buffer_hist, dtype="int"),
             out_iids,
         )
+
+
+def user_hier_seq_iter(
+    train_set,
+    rng=None,
+    shuffle=True,
+):
+    """Yield per-step hierarchical training rows for session-aware models.
+
+    Intended for future HGRU4Rec/SHAN-style models. Each yielded row is
+
+        ``(uid, prior_sessions, current_prefix, target)``
+
+    and is produced once for every position ``t >= 1`` of every session
+    ``S_k`` of a user:
+
+    - ``uid`` (int): user id.
+    - ``prior_sessions`` (``list[list[int]]``): item-id lists of the user's
+      earlier sessions ``S_0, ..., S_{k-1}``, oldest first; ``[]`` while the
+      user's first session is being processed.
+    - ``current_prefix`` (``list[int]``): ``S_k[:t]``, the items of the
+      current session that precede the target.
+    - ``target`` (int): ``S_k[t]``, the next item to predict.
+
+    Sessions with fewer than two items produce no rows of their own but are
+    still included in ``prior_sessions`` of the user's later sessions.
+    Sessions are ordered by the timestamp of their first entry when
+    ``train_set.timestamps`` is not None, otherwise in stored order. Users
+    are visited in shuffled order when ``shuffle`` is True.
+
+    No padding or batching is done here; the consuming model chooses its own
+    ``max_history_sessions`` / ``max_session_len`` budgets.
+
+    Nothing in the current codebase consumes this iterator. It completes the
+    dispatch contract for ``mode="session-aware"`` +
+    ``INPUT_FORMAT="hierarchical"`` so that HGRU4Rec/SHAN can be added later
+    without touching the base class.
+    """
+    rng = get_rng(None) if rng is None else rng
+    item_ids = train_set.uir_tuple[1]
+    sessions = train_set.sessions
+    user_sessions = train_set.user_session_data
+    timestamps = train_set.timestamps
+
+    def _first_timestamp(sid):
+        return timestamps[sessions[sid][0]]
+
+    user_ids = list(user_sessions.keys())
+    if shuffle:
+        rng.shuffle(user_ids)
+
+    for uid in user_ids:
+        sids = user_sessions[uid]
+        if timestamps is None:
+            ordered_sids = list(sids)
+        else:
+            ordered_sids = sorted(sids, key=_first_timestamp)
+        # Materialise every session of the user before yielding anything, so
+        # an indexing error surfaces before this user's first row is emitted.
+        user_history = [
+            [int(i) for i in item_ids[sessions[sid]]] for sid in ordered_sids
+        ]
+        for k, current in enumerate(user_history):
+            # range() is empty for sessions shorter than two items.
+            for t in range(1, len(current)):
+                # Fresh copies per row so rows never share list objects.
+                yield (
+                    int(uid),
+                    [list(s) for s in user_history[:k]],
+                    current[:t],
+                    current[t],
+                )
diff --git a/cornac/models/spop/recom_spop.py b/cornac/models/spop/recom_spop.py
index 1910c6073..558cbc130 100644
--- a/cornac/models/spop/recom_spop.py
+++ b/cornac/models/spop/recom_spop.py
@@ -17,10 +17,10 @@
 
 import numpy as np
 
-from ..recommender import NextItemRecommender
+from ..recommender import SequentialRecommender
 
 
-class SPop(NextItemRecommender):
+class SPop(SequentialRecommender):
     """Recommend most popular items of the current session.
 
     Parameters
@@ -28,8 +28,15 @@ class SPop(NextItemRecommender):
     name: string, default: 'SPop'
         The name of the recommender model.
 
+    mode: str, optional, default: 'session-based'
+        One of 'session-based' or 'session-aware'. SPop's popularity logic
+        is the same in both; the only difference is how
+        :meth:`_flatten_history` collapses the nested ``history_items``
+        coming from :class:`cornac.eval_methods.SequentialEvaluation`.
+
     use_session_popularity: boolean, optional, default: True
-        When False, no item frequency from history items in current session are being used.
+        When False, item frequencies from the current session's history are
+        not used; only the global training popularity is returned.
 
     References
     ----------
@@ -37,8 +44,8 @@ class SPop(NextItemRecommender):
     Session-based Recommendations with Recurrent Neural Networks, ICLR 2016
     """
 
-    def __init__(self, name="SPop", use_session_popularity=True):
-        super().__init__(name=name, trainable=False)
+    def __init__(self, name="SPop", mode="session-based", use_session_popularity=True):
+        super().__init__(name=name, mode=mode, trainable=False)
         self.use_session_popularity = use_session_popularity
         self.item_freq = Counter()
 
@@ -53,7 +60,8 @@ def score(self, user_idx, history_items, **kwargs):
         for iid, freq in self.item_freq.items():
             item_scores[iid] = freq / max_item_freq
         if self.use_session_popularity:
-            s_item_freq = Counter([iid for iid in history_items])
+            flat_history = self._flatten_history(history_items)
+            s_item_freq = Counter(flat_history)
             for iid, cnt in s_item_freq.most_common():
                 item_scores[iid] += cnt
         return item_scores
diff --git a/examples/gru4rec_yoochoose.py b/examples/gru4rec_yoochoose.py
index 1d8b6f261..307e4155b 100644
--- a/examples/gru4rec_yoochoose.py
+++ b/examples/gru4rec_yoochoose.py
@@ -17,7 +17,7 @@
 import cornac
 from cornac.data import Reader
 from cornac.datasets import yoochoose
-from cornac.eval_methods import NextItemEvaluation
+from cornac.eval_methods import SequentialEvaluation
 from cornac.metrics import MRR, NDCG, Recall
 from cornac.models import GRU4Rec, SPop
 
@@ -29,7 +29,7 @@
 test_data = yoochoose.load_test(reader=Reader(min_sequence_size=2, item_set=item_set))
 print("test data loaded")
 
-next_item_eval = NextItemEvaluation.from_splits(
+eval_method = SequentialEvaluation.from_splits(
     train_data=buy_data,
     test_data=test_data[:10000],  # illustration purpose only, subset of test data for faster experiment
     exclude_unknowns=True,
@@ -63,7 +63,7 @@
 ]
 
 cornac.Experiment(
-    eval_method=next_item_eval,
+    eval_method=eval_method,
     models=models,
     metrics=metrics,
 ).run()
diff --git a/examples/spop_yoochoose.py b/examples/spop_yoochoose.py
index eb4f785fd..2a4c77176 100644
--- a/examples/spop_yoochoose.py
+++ b/examples/spop_yoochoose.py
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""Example of a next-item recommendation model based on item popularity"""
+"""Example of a session-based recommendation model based on item popularity"""
 
 import cornac
 from cornac.datasets import yoochoose
-from cornac.eval_methods import NextItemEvaluation
+from cornac.eval_methods import SequentialEvaluation
 from cornac.metrics import MRR, NDCG, Recall
 from cornac.models import SPop
 
@@ -25,7 +25,7 @@
 test_data = yoochoose.load_test()
 print("test data loaded")
 
-next_item_eval = NextItemEvaluation.from_splits(
+eval_method = SequentialEvaluation.from_splits(
     train_data=buy_data,
     test_data=test_data[:10000],  # illustration purpose only, subset of test data for faster experiment
     verbose=True,
@@ -46,7 +46,7 @@
 ]
 
 cornac.Experiment(
-    eval_method=next_item_eval,
+    eval_method=eval_method,
     models=models,
     metrics=metrics,
 ).run()
diff --git a/tests/cornac/eval_methods/test_next_item_evaluation.py b/tests/cornac/eval_methods/test_next_item_evaluation.py
deleted file mode 100644
index 31ceb8d9b..000000000
--- a/tests/cornac/eval_methods/test_next_item_evaluation.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright 2023 The Cornac Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-import unittest
-
-from cornac.eval_methods import NextItemEvaluation
-from cornac.data import Reader
-from cornac.models import SPop
-from cornac.metrics import HitRatio, Recall
-
-
-class TestNextItemEvaluation(unittest.TestCase):
-    def setUp(self):
-        self.data = Reader().read("./tests/sequence.txt", fmt="USIT", sep=" ")
-
-    def test_from_splits(self):
-        next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT")
-
-        self.assertTrue(next_item_eval.train_set != None)
-        self.assertTrue(next_item_eval.test_set != None)
-        self.assertTrue(next_item_eval.val_set == None)
-        self.assertTrue(next_item_eval.total_sessions == 16)
-
-    def test_evaluate(self):
-        next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT")
-        result = next_item_eval.evaluate(
-            SPop(), [HitRatio(k=2), Recall(k=2)], user_based=False
-        )
-        self.assertEqual(result[0].metric_avg_results.get('HitRatio@2'), 0)
-        self.assertEqual(result[0].metric_avg_results.get('Recall@2'), 0)
-
-        next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT")
-        result = next_item_eval.evaluate(
-            SPop(), [HitRatio(k=5), Recall(k=5)], user_based=True
-        )
-        self.assertEqual(result[0].metric_avg_results.get('HitRatio@5'), 2/3)
-        self.assertEqual(result[0].metric_avg_results.get('Recall@5'), 2/3)
-
-        next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT", mode="next")
-        result = next_item_eval.evaluate(
-            SPop(), [HitRatio(k=2), Recall(k=2)], user_based=False
-        )
-
-        self.assertEqual(result[0].metric_avg_results.get('HitRatio@2'), 1/8)
-        self.assertEqual(result[0].metric_avg_results.get('Recall@2'), 1/8)
-
-        next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT", mode="next")
-        result = next_item_eval.evaluate(
-            SPop(), [HitRatio(k=5), Recall(k=5)], user_based=True
-        )
-        self.assertEqual(result[0].metric_avg_results.get('HitRatio@5'), 3/4)
-        self.assertEqual(result[0].metric_avg_results.get('Recall@5'), 3/4)
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/cornac/eval_methods/test_sequential_evaluation.py b/tests/cornac/eval_methods/test_sequential_evaluation.py
new file mode 100644
index 000000000..511a3aad0
--- /dev/null
+++ b/tests/cornac/eval_methods/test_sequential_evaluation.py
@@ -0,0 +1,89 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import unittest
+
+from cornac.data import Reader
+from cornac.eval_methods import SequentialEvaluation
+from cornac.metrics import MRR, NDCG, Recall
+from cornac.models import SPop
+
+
+class TestSequentialEvaluation(unittest.TestCase):
+    def setUp(self):
+        self.data = Reader().read("./tests/sequence.txt", fmt="USIT", sep=" ")
+
+    def test_from_splits(self):
+        eval_method = SequentialEvaluation.from_splits(
+            train_data=self.data[:50], test_data=self.data[50:], fmt="USIT"
+        )
+        self.assertIsNotNone(eval_method.train_set)
+        self.assertIsNotNone(eval_method.test_set)
+        self.assertIsNone(eval_method.val_set)
+        self.assertEqual(
+            eval_method.total_sessions,
+            eval_method.train_set.num_sessions + eval_method.test_set.num_sessions,
+        )
+
+    def test_invalid_mode_raises(self):
+        with self.assertRaises(ValueError):
+            SequentialEvaluation.from_splits(
+                train_data=self.data[:50],
+                test_data=self.data[50:],
+                fmt="USIT",
+                mode="bogus",
+            )
+
+    def _run_with_mode(self, mode):
+        eval_method = SequentialEvaluation.from_splits(
+            train_data=self.data[:50],
+            test_data=self.data[50:],
+            fmt="USIT",
+            mode=mode,
+        )
+        test_result, _ = eval_method.evaluate(
+            SPop(),
+            [Recall(k=5), NDCG(k=5), MRR()],
+            user_based=True,
+        )
+        return test_result.metric_avg_results
+
+    def test_evaluate_mode_last(self):
+        results = self._run_with_mode("last")
+        for name, value in results.items():
+            if name in ("Train (s)", "Test (s)"):
+                continue
+            self.assertGreaterEqual(value, 0.0)
+            self.assertLessEqual(value, 1.0)
+
+    def test_evaluate_mode_first(self):
+        results = self._run_with_mode("first")
+        for name, value in results.items():
+            if name in ("Train (s)", "Test (s)"):
+                continue
+            self.assertGreaterEqual(value, 0.0)
+            self.assertLessEqual(value, 1.0)
+
+    def test_evaluate_mode_any(self):
+        results = self._run_with_mode("any")
+        for name, value in results.items():
+            if name in ("Train (s)", "Test (s)"):
+                continue
+            self.assertGreaterEqual(value, 0.0)
+            self.assertLessEqual(value, 1.0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/cornac/models/test_recommender.py b/tests/cornac/models/test_recommender.py
index 2006064a0..2b40c2321 100644
--- a/tests/cornac/models/test_recommender.py
+++ b/tests/cornac/models/test_recommender.py
@@ -16,9 +16,10 @@
 import unittest
 
 import numpy as np
+import numpy.testing as npt
 
-from cornac.data import BasketDataset, Dataset, SequentialDataset, Reader
-from cornac.models import MF, GPTop, SPop, NextBasketRecommender, NextItemRecommender
+from cornac.data import BasketDataset, Dataset, Reader, SequentialDataset
+from cornac.models import MF, GPTop, NextBasketRecommender, SequentialRecommender, SPop
 
 
 class TestRecommender(unittest.TestCase):
@@ -71,26 +72,26 @@ def test_fit(self):
         model.rank(0, history_baskets=[[]])
 
 
-class TestNextItemRecommender(unittest.TestCase):
+class TestSequentialRecommender(unittest.TestCase):
     def setUp(self):
         self.data = Reader().read("./tests/sequence.txt", fmt="USIT", sep=" ")
 
     def test_init(self):
-        model = NextItemRecommender("test")
+        model = SequentialRecommender("test")
         self.assertTrue(model.name == "test")
 
     def test_fit(self):
         dataset = SequentialDataset.from_usit(self.data)
-        model = NextItemRecommender("")
+        model = SequentialRecommender("")
         model.fit(dataset)
         model = SPop()
         model.fit(dataset)
         model.score(0, [])
         result = model.rank(0, history_items=[])
-        self.assertTrue((result[0][0:3] == [3, 2, 4]).all())
-        self.assertTrue((np.sort(result[0][3:5]) == [0, 1]).all()) # identical scores, sorting may affect the ordering
-        self.assertTrue((result[0][5:6] == [5]).all())
-        self.assertTrue((np.sort(result[0][6:9]) == [6, 7, 8]).all()) # identical scores, sorting may affect the ordering
+        npt.assert_array_equal(result[0][0:3], [3, 2, 4])
+        npt.assert_array_equal(np.sort(result[0][3:5]), [0, 1])  # identical scores, sorting may affect the ordering
+        npt.assert_array_equal(result[0][5:6], [5])
+        npt.assert_array_equal(np.sort(result[0][6:9]), [6, 7, 8])  # identical scores, sorting may affect the ordering
 
 
 if __name__ == "__main__":

From e91947ce46d1634071d0e9af9b4d69d0e56f5e7c Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 22:39:59 +0800
Subject: [PATCH 04/10] add alias for SBR and SAR

---
 cornac/models/recommender.py            | 6 +++++-
 tests/cornac/models/test_recommender.py | 8 ++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py
index ea86e060d..75555f881 100644
--- a/cornac/models/recommender.py
+++ b/cornac/models/recommender.py
@@ -760,9 +760,13 @@ class SequentialRecommender(Recommender):
     INPUT_FORMAT = "flat"
 
     def __init__(self, name, mode="session-based", trainable=True, verbose=False):
+        mode = {"sbr": "session-based", "sar": "session-aware"}.get(
+            mode.lower() if isinstance(mode, str) else mode, mode
+        )
         if mode not in SUPPORTED_MODES:
             raise ValueError(
-                f"mode='{mode}' not supported; choose from {SUPPORTED_MODES}"
+                f"mode='{mode}' not supported; choose from {SUPPORTED_MODES} "
+                f"(aliases 'sbr'/'sar' also accepted, case-insensitive)"
             )
         if self.INPUT_FORMAT not in SUPPORTED_INPUT_FORMATS:
             raise ValueError(
diff --git a/tests/cornac/models/test_recommender.py b/tests/cornac/models/test_recommender.py
index 2b40c2321..45448b81c 100644
--- a/tests/cornac/models/test_recommender.py
+++ b/tests/cornac/models/test_recommender.py
@@ -93,6 +93,14 @@ def test_fit(self):
         npt.assert_array_equal(result[0][5:6], [5])
         npt.assert_array_equal(np.sort(result[0][6:9]), [6, 7, 8])  # identical scores, sorting may affect the ordering
 
+    def test_mode_aliases(self):
+        for alias in ("sbr", "SBR", "Sbr", "session-based"):
+            self.assertEqual(SequentialRecommender("t", mode=alias).mode, "session-based")
+        for alias in ("sar", "SAR", "Sar", "session-aware"):
+            self.assertEqual(SequentialRecommender("t", mode=alias).mode, "session-aware")
+        with self.assertRaises(ValueError):
+            SequentialRecommender("t", mode="bogus")
+
 
 if __name__ == "__main__":
     unittest.main()

From 28006afbaed939406e43ef007b14b70d370d1e7a Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 22:40:13 +0800
Subject: [PATCH 05/10] add example for sequential models

---
 examples/sequential_diginetica.py | 98 +++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 examples/sequential_diginetica.py

diff --git a/examples/sequential_diginetica.py b/examples/sequential_diginetica.py
new file mode 100644
index 000000000..5c7dc77f9
--- /dev/null
+++ b/examples/sequential_diginetica.py
@@ -0,0 +1,98 @@
+# Copyright 2026 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Sequential recommendation on Diginetica with GRU4Rec / SASRec / BERT4Rec / GPT2Rec.
+
+Each model is trained twice:
+
+- ``mode="sbr"``  (session-based: only the current session is context)
+- ``mode="sar"``  (session-aware:  the user's cross-session history is context)
+
+``"sbr"`` and ``"sar"`` are short-form aliases for ``"session-based"`` and
+``"session-aware"`` and are matched case-insensitively.
+
+Runs a short 10-epoch schedule so the example fits in a reasonable time;
+bump ``N_EPOCHS`` for real comparisons.
+
+Note: GRU4Rec converges more slowly than Transformer-based models.
+"""
+
+import cornac
+from cornac.data import Reader
+from cornac.datasets import diginetica
+from cornac.eval_methods import SequentialEvaluation
+from cornac.metrics import AUC, MRR, NDCG, Recall
+from cornac.models import BERT4Rec, GPT2Rec, GRU4Rec, SASRec
+
+N_EPOCHS = 10
+DEVICE = "cuda"  # set to "cpu" if no GPU is available
+SEED = 123
+
+reader = Reader(min_sequence_size=2)
+train_data = diginetica.load_train(reader=reader)
+val_data = diginetica.load_val(reader=reader)
+test_data = diginetica.load_test(reader=reader)
+
+eval_method = SequentialEvaluation.from_splits(
+    train_data=train_data,
+    val_data=val_data,
+    test_data=test_data,
+    fmt="USIT",
+    exclude_unknowns=True,
+    verbose=True,
+    mode="last",
+)
+
+shared = dict(
+    embedding_dim=64,
+    batch_size=256,
+    n_sample=512,
+    n_epochs=N_EPOCHS,
+    max_len=20,
+    num_blocks=2,
+    num_heads=2,
+    device=DEVICE,
+    verbose=True,
+    seed=SEED,
+)
+
+models = []
+for setting in ("sbr", "sar"):
+    models += [
+        GRU4Rec(
+            name=f"GRU4Rec-{setting}",
+            layers=[64],
+            loss="cross-entropy",
+            mode=setting,
+            batch_size=shared["batch_size"],
+            n_sample=shared["n_sample"],
+            sample_alpha=0.75,
+            dropout_p_hidden=0.3,
+            n_epochs=N_EPOCHS,
+            device=DEVICE,
+            verbose=True,
+            seed=SEED,
+        ),
+        SASRec(name=f"SASRec-{setting}", mode=setting, **shared),
+        BERT4Rec(name=f"BERT4Rec-{setting}", mode=setting, **shared),
+        GPT2Rec(name=f"GPT2Rec-{setting}", mode=setting, **shared),
+    ]
+
+metrics = [AUC(), MRR(), NDCG(k=10), NDCG(k=50), Recall(k=10), Recall(k=50)]
+
+cornac.Experiment(
+    eval_method=eval_method,
+    models=models,
+    metrics=metrics,
+).run()

From 05831f8a2c17b137e05cec660c9b141fdd709d26 Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 23:04:03 +0800
Subject: [PATCH 06/10] model_selection for FPMC

---
 cornac/models/fpmc/recom_fpmc.py | 50 +++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/cornac/models/fpmc/recom_fpmc.py b/cornac/models/fpmc/recom_fpmc.py
index a40a3682c..6f3e0830f 100644
--- a/cornac/models/fpmc/recom_fpmc.py
+++ b/cornac/models/fpmc/recom_fpmc.py
@@ -62,6 +62,17 @@ class FPMC(SequentialRecommender):
     momentum: float, optional, default: 0.0
         Momentum for the IndexedAdagradM optimizer.
 
+    model_selection: str, optional, default: 'last'
+        One of 'last' or 'best'. When 'best', the model with the highest
+        validation score (evaluated every ``val_eval_every`` epochs) is
+        restored at the end of ``fit``.
+
+    val_eval_every: int, optional, default: 5
+        Validate every ``val_eval_every`` epochs when ``model_selection='best'``.
+    val_k: int, optional, default: 20
+    val_metric: str, optional, default: 'recall'
+        Cutoff and metric for best-on-val selection; see :meth:`SequentialRecommender._val_score`.
+
     References
     ----------
     Rendle, S., Freudenthaler, C., & Schmidt-Thieme, L. (2010).
@@ -86,6 +97,10 @@ def __init__(
         trainable=True,
         verbose=False,
         seed=None,
+        model_selection="last",
+        val_eval_every=5,
+        val_k=20,
+        val_metric="recall",
     ):
         super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
         if loss not in SUPPORTED_LOSSES:
@@ -105,6 +120,21 @@ def __init__(
         self.device = device
         self.seed = seed
         self.rng = get_rng(seed)
+        if model_selection not in ("last", "best"):
+            raise ValueError(
+                f"model_selection='{model_selection}' not supported; choose 'last' or 'best'"
+            )
+        # fit() evaluates ``epoch_id % val_eval_every``; a zero interval would
+        # only fail there with ZeroDivisionError after a full training epoch,
+        # so reject non-positive intervals up front.
+        if model_selection == "best" and val_eval_every < 1:
+            raise ValueError(
+                f"val_eval_every={val_eval_every} must be >= 1 when model_selection='best'"
+            )
+        self.model_selection = model_selection
+        self.val_eval_every = val_eval_every
+        self.val_k = val_k
+        self.val_metric = val_metric
 
     def _build_data_iter(self, pad_index):
         # FPMC only uses the immediately previous item, so set max_len = 1.
@@ -153,8 +176,10 @@ def fit(self, train_set, val_set=None):
             self.model.parameters(), lr=self.learning_rate, momentum=self.momentum
         )
 
+        best_val = -float("inf")
+        best_state = None
         progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose)
-        for _ in progress_bar:
+        for epoch_id in progress_bar:
             self.model.train()
             total_loss = 0.0
             cnt = 0
@@ -193,6 +218,29 @@ def fit(self, train_set, val_set=None):
                 cnt += len(in_uids)
                 if inc % 10 == 0 and cnt > 0:
                     progress_bar.set_postfix(loss=(total_loss / cnt))
+
+            if (
+                self.model_selection == "best"
+                and val_set is not None
+                and epoch_id % self.val_eval_every == 0
+            ):
+                val_score = self._val_score(
+                    val_set, metric=self.val_metric, k=self.val_k
+                )
+                if val_score is not None and val_score > best_val:
+                    best_val = val_score
+                    best_state = {
+                        n: p.detach().clone() for n, p in self.model.state_dict().items()
+                    }
+                if self.verbose:
+                    progress_bar.set_postfix(
+                        loss=(total_loss / max(cnt, 1)),
+                        **{f"val_{self.val_metric.lower()}": val_score},
+                        best=best_val,
+                    )
+
+        if self.model_selection == "best" and best_state is not None:
+            self.model.load_state_dict(best_state)
         return self
 
     def score(self, user_idx, history_items, **kwargs):

From 24b30df0721da1a099cbc6856a5dc5a1d2578aac Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 23:04:21 +0800
Subject: [PATCH 07/10] add requirements.txt for sequential models

---
 cornac/models/bert4rec/requirements.txt | 2 ++
 cornac/models/fpmc/requirements.txt     | 1 +
 cornac/models/gpt2rec/requirements.txt  | 2 ++
 cornac/models/sasrec/requirements.txt   | 1 +
 4 files changed, 6 insertions(+)
 create mode 100644 cornac/models/bert4rec/requirements.txt
 create mode 100644 cornac/models/fpmc/requirements.txt
 create mode 100644 cornac/models/gpt2rec/requirements.txt
 create mode 100644 cornac/models/sasrec/requirements.txt

diff --git a/cornac/models/bert4rec/requirements.txt b/cornac/models/bert4rec/requirements.txt
new file mode 100644
index 000000000..a808c3f6f
--- /dev/null
+++ b/cornac/models/bert4rec/requirements.txt
@@ -0,0 +1,2 @@
+torch>=1.12.0
+transformers>=4.30.0
diff --git a/cornac/models/fpmc/requirements.txt b/cornac/models/fpmc/requirements.txt
new file mode 100644
index 000000000..be222b022
--- /dev/null
+++ b/cornac/models/fpmc/requirements.txt
@@ -0,0 +1 @@
+torch>=1.12.0
diff --git a/cornac/models/gpt2rec/requirements.txt b/cornac/models/gpt2rec/requirements.txt
new file mode 100644
index 000000000..a808c3f6f
--- /dev/null
+++ b/cornac/models/gpt2rec/requirements.txt
@@ -0,0 +1,2 @@
+torch>=1.12.0
+transformers>=4.30.0
diff --git a/cornac/models/sasrec/requirements.txt b/cornac/models/sasrec/requirements.txt
new file mode 100644
index 000000000..be222b022
--- /dev/null
+++ b/cornac/models/sasrec/requirements.txt
@@ -0,0 +1 @@
+torch>=1.12.0

From d808b5ba2223e602e1acc3bb288964d5309537f8 Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 23:07:30 +0800
Subject: [PATCH 08/10] add FPMC to example

---
 examples/sequential_diginetica.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/examples/sequential_diginetica.py b/examples/sequential_diginetica.py
index 5c7dc77f9..6a63d4706 100644
--- a/examples/sequential_diginetica.py
+++ b/examples/sequential_diginetica.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""Sequential recommendation on Diginetica with GRU4Rec / SASRec / BERT4Rec / GPT2Rec.
+"""Sequential recommendation on Diginetica with FPMC / GRU4Rec / SASRec / BERT4Rec / GPT2Rec.
 
 Each model is trained twice:
 
@@ -33,7 +33,7 @@
 from cornac.datasets import diginetica
 from cornac.eval_methods import SequentialEvaluation
 from cornac.metrics import AUC, MRR, NDCG, Recall
-from cornac.models import BERT4Rec, GPT2Rec, GRU4Rec, SASRec
+from cornac.models import FPMC, BERT4Rec, GPT2Rec, GRU4Rec, SASRec
 
 N_EPOCHS = 10
 DEVICE = "cuda"  # set to "cpu" if no GPU is available
@@ -70,6 +70,18 @@
 models = []
 for setting in ("sbr", "sar"):
     models += [
+        FPMC(
+            name=f"FPMC-{setting}",
+            embedding_dim=shared["embedding_dim"],
+            loss="bpr",
+            mode=setting,
+            batch_size=shared["batch_size"],
+            n_sample=shared["n_sample"],
+            n_epochs=N_EPOCHS,
+            device=DEVICE,
+            verbose=True,
+            seed=SEED,
+        ),
         GRU4Rec(
             name=f"GRU4Rec-{setting}",
             layers=[64],

From 3db6f09f82a940417b069c45c3fa6282ea258487 Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 23:29:02 +0800
Subject: [PATCH 09/10] update models in README

---
 README.md          | 8 ++++++--
 examples/README.md | 8 +++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 103a03014..6e79d78e1 100644
--- a/README.md
+++ b/README.md
@@ -158,6 +158,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E
 | 2021 | [Bilateral Variational Autoencoder for Collaborative Filtering (BiVAECF)](cornac/models/bivaecf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.bivaecf.recom_bivaecf), [paper](https://dl.acm.org/doi/pdf/10.1145/3437963.3441759) | Collaborative Filtering / Content-Based | [requirements](cornac/models/bivaecf/requirements.txt), CPU / GPU | [quick-start](https://github.com/PreferredAI/bi-vae), [deep-dive](https://github.com/recommenders-team/recommenders/blob/main/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb)
 |      | [Causal Inference for Visual Debiasing in Visually-Aware Recommendation (CausalRec)](cornac/models/causalrec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.causalrec.recom_causalrec), [paper](https://arxiv.org/abs/2107.02390) | Content-Based / Image | [requirements](cornac/models/causalrec/requirements.txt), CPU / GPU | [quick-start](examples/causalrec_clothing.py)
 |      | [Explainable Recommendation with Comparative Constraints on Product Aspects (ComparER)](cornac/models/comparer), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.comparer.recom_comparer_sub), [paper](https://dl.acm.org/doi/pdf/10.1145/3437963.3441754) | Explainable | CPU | [quick-start](https://github.com/PreferredAI/ComparER)
+|      | [GPT2Rec (Inspired by Transformers4Rec)](cornac/models/gpt2rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.gpt2rec.recom_gpt2rec), [paper](https://dl.acm.org/doi/10.1145/3460231.3474255) | Sequential | [requirements](cornac/models/gpt2rec/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py)
 | 2020 | [Adversarial Multimedia Recommendation (AMR)](cornac/models/amr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.amr.recom_amr), [paper](https://ieeexplore.ieee.org/document/8618394) | Content-Based / Image | [requirements](cornac/models/amr/requirements.txt), CPU / GPU | [quick-start](examples/amr_clothing.py)
 |      | [Hybrid Deep Representation Learning of Ratings and Reviews (HRDR)](cornac/models/hrdr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.hrdr.recom_hrdr), [paper](https://www.sciencedirect.com/science/article/abs/pii/S0925231219313207) | Content-Based / Text | [requirements](cornac/models/hrdr/requirements.txt), CPU / GPU | [quick-start](examples/hrdr_example.py)
 |      | [LightGCN: Simplifying and Powering Graph Convolution Network](cornac/models/lightgcn), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.lightgcn.recom_lightgcn), [paper](https://arxiv.org/pdf/2002.02126.pdf) | Collaborative Filtering | [requirements](cornac/models/lightgcn/requirements.txt), CPU / GPU | [quick-start](examples/lightgcn_example.py)
@@ -166,6 +167,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E
 |      | [Temporal-Item-Frequency-based User-KNN (TIFUKNN)](cornac/models/tifuknn), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.tifuknn.recom_tifuknn), [paper](https://arxiv.org/pdf/2006.00556.pdf) | Next-Basket | CPU | [quick-start](examples/tifuknn_tafeng.py)
 |      | [Variational Autoencoder for Top-N Recommendations (RecVAE)](cornac/models/recvae), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.recvae.recom_recvae), [paper](https://doi.org/10.1145/3336191.3371831) | Collaborative Filtering | [requirements](cornac/models/recvae/requirements.txt), CPU / GPU | [quick-start](examples/recvae_example.py)
 | 2019 | [Correlation-Sensitive Next-Basket Recommendation (Beacon)](cornac/models/beacon), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#correlation-sensitive-next-basket-recommendation-beacon), [paper](https://www.ijcai.org/proceedings/2019/0389.pdf) | Next-Basket | [requirements](cornac/models/beacon/requirements.txt), CPU / GPU | [quick-start](examples/beacon_tafeng.py)
+|      | [BERT4Rec: Sequential Recommendation with Bidirectional Encoder Representations from Transformer (BERT4Rec)](cornac/models/bert4rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.bert4rec.recom_bert4rec), [paper](https://arxiv.org/pdf/1904.06690.pdf) | Sequential | [requirements](cornac/models/bert4rec/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py)
 |      | [Embarrassingly Shallow Autoencoders for Sparse Data (EASEᴿ)](cornac/models/ease), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.ease.recom_ease), [paper](https://arxiv.org/pdf/1905.03375.pdf) | Collaborative Filtering | CPU | [quick-start](examples/ease_movielens.py)
 |      | [Neural Graph Collaborative Filtering (NGCF)](cornac/models/ngcf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.ngcf.recom_ngcf), [paper](https://arxiv.org/pdf/1905.08108.pdf) | Collaborative Filtering | [requirements](cornac/models/ngcf/requirements.txt), CPU / GPU | [quick-start](examples/ngcf_example.py)
 |      | [Sampler Design for Bayesian Personalized Ranking by Leveraging View Data (VEBPR)](cornac/models/bpr), [paper](https://arxiv.org/pdf/1809.08162) | Collaborative Filtering | CPU | [quick-start](examples/vebpr_example.py)
@@ -174,6 +176,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E
 |      | [Multi-Task Explainable Recommendation (MTER)](cornac/models/mter), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.mter.recom_mter), [paper](https://arxiv.org/pdf/1806.03568.pdf) | Explainable | CPU | [quick-start](examples/mter_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/07_explanations.ipynb)
 |      | [Neural Attention Rating Regression with Review-level Explanations (NARRE)](cornac/models/narre), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.narre.recom_narre), [paper](http://www.thuir.cn/group/~YQLiu/publications/WWW2018_CC.pdf) | Explainable / Content-Based | [requirements](cornac/models/narre/requirements.txt), CPU / GPU | [quick-start](examples/narre_example.py)
 |      | [Probabilistic Collaborative Representation Learning (PCRL)](cornac/models/pcrl), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.pcrl.recom_pcrl), [paper](http://www.hadylauw.com/publications/uai18.pdf) | Content-Based / Graph | [requirements](cornac/models/pcrl/requirements.txt), CPU / GPU | [quick-start](examples/pcrl_example.py)
+|      | [Self-Attentive Sequential Recommendation (SASRec)](cornac/models/sasrec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.sasrec.recom_sasrec), [paper](https://arxiv.org/pdf/1808.09781.pdf) | Sequential | [requirements](cornac/models/sasrec/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py)
 |      | [Variational Autoencoder for Collaborative Filtering (VAECF)](cornac/models/vaecf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.vaecf.recom_vaecf), [paper](https://arxiv.org/pdf/1802.05814.pdf) | Collaborative Filtering | [requirements](cornac/models/vaecf/requirements.txt), CPU / GPU | [quick-start](examples/vaecf_citeulike.py), [param-search](tutorials/param_search_vaecf.ipynb), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/09_deep_learning.ipynb)
 | 2017 | [Collaborative Variational Autoencoder (CVAE)](cornac/models/cvae), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.cvae.recom_cvae), [paper](http://eelxpeng.github.io/assets/paper/Collaborative_Variational_Autoencoder.pdf) | Content-Based / Text | [requirements](cornac/models/cvae/requirements.txt), CPU / GPU | [quick-start](examples/cvae_example.py)
 |      | [Conditional Variational Autoencoder for Collaborative Filtering (CVAECF)](cornac/models/cvaecf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.cvaecf.recom_cvaecf), [paper](https://dl.acm.org/doi/10.1145/3132847.3132972) | Content-Based / Text | [requirements](cornac/models/cvaecf/requirements.txt), CPU / GPU | [quick-start](examples/cvaecf_filmtrust.py)
@@ -188,7 +191,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E
 |      | [Collaborative Ordinal Embedding (COE)](cornac/models/coe), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.coe.recom_coe), [paper](http://www.hadylauw.com/publications/sdm16.pdf) | Collaborative Filtering | [requirements](cornac/models/coe/requirements.txt), CPU / GPU |
 |      | [Convolutional Matrix Factorization (ConvMF)](cornac/models/conv_mf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.conv_mf.recom_convmf), [paper](http://uclab.khu.ac.kr/resources/publication/C_351.pdf) | Content-Based / Text | [requirements](cornac/models/conv_mf/requirements.txt), CPU / GPU | [quick-start](examples/conv_mf_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/09_deep_learning.ipynb)
 |      | [Learning to Rank Features for Recommendation over Multiple Categories (LRPPM)](cornac/models/lrppm), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#learn-to-rank-user-preferences-based-on-phrase-level-sentiment-analysis-across-multiple-categories-lrppm), [paper](https://www.yongfeng.me/attach/sigir16-chen.pdf) | Explainable | CPU | [quick-start](examples/lrppm_example.py)
-|      | [Session-based Recommendations With Recurrent Neural Networks (GRU4Rec)](cornac/models/gru4rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.gru4rec.recom_gru4rec), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Next-Item | [requirements](cornac/models/gru4rec/requirements.txt), CPU / GPU | [quick-start](examples/gru4rec_yoochoose.py)
+|      | [Session-based Recommendations With Recurrent Neural Networks (GRU4Rec)](cornac/models/gru4rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.gru4rec.recom_gru4rec), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Sequential | [requirements](cornac/models/gru4rec/requirements.txt), CPU / GPU | [quick-start](examples/gru4rec_yoochoose.py)
 |      | [Spherical K-means (SKM)](cornac/models/skm), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.skm.recom_skmeans), [paper](https://www.sciencedirect.com/science/article/pii/S092523121501509X) | Collaborative Filtering | CPU | [quick-start](examples/skm_movielens.py)
 |      | [Visual Bayesian Personalized Ranking (VBPR)](cornac/models/vbpr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.vbpr.recom_vbpr), [paper](https://arxiv.org/pdf/1510.01784.pdf) | Content-Based / Image | [requirements](cornac/models/vbpr/requirements.txt), CPU / GPU | [quick-start](examples/vbpr_tradesy.py), [cross-modality](tutorials/vbpr_text.ipynb), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/05_multimodality.ipynb)
 | 2015 | [Collaborative Deep Learning (CDL)](cornac/models/cdl), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.cdl.recom_cdl), [paper](https://arxiv.org/pdf/1409.2944.pdf) | Content-Based / Text | [requirements](cornac/models/cdl/requirements.txt), CPU / GPU | [quick-start](examples/cdl_example.py), [deep-dive](https://github.com/lgabs/cornac/blob/luan/describe-gpu-supported-models-readme/tutorials/working_with_auxiliary_data.md)
@@ -199,6 +202,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E
 | 2013 | [Hidden Factors and Hidden Topics (HFT)](cornac/models/hft), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.hft.recom_hft), [paper](https://cs.stanford.edu/people/jure/pubs/reviews-recsys13.pdf) | Content-Based / Text | CPU | [quick-start](examples/hft_example.py)
 | 2012 | [Weighted Bayesian Personalized Ranking (WBPR)](cornac/models/bpr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#weighted-bayesian-personalized-ranking-wbpr), [paper](http://proceedings.mlr.press/v18/gantner12a/gantner12a.pdf) | Collaborative Filtering | CPU | [quick-start](examples/bpr_netflix.py)
 | 2011 | [Collaborative Topic Regression (CTR)](cornac/models/ctr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.ctr.recom_ctr), [paper](http://www.cs.columbia.edu/~blei/papers/WangBlei2011.pdf) | Content-Based / Text | CPU | [quick-start](examples/ctr_example_citeulike.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/05_multimodality.ipynb)
+| 2010 | [Factorizing Personalized Markov Chains for Next-Basket Recommendation (FPMC)](cornac/models/fpmc), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.fpmc.recom_fpmc), [paper](https://dl.acm.org/doi/10.1145/1772690.1772773) | Sequential | [requirements](cornac/models/fpmc/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py)
 | Earlier | [Baseline Only](cornac/models/baseline_only), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#baseline-only), [paper](http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf) | Baseline | CPU | [quick-start](examples/svd_example.py)
 |      | [Bayesian Personalized Ranking (BPR)](cornac/models/bpr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#bayesian-personalized-ranking-bpr) [paper](https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf) | Collaborative Filtering | CPU | [quick-start](examples/bpr_netflix.py), [deep-dive](https://github.com/recommenders-team/recommenders/blob/main/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb)
 |      | [Factorization Machines (FM)](cornac/models/fm), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#factorization-machines-fm), [paper](https://www.csie.ntu.edu.tw/~b97053/paper/Factorization%20Machines%20with%20libFM.pdf) | Collaborative Filtering / Content-Based | Linux, CPU | [quick-start](examples/fm_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/06_contextual_awareness.ipynb)
@@ -210,7 +214,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E
 |      | [Most Popular (MostPop)](cornac/models/most_pop), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.most_pop.recom_most_pop), [paper](https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf) | Baseline | CPU | [quick-start](examples/bpr_netflix.py)
 |      | [Non-negative Matrix Factorization (NMF)](cornac/models/nmf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.nmf.recom_nmf), [paper](http://papers.nips.cc/paper/1861-algorithms-for-non-negative-matrix-factorization.pdf) | Collaborative Filtering | CPU | [quick-start](examples/nmf_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/03_matrix_factorization.ipynb)
 |      | [Probabilistic Matrix Factorization (PMF)](cornac/models/pmf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.pmf.recom_pmf), [paper](https://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf) | Collaborative Filtering | CPU | [quick-start](examples/pmf_ratio.py)
-|      | [Session Popular (SPop)](cornac/models/spop), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.spop.recom_spop), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Next-Item / Baseline | CPU | [quick-start](examples/spop_yoochoose.py)
+|      | [Session Popular (SPop)](cornac/models/spop), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.spop.recom_spop), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Sequential / Baseline | CPU | [quick-start](examples/spop_yoochoose.py)
 |      | [Singular Value Decomposition (SVD)](cornac/models/svd), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.svd.recom_svd), [paper](https://people.engr.tamu.edu/huangrh/Spring16/papers_course/matrix_factorization.pdf) | Collaborative Filtering | CPU | [quick-start](examples/svd_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/03_matrix_factorization.ipynb)
 |      | [Social Recommendation using PMF (SoRec)](cornac/models/sorec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.sorec.recom_sorec), [paper](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.304.2464&rep=rep1&type=pdf) | Content-Based / Social | CPU | [quick-start](examples/sorec_filmtrust.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/05_multimodality.ipynb)
 |      | [User K-Nearest-Neighbors (UserKNN)](cornac/models/knn), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#user-k-nearest-neighbors-userknn), [paper](https://arxiv.org/pdf/1301.7363.pdf) | Neighborhood-Based | CPU | [quick-start](examples/knn_movielens.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/02_neighborhood.ipynb)
diff --git a/examples/README.md b/examples/README.md
index f667087fd..ad3d2ace9 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -120,12 +120,18 @@
 
 ----
 
-## Next-Item Algorithms
+## Sequential Recommendations
+
+### Session-Based With Yoochoose Dataset
 
 [spop_yoochoose.py](spop_yoochoose.py) - Next-item recommendation based on item popularity.
 
 [gru4rec_yoochoose.py](gru4rec_yoochoose.py) - Example of Session-based Recommendations with Recurrent Neural Networks (GRU4Rec).
 
+### Session-Based & Session-Aware With Diginetica Dataset
+
+[sequential_diginetica.py](sequential_diginetica.py) - Session-based and session-aware recommendations with the FPMC, BERT4Rec, GPT2Rec, GRU4Rec, and SASRec models.
+
 ----
 
 ## Next-Basket Algorithms

From 71ee633835c3c231e8f6537e0345d9d0a1ec023e Mon Sep 17 00:00:00 2001
From: hieuddo <hieu.dd.1998@gmail.com>
Date: Fri, 29 May 2026 23:42:13 +0800
Subject: [PATCH 10/10] remove unused nonlocal

---
 cornac/models/seq_utils/iterators.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cornac/models/seq_utils/iterators.py b/cornac/models/seq_utils/iterators.py
index 918603e3d..4f09320e8 100644
--- a/cornac/models/seq_utils/iterators.py
+++ b/cornac/models/seq_utils/iterators.py
@@ -208,7 +208,6 @@ def user_io_iter(train_set, n_sample=0, sample_alpha=0, rng=None, batch_size=1,
     pool_iter = iter(sequences)
 
     def _refill():
-        nonlocal end_mask
         while end_mask.sum() > 0:
             try:
                 uid, seq, _session_starts = next(pool_iter)