From 122a59d0852d5f66569f50cb5d8d233b6c519ad6 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Thu, 21 May 2026 15:37:44 +0800 Subject: [PATCH 01/10] feat: refine session-based recommendation framework with models including FPMC, BERT4Rec, SASRec, GPT2Rec, and updated GRU4Rec --- cornac/models/__init__.py | 5 + cornac/models/bert4rec/__init__.py | 16 + cornac/models/bert4rec/bert4rec.py | 117 +++++++ cornac/models/bert4rec/recom_bert4rec.py | 192 +++++++++++ cornac/models/fpmc/__init__.py | 16 + cornac/models/fpmc/fpmc.py | 84 +++++ cornac/models/fpmc/recom_fpmc.py | 212 ++++++++++++ cornac/models/gpt2rec/__init__.py | 16 + cornac/models/gpt2rec/gpt2rec.py | 112 +++++++ cornac/models/gpt2rec/recom_gpt2rec.py | 186 +++++++++++ cornac/models/gru4rec/gru4rec.py | 374 ++++++---------------- cornac/models/gru4rec/recom_gru4rec.py | 214 +++++++++---- cornac/models/recommender.py | 72 +++++ cornac/models/sasrec/__init__.py | 16 + cornac/models/sasrec/recom_sasrec.py | 246 ++++++++++++++ cornac/models/sasrec/sasrec.py | 186 +++++++++++ cornac/models/seq_utils/__init__.py | 69 ++++ cornac/models/seq_utils/iterators.py | 390 +++++++++++++++++++++++ cornac/models/seq_utils/losses.py | 125 ++++++++ cornac/models/seq_utils/optim.py | 93 ++++++ 20 files changed, 2408 insertions(+), 333 deletions(-) create mode 100644 cornac/models/bert4rec/__init__.py create mode 100644 cornac/models/bert4rec/bert4rec.py create mode 100644 cornac/models/bert4rec/recom_bert4rec.py create mode 100644 cornac/models/fpmc/__init__.py create mode 100644 cornac/models/fpmc/fpmc.py create mode 100644 cornac/models/fpmc/recom_fpmc.py create mode 100644 cornac/models/gpt2rec/__init__.py create mode 100644 cornac/models/gpt2rec/gpt2rec.py create mode 100644 cornac/models/gpt2rec/recom_gpt2rec.py create mode 100644 cornac/models/sasrec/__init__.py create mode 100644 cornac/models/sasrec/recom_sasrec.py create mode 100644 cornac/models/sasrec/sasrec.py create mode 100644 cornac/models/seq_utils/__init__.py 
create mode 100644 cornac/models/seq_utils/iterators.py create mode 100644 cornac/models/seq_utils/losses.py create mode 100644 cornac/models/seq_utils/optim.py diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py index 3451261ae..643b25258 100644 --- a/cornac/models/__init__.py +++ b/cornac/models/__init__.py @@ -16,6 +16,7 @@ from .recommender import Recommender from .recommender import NextBasketRecommender from .recommender import NextItemRecommender +from .recommender import SequentialRecommender from .amr import AMR from .ann import AnnoyANN @@ -24,6 +25,7 @@ from .ann import ScaNNANN from .baseline_only import BaselineOnly from .beacon import Beacon +from .bert4rec import BERT4Rec from .bivaecf import BiVAECF from .bpr import BPR from .bpr import WBPR @@ -45,9 +47,11 @@ from .ease import EASE from .efm import EFM from .fm import FM +from .fpmc import FPMC from .gcmc import GCMC from .global_avg import GlobalAvg from .gp_top import GPTop +from .gpt2rec import GPT2Rec from .gru4rec import GRU4Rec from .hft import HFT from .hpf import HPF @@ -74,6 +78,7 @@ from .pmf import PMF from .recvae import RecVAE from .sansa import SANSA +from .sasrec import SASRec from .sbpr import SBPR from .skm import SKMeans from .sorec import SoRec diff --git a/cornac/models/bert4rec/__init__.py b/cornac/models/bert4rec/__init__.py new file mode 100644 index 000000000..af46ce60a --- /dev/null +++ b/cornac/models/bert4rec/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class BERT4RecModel(nn.Module):
    """BERT4Rec: bidirectional transformer encoder for sequence rec.

    Uses HuggingFace's :class:`~transformers.BertModel` as the backbone. Item
    ids are embedded with ``item_emb`` and handed to the backbone through
    ``inputs_embeds``. The hidden state at the *last* sequence position is
    scored against candidate items by a dot product with the same
    ``item_emb`` table plus a per-item bias from ``item_biases``.

    Because only the last position is read, histories should be left-padded
    with ``pad_idx`` (this is what ``BERT4Rec.score`` does).

    ``forward(_, hist_iids, out_iids)`` returns a ``(B, len(out_iids))``
    score matrix, i.e. ``(B, B+N)`` when ``out_iids`` holds the ``B`` in-batch
    positives followed by ``N`` shared negatives.

    Parameters
    ----------
    item_num: int
        Number of real items. Index ``item_num`` is reserved for padding.
    embedding_dim: int
        Size of item embeddings and of the transformer hidden state.
    maxlen: int
        Maximum history length; the backbone gets ``maxlen + 1`` positions.
    n_layers, n_heads: int
        Number of transformer layers / attention heads.
    dropout: float
        Dropout used for both hidden states and attention probabilities.
    pad_idx: int
        Padding item index. Any negative value falls back to ``item_num``.
    tie_weights: bool
        Stored on the instance; this class does not read it anywhere else.
    init_std: float
        Standard deviation for the normal initialisation of ``item_emb``.
    device: str or torch.device
        Device the module is moved to at the end of construction.
    """

    def __init__(
        self,
        item_num,
        embedding_dim=100,
        maxlen=20,
        n_layers=2,
        n_heads=1,
        dropout=0.1,
        pad_idx=-1,
        tie_weights=False,
        init_std=0.02,
        device="cpu",
    ):
        super().__init__()
        # Imported lazily so the module can be imported without transformers.
        from transformers.models.bert import BertConfig, BertModel

        self.item_num = item_num
        self.pad_idx = pad_idx if pad_idx >= 0 else item_num
        self.maxlen = maxlen
        self.dev = device
        self.init_std = init_std
        self.tie_weights = tie_weights

        config = BertConfig(
            vocab_size=item_num + 1,
            hidden_size=embedding_dim,
            num_hidden_layers=n_layers,
            num_attention_heads=n_heads,
            intermediate_size=embedding_dim * 4,
            hidden_act="gelu",
            hidden_dropout_prob=dropout,
            attention_probs_dropout_prob=dropout,
            max_position_embeddings=maxlen + 1,
            initializer_range=init_std,
            pad_token_id=self.pad_idx,
            layer_norm_eps=1e-12,
            use_cache=False,
        )

        self.item_emb = nn.Embedding(
            num_embeddings=item_num + 1,
            embedding_dim=embedding_dim,
            padding_idx=self.pad_idx,
        )
        self.transformer_model = BertModel(config)
        self.item_biases = nn.Embedding(item_num + 1, 1, padding_idx=self.pad_idx)
        self._init_weights()
        self.to(device)

    def _init_weights(self):
        """Normal-init item embeddings, zero the padding row and all biases."""
        self.item_emb.weight.data.normal_(mean=0.0, std=self.init_std)
        self.item_emb.weight.data[self.pad_idx].zero_()
        self.item_biases.weight.data.zero_()

    def _encode(self, hist_iids):
        """Return the backbone hidden state at the last position, ``(B, D)``."""
        attention_mask = (hist_iids != self.pad_idx).long()
        embeds = self.item_emb(hist_iids)
        out = self.transformer_model(
            inputs_embeds=embeds, attention_mask=attention_mask
        )
        return out.last_hidden_state[:, -1, :]

    def forward(self, user_ids, hist_iids, out_iids, return_hidden=False):
        """Score ``out_iids`` for every history in ``hist_iids``.

        ``user_ids`` is accepted for interface parity and is not used.
        With ``return_hidden=True`` the raw pieces
        ``(hidden, item_embeddings, item_biases)`` are returned instead of
        the score matrix.
        """
        hidden = self._encode(hist_iids)
        item_e = self.item_emb(out_iids)
        bias = self.item_biases(out_iids)
        if return_hidden:
            return hidden, item_e, bias
        scores = torch.mm(hidden, item_e.T) + bias.T
        return scores

    @torch.no_grad()
    def predict(self, user_ids, log_seqs, item_indices=None):
        """Score candidate items for one or more histories.

        Parameters
        ----------
        user_ids:
            Unused; kept for interface parity.
        log_seqs: tensor or array-like of int
            Either a batch ``(B, L)`` or a single unbatched history ``(L,)``.
        item_indices: tensor, array-like, int or None
            Candidate item ids. ``None`` scores items ``0..item_num-1``.

        Returns
        -------
        numpy.ndarray
            The score matrix with all singleton dimensions squeezed out.
        """
        # Use the device the weights actually live on; ``self.dev`` is only
        # the value given at construction and goes stale after ``.to(...)``.
        device = self.item_emb.weight.device
        if item_indices is None:
            item_indices = torch.arange(self.item_num, device=device)
        else:
            item_indices = torch.as_tensor(
                item_indices, dtype=torch.long, device=device
            ).reshape(-1)
        log_seqs = torch.as_tensor(log_seqs, dtype=torch.long, device=device)
        if log_seqs.dim() == 1:
            # A single unbatched history: promote to a batch of one.
            log_seqs = log_seqs.unsqueeze(0)
        hidden = self._encode(log_seqs)
        item_e = self.item_emb(item_indices)
        bias = self.item_biases(item_indices)
        scores = torch.mm(hidden, item_e.T) + bias.T
        return scores.squeeze().detach().cpu().numpy()
+ + References + ---------- + Sun, Fei, et al. "BERT4Rec: Sequential recommendation with bidirectional encoder representations from transformer." + CIKM 2019. + """ + + def __init__( + self, + name="BERT4Rec", + embedding_dim=100, + mode="session-aware", + loss="ce", + batch_size=512, + learning_rate=0.001, + n_sample=2048, + sample_alpha=0.5, + n_epochs=10, + max_len=50, + num_blocks=2, + num_heads=1, + dropout=0.2, + l2_reg=0.0, + bpreg=1.0, + elu_param=0.5, + device="cpu", + trainable=True, + verbose=False, + seed=None, + ): + super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) + if loss not in SUPPORTED_LOSSES: + raise ValueError( + f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}" + ) + self.embedding_dim = embedding_dim + self.loss = loss + self.batch_size = batch_size + self.learning_rate = learning_rate + self.n_sample = n_sample + self.sample_alpha = sample_alpha + self.n_epochs = n_epochs + self.max_len = max_len + self.num_blocks = num_blocks + self.num_heads = num_heads + self.dropout = dropout + self.l2_reg = l2_reg + self.bpreg = bpreg + self.elu_param = elu_param + self.device = device + self.seed = seed + self.rng = get_rng(seed) + + def _build_data_iter(self, pad_index): + kwargs = dict( + train_set=self.train_set, + pad_index=pad_index, + batch_size=self.batch_size, + max_len=self.max_len, + n_sample=self.n_sample, + sample_alpha=self.sample_alpha, + rng=self.rng, + shuffle=True, + ) + if self.mode == "session-aware": + return user_seq_iter(**kwargs) + return session_seq_iter(**kwargs) + + def fit(self, train_set, val_set=None): + super().fit(train_set, val_set) + if not self.trainable: + return self + + import torch + + from .bert4rec import BERT4RecModel + from ..seq_utils.losses import get_loss_function + + torch.manual_seed(self.seed if self.seed is not None else 0) + + self.pad_idx = self.total_items + self.model = BERT4RecModel( + item_num=self.total_items, + embedding_dim=self.embedding_dim, + 
maxlen=self.max_len, + n_layers=self.num_blocks, + n_heads=self.num_heads, + dropout=self.dropout, + pad_idx=self.pad_idx, + device=self.device, + ) + + loss_fn = get_loss_function(self.loss) + loss_kwargs = dict( + bpreg=self.bpreg, elu_param=self.elu_param, n_sample=self.n_sample + ) + + opt = torch.optim.Adam( + self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98) + ) + + progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) + for _ in progress_bar: + self.model.train() + total_loss = 0.0 + cnt = 0 + for inc, (in_uids, hist_iids, out_iids) in enumerate( + self._build_data_iter(self.pad_idx) + ): + if len(hist_iids) < 2: + continue + hist_iids_t = torch.tensor( + hist_iids, dtype=torch.long, device=self.device, requires_grad=False + ) + out_iids_t = torch.tensor( + out_iids, dtype=torch.long, device=self.device, requires_grad=False + ) + + self.model.zero_grad() + item_scores = self.model( + None, hist_iids_t, out_iids_t, return_hidden=False + ) + + L = loss_fn( + item_scores, + out_iids=out_iids_t, + batch_size=len(hist_iids), + **loss_kwargs, + ) + if self.l2_reg > 0: + for p in self.model.parameters(): + L = L + self.l2_reg * torch.norm(p) + + L.backward() + opt.step() + + total_loss += L.cpu().detach().numpy() * len(hist_iids) + cnt += len(hist_iids) + if inc % 10 == 0 and cnt > 0: + progress_bar.set_postfix(loss=(total_loss / cnt)) + return self + + def score(self, user_idx, history_items, **kwargs): + import torch + + flat = self._flatten_history(history_items) + if len(flat) == 0: + return np.ones(self.total_items, dtype="float") + log_seq = [self.pad_idx] * (self.max_len - len(flat)) + list(flat) + log_seq = log_seq[-self.max_len :] + log_seq_t = torch.tensor([log_seq], dtype=torch.long, device=self.device) + self.model.eval() + return self.model.predict(user_idx, log_seq_t) diff --git a/cornac/models/fpmc/__init__.py b/cornac/models/fpmc/__init__.py new file mode 100644 index 000000000..ca661be34 --- /dev/null +++ 
class FPMC_Model(nn.Module):
    """Factorizing Personalized Markov Chains (Rendle et al., 2010).

    Score for user ``u``, last item ``i``, candidate ``j``::

        s(u, i, j) = <UI_u, IU_j> + <IL_j, LI_i> + b_j

    where ``UI``/``IU`` are the user and item factors of the matrix
    factorisation term, ``LI``/``IL`` are the last-item and candidate factors
    of the Markov-chain term, and ``b`` is a per-item bias.

    ``forward`` returns a ``(B, len(out_iids))`` matrix, i.e. ``(B, B+N)``
    when ``out_iids`` contains the in-batch positives followed by ``N``
    shared negatives.

    Parameters
    ----------
    user_num: int
        Number of known users. Row ``user_num`` of ``UI_emb`` is a zeroed
        padding row.
    item_num: int
        Number of real items; every item table has ``item_num + 1`` rows.
    factor_num: int
        Latent factor dimension.
    pad_idx: int or None
        Padding item index; ``None`` falls back to ``item_num``.
    device: str or torch.device
        Stored as ``self.device``. The constructor does not move the module.
    """

    def __init__(self, user_num, item_num, factor_num, pad_idx=None, device="cpu"):
        super().__init__()
        self.user_num = user_num
        self.item_num = item_num
        self.factor_num = factor_num
        self.device = device
        self.pad_idx = pad_idx if pad_idx is not None else item_num

        # User-Item (target) embeddings
        self.UI_emb = nn.Embedding(user_num + 1, factor_num, padding_idx=user_num)
        # Item-User factor: scoring with UI
        self.IU_emb = nn.Embedding(item_num + 1, factor_num, padding_idx=self.pad_idx)
        # Last-Item factor (input)
        self.LI_emb = nn.Embedding(item_num + 1, factor_num, padding_idx=self.pad_idx)
        # Item-Last factor: scoring with LI
        self.IL_emb = nn.Embedding(item_num + 1, factor_num, padding_idx=self.pad_idx)
        self.item_biases = nn.Embedding(item_num + 1, 1, padding_idx=self.pad_idx)

        nn.init.normal_(self.UI_emb.weight, std=0.01)
        nn.init.normal_(self.IU_emb.weight, std=0.01)
        nn.init.normal_(self.LI_emb.weight, std=0.01)
        nn.init.normal_(self.IL_emb.weight, std=0.01)
        nn.init.constant_(self.item_biases.weight, 0)
        # The explicit inits above overwrite the padding rows, so zero them
        # again to keep padding ids contributing nothing to a score.
        for emb in (
            self.UI_emb,
            self.IU_emb,
            self.LI_emb,
            self.IL_emb,
            self.item_biases,
        ):
            if emb.padding_idx is not None:
                emb.weight.data[emb.padding_idx].zero_()

    def forward(self, in_uids, in_iids, out_iids):
        """Score every candidate in ``out_iids`` for each (user, last item).

        ``in_uids`` and ``in_iids`` are 1-D id tensors of equal length ``B``;
        ``out_iids`` is a 1-D tensor of candidate ids. Returns a
        ``(B, len(out_iids))`` tensor.
        """
        last_item_emb = self.LI_emb(in_iids)  # (B, D)
        user_emb = self.UI_emb(in_uids)  # (B, D)
        iu_emb = self.IU_emb(out_iids)  # (B+N, D)
        il_emb = self.IL_emb(out_iids)  # (B+N, D)
        bias = self.item_biases(out_iids)  # (B+N, 1)

        mf = torch.einsum("be,ne->bn", user_emb, iu_emb)
        fmc = torch.einsum("ne,be->bn", il_emb, last_item_emb)
        return mf + fmc + bias.T

    @torch.no_grad()
    def predict(self, user_idx, last_iid, item_indices=None):
        """Score candidates without tracking gradients.

        Parameters
        ----------
        user_idx, last_iid: tensor, array-like or int
            User id(s) and matching last-item id(s). Scalars are treated as a
            batch of one.
        item_indices: tensor, array-like, int or None
            Candidate item ids. ``None`` scores items ``0..item_num-1``.

        Returns
        -------
        numpy.ndarray
            The score matrix with all singleton dimensions squeezed out.
        """
        # Follow the weights rather than the constructor argument:
        # ``self.device`` is wrong if the module was moved (or never moved).
        device = self.IU_emb.weight.device
        if item_indices is None:
            item_indices = torch.arange(self.item_num, device=device)
        else:
            item_indices = torch.as_tensor(
                item_indices, dtype=torch.long, device=device
            ).reshape(-1)
        user_idx = torch.as_tensor(user_idx, dtype=torch.long, device=device)
        last_iid = torch.as_tensor(last_iid, dtype=torch.long, device=device)
        scores = self.forward(
            user_idx.reshape(-1), last_iid.reshape(-1), item_indices
        ).squeeze()
        return scores.detach().cpu().numpy()
+ + mode: str, optional, default: 'session-aware' + + loss: str, optional, default: 'bpr' + + batch_size, learning_rate, n_sample, sample_alpha, n_epochs: + Standard training hyperparameters. + + bpreg, elu_param: only used when ``loss="bpr-max"``. + + momentum: float, optional, default: 0.0 + Momentum for the IndexedAdagradM optimizer. + + References + ---------- + Rendle, S., Freudenthaler, C., & Schmidt-Thieme, L. (2010). + Factorizing personalized Markov chains for next-basket recommendation. WWW. + """ + + def __init__( + self, + name="FPMC", + embedding_dim=100, + mode="session-aware", + loss="bpr", + batch_size=512, + learning_rate=0.05, + momentum=0.0, + n_sample=2048, + sample_alpha=0.5, + n_epochs=10, + bpreg=1.0, + elu_param=0.5, + device="cpu", + trainable=True, + verbose=False, + seed=None, + ): + super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) + if loss not in SUPPORTED_LOSSES: + raise ValueError( + f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}" + ) + self.embedding_dim = embedding_dim + self.loss = loss + self.batch_size = batch_size + self.learning_rate = learning_rate + self.momentum = momentum + self.n_sample = n_sample + self.sample_alpha = sample_alpha + self.n_epochs = n_epochs + self.bpreg = bpreg + self.elu_param = elu_param + self.device = device + self.seed = seed + self.rng = get_rng(seed) + + def _build_data_iter(self, pad_index): + # FPMC only uses the immediately previous item, so set max_len = 1. 
+ kwargs = dict( + train_set=self.train_set, + pad_index=pad_index, + batch_size=self.batch_size, + max_len=1, + n_sample=self.n_sample, + sample_alpha=self.sample_alpha, + rng=self.rng, + shuffle=True, + ) + if self.mode == "session-aware": + return user_seq_iter(**kwargs) + return session_seq_iter(**kwargs) + + def fit(self, train_set, val_set=None): + super().fit(train_set, val_set) + if not self.trainable: + return self + + import torch + + from .fpmc import FPMC_Model + from ..seq_utils.losses import get_loss_function + from ..seq_utils.optim import IndexedAdagradM + + torch.manual_seed(self.seed if self.seed is not None else 0) + + self.pad_idx = self.total_items + self.model = FPMC_Model( + user_num=self.total_users, + item_num=self.total_items, + factor_num=self.embedding_dim, + pad_idx=self.pad_idx, + device=self.device, + ).to(self.device) + + loss_fn = get_loss_function(self.loss) + loss_kwargs = dict( + bpreg=self.bpreg, elu_param=self.elu_param, n_sample=self.n_sample + ) + + opt = IndexedAdagradM( + self.model.parameters(), lr=self.learning_rate, momentum=self.momentum + ) + + progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) + for _ in progress_bar: + self.model.train() + total_loss = 0.0 + cnt = 0 + for inc, (in_uids, hist_iids, out_iids) in enumerate( + self._build_data_iter(self.pad_idx) + ): + if len(hist_iids) < 2: + continue + in_uids_t = torch.tensor( + in_uids, dtype=torch.long, device=self.device, requires_grad=False + ) + # FPMC uses just the most recent item (hist_iids shape (B, 1)) + last_iid_t = torch.tensor( + hist_iids[:, -1], + dtype=torch.long, + device=self.device, + requires_grad=False, + ) + out_iids_t = torch.tensor( + out_iids, dtype=torch.long, device=self.device, requires_grad=False + ) + + self.model.zero_grad() + item_scores = self.model(in_uids_t, last_iid_t, out_iids_t) + + L = loss_fn( + item_scores, + out_iids=out_iids_t, + batch_size=len(in_uids), + **loss_kwargs, + ) + L.backward() + opt.step() + + 
total_loss += L.cpu().detach().numpy() * len(in_uids) + cnt += len(in_uids) + if inc % 10 == 0 and cnt > 0: + progress_bar.set_postfix(loss=(total_loss / cnt)) + return self + + def score(self, user_idx, history_items, **kwargs): + import torch + + flat = self._flatten_history(history_items) + if len(flat) == 0: + return np.ones(self.total_items, dtype="float") + last = int(flat[-1]) + # Cap user index to known users (cold-start fallback to padding row) + u_idx = user_idx if 0 <= user_idx < self.total_users else self.total_users + self.model.eval() + with torch.no_grad(): + u_t = torch.tensor([u_idx], dtype=torch.long, device=self.device) + i_t = torch.tensor([last], dtype=torch.long, device=self.device) + cdds = torch.arange(self.total_items, dtype=torch.long, device=self.device) + return self.model.predict(u_t, i_t, cdds) diff --git a/cornac/models/gpt2rec/__init__.py b/cornac/models/gpt2rec/__init__.py new file mode 100644 index 000000000..ed6d4676c --- /dev/null +++ b/cornac/models/gpt2rec/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class GPT2RecModel(nn.Module):
    """GPT2-based causal transformer for next-item recommendation.

    Uses HuggingFace's :class:`~transformers.GPT2Model` as the backbone. Item
    ids are embedded with ``item_emb`` and handed to the backbone through
    ``inputs_embeds``. The hidden state at the *last* sequence position is
    scored against candidate items by a dot product with the same
    ``item_emb`` table plus a per-item bias from ``item_biases``.

    Because only the last position is read, histories should be left-padded
    with ``pad_idx`` (this is what ``GPT2Rec.score`` does).

    ``forward(_, hist_iids, out_iids)`` returns a ``(B, len(out_iids))``
    score matrix, i.e. ``(B, B+N)`` when ``out_iids`` holds the ``B`` in-batch
    positives followed by ``N`` shared negatives.

    Parameters
    ----------
    item_num: int
        Number of real items. Index ``item_num`` is reserved for padding.
    embedding_dim: int
        Size of item embeddings and of the transformer hidden state.
    maxlen: int
        Maximum history length; the backbone gets ``maxlen + 1`` positions.
    n_layers, n_heads: int
        Number of transformer layers / attention heads.
    dropout: float
        Dropout used for residual, embedding and attention dropout.
    pad_idx: int
        Padding item index. Any negative value falls back to ``item_num``.
    tie_weights: bool
        Stored on the instance; this class does not read it anywhere else.
    init_std: float
        Standard deviation for the normal initialisation of ``item_emb``.
    device: str or torch.device
        Device the module is moved to at the end of construction.
    """

    def __init__(
        self,
        item_num,
        embedding_dim=100,
        maxlen=20,
        n_layers=2,
        n_heads=1,
        dropout=0.1,
        pad_idx=-1,
        tie_weights=False,
        init_std=0.02,
        device="cpu",
    ):
        super().__init__()
        # Imported lazily so the module can be imported without transformers.
        from transformers.models.gpt2 import GPT2Config, GPT2Model

        self.item_num = item_num
        self.pad_idx = pad_idx if pad_idx >= 0 else item_num
        self.maxlen = maxlen
        self.dev = device
        self.init_std = init_std
        self.tie_weights = tie_weights

        config = GPT2Config(
            vocab_size=item_num + 1,
            n_positions=maxlen + 1,
            n_embd=embedding_dim,
            n_layer=n_layers,
            n_head=n_heads,
            n_inner=embedding_dim * 4,
            activation_function="gelu_new",
            resid_pdrop=dropout,
            embd_pdrop=dropout,
            attn_pdrop=dropout,
            initializer_range=init_std,
            pad_token_id=self.pad_idx,
            layer_norm_epsilon=1e-12,
            use_cache=False,
        )

        self.item_emb = nn.Embedding(
            item_num + 1, embedding_dim, padding_idx=self.pad_idx
        )
        self.transformer_model = GPT2Model(config)
        self.item_biases = nn.Embedding(item_num + 1, 1, padding_idx=self.pad_idx)

        self._init_weights()
        self.to(device)

    def _init_weights(self):
        """Normal-init item embeddings, zero the padding row and all biases."""
        self.item_emb.weight.data.normal_(mean=0.0, std=self.init_std)
        self.item_emb.weight.data[self.pad_idx].zero_()
        self.item_biases.weight.data.zero_()

    def _encode(self, hist_iids):
        """Return the backbone hidden state at the last position, ``(B, D)``."""
        attention_mask = (hist_iids != self.pad_idx).long()
        embeds = self.item_emb(hist_iids)
        out = self.transformer_model(
            inputs_embeds=embeds, attention_mask=attention_mask
        )
        return out.last_hidden_state[:, -1, :]

    def forward(self, user_ids, hist_iids, out_iids, return_hidden=False):
        """Score ``out_iids`` for every history in ``hist_iids``.

        ``user_ids`` is accepted for interface parity and is not used.
        With ``return_hidden=True`` the raw pieces
        ``(hidden, item_embeddings, item_biases)`` are returned instead of
        the score matrix.
        """
        hidden = self._encode(hist_iids)
        item_e = self.item_emb(out_iids)
        bias = self.item_biases(out_iids)
        if return_hidden:
            return hidden, item_e, bias
        scores = torch.mm(hidden, item_e.T) + bias.T
        return scores

    @torch.no_grad()
    def predict(self, user_ids, log_seqs, item_indices=None):
        """Score candidate items for one or more histories.

        Parameters
        ----------
        user_ids:
            Unused; kept for interface parity.
        log_seqs: tensor or array-like of int
            Either a batch ``(B, L)`` or a single unbatched history ``(L,)``.
        item_indices: tensor, array-like, int or None
            Candidate item ids. ``None`` scores items ``0..item_num-1``.

        Returns
        -------
        numpy.ndarray
            The score matrix with all singleton dimensions squeezed out.
        """
        # Use the device the weights actually live on; ``self.dev`` is only
        # the value given at construction and goes stale after ``.to(...)``.
        device = self.item_emb.weight.device
        if item_indices is None:
            item_indices = torch.arange(self.item_num, device=device)
        else:
            item_indices = torch.as_tensor(
                item_indices, dtype=torch.long, device=device
            ).reshape(-1)
        log_seqs = torch.as_tensor(log_seqs, dtype=torch.long, device=device)
        if log_seqs.dim() == 1:
            # A single unbatched history: promote to a batch of one.
            log_seqs = log_seqs.unsqueeze(0)
        hidden = self._encode(log_seqs)
        item_e = self.item_emb(item_indices)
        bias = self.item_biases(item_indices)
        scores = torch.mm(hidden, item_e.T) + bias.T
        return scores.squeeze().detach().cpu().numpy()
+ """ + + def __init__( + self, + name="GPT2Rec", + embedding_dim=100, + mode="session-aware", + loss="ce", + batch_size=512, + learning_rate=0.001, + n_sample=2048, + sample_alpha=0.5, + n_epochs=10, + max_len=50, + num_blocks=2, + num_heads=1, + dropout=0.2, + l2_reg=0.0, + bpreg=1.0, + elu_param=0.5, + device="cpu", + trainable=True, + verbose=False, + seed=None, + ): + super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) + if loss not in SUPPORTED_LOSSES: + raise ValueError( + f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}" + ) + self.embedding_dim = embedding_dim + self.loss = loss + self.batch_size = batch_size + self.learning_rate = learning_rate + self.n_sample = n_sample + self.sample_alpha = sample_alpha + self.n_epochs = n_epochs + self.max_len = max_len + self.num_blocks = num_blocks + self.num_heads = num_heads + self.dropout = dropout + self.l2_reg = l2_reg + self.bpreg = bpreg + self.elu_param = elu_param + self.device = device + self.seed = seed + self.rng = get_rng(seed) + + def _build_data_iter(self, pad_index): + kwargs = dict( + train_set=self.train_set, + pad_index=pad_index, + batch_size=self.batch_size, + max_len=self.max_len, + n_sample=self.n_sample, + sample_alpha=self.sample_alpha, + rng=self.rng, + shuffle=True, + ) + if self.mode == "session-aware": + return user_seq_iter(**kwargs) + return session_seq_iter(**kwargs) + + def fit(self, train_set, val_set=None): + super().fit(train_set, val_set) + if not self.trainable: + return self + + import torch + + from .gpt2rec import GPT2RecModel + from ..seq_utils.losses import get_loss_function + + torch.manual_seed(self.seed if self.seed is not None else 0) + + self.pad_idx = self.total_items + self.model = GPT2RecModel( + item_num=self.total_items, + embedding_dim=self.embedding_dim, + maxlen=self.max_len, + n_layers=self.num_blocks, + n_heads=self.num_heads, + dropout=self.dropout, + pad_idx=self.pad_idx, + device=self.device, + ) + + loss_fn = 
get_loss_function(self.loss) + loss_kwargs = dict( + bpreg=self.bpreg, elu_param=self.elu_param, n_sample=self.n_sample + ) + + opt = torch.optim.Adam( + self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98) + ) + + progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) + for _ in progress_bar: + self.model.train() + total_loss = 0.0 + cnt = 0 + for inc, (in_uids, hist_iids, out_iids) in enumerate( + self._build_data_iter(self.pad_idx) + ): + if len(hist_iids) < 2: + continue + hist_iids_t = torch.tensor( + hist_iids, dtype=torch.long, device=self.device, requires_grad=False + ) + out_iids_t = torch.tensor( + out_iids, dtype=torch.long, device=self.device, requires_grad=False + ) + + self.model.zero_grad() + item_scores = self.model( + None, hist_iids_t, out_iids_t, return_hidden=False + ) + + L = loss_fn( + item_scores, + out_iids=out_iids_t, + batch_size=len(hist_iids), + **loss_kwargs, + ) + if self.l2_reg > 0: + for p in self.model.parameters(): + L = L + self.l2_reg * torch.norm(p) + + L.backward() + opt.step() + + total_loss += L.cpu().detach().numpy() * len(hist_iids) + cnt += len(hist_iids) + if inc % 10 == 0 and cnt > 0: + progress_bar.set_postfix(loss=(total_loss / cnt)) + return self + + def score(self, user_idx, history_items, **kwargs): + import torch + + flat = self._flatten_history(history_items) + if len(flat) == 0: + return np.ones(self.total_items, dtype="float") + log_seq = [self.pad_idx] * (self.max_len - len(flat)) + list(flat) + log_seq = log_seq[-self.max_len :] + log_seq_t = torch.tensor([log_seq], dtype=torch.long, device=self.device) + self.model.eval() + return self.model.predict(user_idx, log_seq_t) diff --git a/cornac/models/gru4rec/gru4rec.py b/cornac/models/gru4rec/gru4rec.py index 2ea880b58..476c281e2 100644 --- a/cornac/models/gru4rec/gru4rec.py +++ b/cornac/models/gru4rec/gru4rec.py @@ -1,12 +1,25 @@ -from collections import Counter +# Copyright 2026 The Cornac Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ import numpy as np import torch from torch import nn from torch.autograd import Variable -from torch.optim import Optimizer -from cornac.utils.common import get_rng +from ..seq_utils.iterators import io_iter # noqa: F401 +from ..seq_utils.optim import IndexedAdagradM # noqa: F401 def init_parameter_matrix( @@ -18,78 +31,6 @@ def init_parameter_matrix( return nn.init._no_grad_uniform_(tensor, -sigma, sigma) -class IndexedAdagradM(Optimizer): - def __init__(self, params, lr=0.05, momentum=0.0, eps=1e-6): - if lr <= 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - if momentum < 0.0: - raise ValueError("Invalid momentum value: {}".format(momentum)) - if eps <= 0.0: - raise ValueError("Invalid epsilon value: {}".format(eps)) - - defaults = dict(lr=lr, momentum=momentum, eps=eps) - super(IndexedAdagradM, self).__init__(params, defaults) - - for group in self.param_groups: - for p in group["params"]: - state = self.state[p] - state["acc"] = torch.full_like( - p, 0, memory_format=torch.preserve_format - ) - if momentum > 0: - state["mom"] = torch.full_like( - p, 0, memory_format=torch.preserve_format - ) - - def share_memory(self): - for group in self.param_groups: - for p in group["params"]: - state = self.state[p] - state["acc"].share_memory_() - if group["momentum"] > 0: - state["mom"].share_memory_() - - @torch.no_grad() - def 
step(self, closure=None): - loss = None - if closure is not None: - with torch.enable_grad(): - loss = closure() - for group in self.param_groups: - for p in group["params"]: - if p.grad is None: - continue - grad = p.grad - state = self.state[p] - clr = group["lr"] - momentum = group["momentum"] - if grad.is_sparse: - grad = grad.coalesce() - grad_indices = grad._indices()[0] - grad_values = grad._values() - accs = state["acc"][grad_indices] + grad_values.pow(2) - state["acc"].index_copy_(0, grad_indices, accs) - accs.add_(group["eps"]).sqrt_().mul_(-1 / clr) - if momentum > 0: - moma = state["mom"][grad_indices] - moma.mul_(momentum).add_(grad_values / accs) - state["mom"].index_copy_(0, grad_indices, moma) - p.index_add_(0, grad_indices, moma) - else: - p.index_add_(0, grad_indices, grad_values / accs) - else: - state["acc"].add_(grad.pow(2)) - accs = state["acc"].add(group["eps"]) - accs.sqrt_() - if momentum > 0: - mom = state["mom"] - mom.mul_(momentum).addcdiv_(grad, accs, value=-clr) - p.add_(mom) - else: - p.addcdiv_(grad, accs, value=-clr) - return loss - - class GRUEmbedding(nn.Module): def __init__(self, dim_in, dim_out): super(GRUEmbedding, self).__init__() @@ -118,6 +59,21 @@ def forward(self, X, H): class GRU4RecModel(nn.Module): + """GRU4Rec PyTorch architecture. + + The model computes a hidden state from the latest item id (and previous + hidden state) and scores all candidate items via either: + + - ``constrained_embedding=True``: tied input/output item embeddings. + - ``embedding > 0``: separate input embedding of given size. + - otherwise: a custom :class:`GRUEmbedding` directly producing the hidden. + + The model returns a ``(B, B+N)`` score matrix where columns 0..B-1 are + the in-batch positives and columns B..B+N-1 are shared sampled negatives. + Padding row ``n_items`` is included so it is safe to pass ``n_items`` as + a fake input id for cold-start fallback at score-time. 
+ """ + def __init__( self, n_items, @@ -146,12 +102,14 @@ def __init__( self.elu_param = elu_param self.bpreg = bpreg self.loss = loss - self.set_loss_function(self.loss) self.start = 0 if constrained_embedding: n_input = layers[-1] elif embedding: - self.E = nn.Embedding(n_items, embedding, sparse=True) + # +1 for an extra padding row to support cold-start at inference + self.E = nn.Embedding( + n_items + 1, embedding, sparse=True, padding_idx=n_items + ) n_input = embedding else: self.GE = GRUEmbedding(n_items, layers[0]) @@ -165,8 +123,11 @@ def __init__( self.D.append(nn.Dropout(dropout_p_hidden)) self.G = nn.ModuleList(self.G) self.D = nn.ModuleList(self.D) - self.Wy = nn.Embedding(n_items, layers[-1], sparse=True) - self.By = nn.Embedding(n_items, 1, sparse=True) + # +1 row for padding (index = n_items) so we can pad cold-start inputs + self.Wy = nn.Embedding( + n_items + 1, layers[-1], sparse=True, padding_idx=n_items + ) + self.By = nn.Embedding(n_items + 1, 1, sparse=True, padding_idx=n_items) self.reset_parameters() @torch.no_grad() @@ -182,73 +143,30 @@ def reset_parameters(self): nn.init.zeros_(self.G[i].bias_hh) init_parameter_matrix(self.Wy.weight) nn.init.zeros_(self.By.weight) - - def set_loss_function(self, loss): - if loss == "cross-entropy": - self.loss_function = self.xe_loss_with_softmax - elif loss == "bpr-max": - self.loss_function = self.bpr_max_loss_with_elu - elif loss == "top1": - self.loss_function = self.top1 - else: - raise NotImplementedError - - def xe_loss_with_softmax(self, O, Y, M): - if self.logq > 0: - O = O - self.logq * torch.log( - torch.cat([self.P0[Y[:M]], self.P0[Y[M:]] ** self.sample_alpha]) - ) - X = torch.exp(O - O.max(dim=1, keepdim=True)[0]) - X = X / X.sum(dim=1, keepdim=True) - return -torch.sum(torch.log(torch.diag(X) + 1e-24)) - - def softmax_neg(self, X): - hm = 1.0 - torch.eye(*X.shape, out=torch.empty_like(X)) - X = X * hm - e_x = torch.exp(X - X.max(dim=1, keepdim=True)[0]) * hm - return e_x / e_x.sum(dim=1, 
keepdim=True) - - def bpr_max_loss_with_elu(self, O, Y, M): - if self.elu_param > 0: - O = nn.functional.elu(O, self.elu_param) - softmax_scores = self.softmax_neg(O) - target_scores = torch.diag(O) - target_scores = target_scores.reshape(target_scores.shape[0], -1) - return torch.sum( - ( - -torch.log( - torch.sum(torch.sigmoid(target_scores - O) * softmax_scores, dim=1) - + 1e-24 - ) - + self.bpreg * torch.sum((O**2) * softmax_scores, dim=1) - ) - ) - - def top1(self, O, Y, M): - target_scores = torch.diag(O) - target_scores = target_scores.reshape(target_scores.shape[0], -1) - return torch.sum( - ( - torch.mean( - torch.sigmoid(O - target_scores) + torch.sigmoid(O**2), axis=1 - ) - - torch.sigmoid(target_scores**2) / (M + self.n_sample) - ) - ) + if self.Wy.padding_idx is not None: + self.Wy.weight.data[self.Wy.padding_idx].zero_() + if self.By.padding_idx is not None: + self.By.weight.data[self.By.padding_idx].zero_() def _init_numpy_weights(self, shape): - sigma = np.sqrt(6.0 / (shape[0] + shape[1])) - m = np.random.rand(*shape).astype("float32") * 2 * sigma - sigma + sigma = float(np.sqrt(6.0 / (shape[0] + shape[1]))) + m = (np.random.rand(*shape) * 2 * sigma - sigma).astype("float32") return m @torch.no_grad() def _reset_weights_to_compatibility_mode(self): + """Reset weights using numpy RNG with seed 42 for reproducibility. + + Note: when ``constrained_embedding=False`` and ``embedding > 0`` the + ``E`` embedding includes a padding row (``n_items``), and ``Wy``/``By`` + also include padding rows. We only set the first ``n_items`` rows. 
+ """ np.random.seed(42) if self.constrained_embedding: n_input = self.layers[-1] elif self.embedding: n_input = self.embedding - self.E.weight.set_( + self.E.weight.data[: self.n_items].copy_( torch.tensor( self._init_numpy_weights((self.n_items, n_input)), device=self.E.weight.device, @@ -256,20 +174,29 @@ def _reset_weights_to_compatibility_mode(self): ) else: n_input = self.n_items - m = [] - m.append(self._init_numpy_weights((n_input, self.layers[0]))) - m.append(self._init_numpy_weights((n_input, self.layers[0]))) - m.append(self._init_numpy_weights((n_input, self.layers[0]))) + m = [ + self._init_numpy_weights((n_input, self.layers[0])), + self._init_numpy_weights((n_input, self.layers[0])), + self._init_numpy_weights((n_input, self.layers[0])), + ] self.GE.Wx0.weight.set_( - torch.tensor(np.hstack(m), device=self.GE.Wx0.weight.device) + torch.tensor( + np.hstack(m), dtype=torch.float32, device=self.GE.Wx0.weight.device + ) + ) + m2 = [ + self._init_numpy_weights((self.layers[0], self.layers[0])), + self._init_numpy_weights((self.layers[0], self.layers[0])), + ] + self.GE.Wrz0.set_( + torch.tensor( + np.hstack(m2), dtype=torch.float32, device=self.GE.Wrz0.device + ) ) - m2 = [] - m2.append(self._init_numpy_weights((self.layers[0], self.layers[0]))) - m2.append(self._init_numpy_weights((self.layers[0], self.layers[0]))) - self.GE.Wrz0.set_(torch.tensor(np.hstack(m2), device=self.GE.Wrz0.device)) self.GE.Wh0.set_( torch.tensor( self._init_numpy_weights((self.layers[0], self.layers[0])), + dtype=torch.float32, device=self.GE.Wh0.device, ) ) @@ -277,19 +204,27 @@ def _reset_weights_to_compatibility_mode(self): torch.zeros((self.layers[0] * 3,), device=self.GE.Bh0.device) ) for i in range(self.start, len(self.layers)): - m = [] - m.append(self._init_numpy_weights((n_input, self.layers[i]))) - m.append(self._init_numpy_weights((n_input, self.layers[i]))) - m.append(self._init_numpy_weights((n_input, self.layers[i]))) + m = [ + self._init_numpy_weights((n_input, 
self.layers[i])), + self._init_numpy_weights((n_input, self.layers[i])), + self._init_numpy_weights((n_input, self.layers[i])), + ] self.G[i].weight_ih.set_( - torch.tensor(np.vstack(m), device=self.G[i].weight_ih.device) + torch.tensor( + np.vstack(m), dtype=torch.float32, device=self.G[i].weight_ih.device + ) ) - m2 = [] - m2.append(self._init_numpy_weights((self.layers[i], self.layers[i]))) - m2.append(self._init_numpy_weights((self.layers[i], self.layers[i]))) - m2.append(self._init_numpy_weights((self.layers[i], self.layers[i]))) + m2 = [ + self._init_numpy_weights((self.layers[i], self.layers[i])), + self._init_numpy_weights((self.layers[i], self.layers[i])), + self._init_numpy_weights((self.layers[i], self.layers[i])), + ] self.G[i].weight_hh.set_( - torch.tensor(np.vstack(m2), device=self.G[i].weight_hh.device) + torch.tensor( + np.vstack(m2), + dtype=torch.float32, + device=self.G[i].weight_hh.device, + ) ) self.G[i].bias_hh.set_( torch.zeros((self.layers[i] * 3,), device=self.G[i].bias_hh.device) @@ -297,15 +232,20 @@ def _reset_weights_to_compatibility_mode(self): self.G[i].bias_ih.set_( torch.zeros((self.layers[i] * 3,), device=self.G[i].bias_ih.device) ) - self.Wy.weight.set_( + self.Wy.weight.data[: self.n_items].copy_( torch.tensor( self._init_numpy_weights((self.n_items, self.layers[-1])), + dtype=torch.float32, device=self.Wy.weight.device, ) ) - self.By.weight.set_( + self.By.weight.data[: self.n_items].copy_( torch.zeros((self.n_items, 1), device=self.By.weight.device) ) + if self.Wy.padding_idx is not None: + self.Wy.weight.data[self.Wy.padding_idx].zero_() + if self.By.padding_idx is not None: + self.By.weight.data[self.By.padding_idx].zero_() def embed_constrained(self, X, Y=None): if Y is not None: @@ -359,8 +299,8 @@ def hidden_step(self, X, H, training=False): return X def score_items(self, X, O, B): - O = torch.mm(X, O.T) + B.T - return O + out = torch.mm(X, O.T) + B.T + return out def forward(self, X, H, Y, training=False): E, O, B = 
self.embed(X, H, Y) @@ -373,118 +313,13 @@ def forward(self, X, H, Y, training=False): return R -def io_iter( - s_iter, uir_tuple, n_sample=0, sample_alpha=0, rng=None, batch_size=1, shuffle=False -): - """Paralellize mini-batch of input-output items. Create an iterator over data yielding batch of input item indices, batch of output item indices, - batch of start masking, batch of end masking, and batch of valid ids (relative positions of current sequences in the last batch). - - Parameters - ---------- - batch_size: int, optional, default = 1 - - shuffle: bool, optional, default: False - If `True`, orders of triplets will be randomized. If `False`, default orders kept. - - Returns - ------- - iterator : batch of input item indices, batch of output item indices, batch of starting sequence mask, batch of ending sequence mask, batch of valid ids +def score(model, layers, device, history_items): + """Score all items given a flat ``history_items`` list of integers. + Returns a numpy array of length ``n_items`` (or ``n_items + 1`` if the + output embedding includes a padding row; in that case the caller is + responsible for trimming). Returns ``None`` if ``history_items`` is empty.
""" - rng = rng if rng is not None else get_rng(None) - start_mask = np.zeros(batch_size, dtype="int") - end_mask = np.ones(batch_size, dtype="int") - input_iids = None - output_iids = None - l_pool = [] - c_pool = [None for _ in range(batch_size)] - sizes = np.zeros(batch_size, dtype="int") - if n_sample > 0: - item_count = Counter(uir_tuple[1]) - item_indices = np.array( - [iid for iid, _ in item_count.most_common()], dtype="int" - ) - item_dist = ( - np.array([cnt for _, cnt in item_count.most_common()], dtype="float") - ** sample_alpha - ) - item_dist = item_dist / item_dist.sum() - for _, batch_mapped_ids in s_iter(batch_size, shuffle): - l_pool += batch_mapped_ids - while len(l_pool) > 0: - if end_mask.sum() == 0: - input_iids = uir_tuple[1][ - [mapped_ids[-sizes[idx]] for idx, mapped_ids in enumerate(c_pool)] - ] - output_iids = uir_tuple[1][ - [ - mapped_ids[-sizes[idx] + 1] - for idx, mapped_ids in enumerate(c_pool) - ] - ] - sizes -= 1 - for idx, size in enumerate(sizes): - if size == 1: - end_mask[idx] = 1 - if n_sample > 0: - negative_samples = rng.choice( - item_indices, size=n_sample, replace=True, p=item_dist - ) - output_iids = np.concatenate([output_iids, negative_samples]) - yield input_iids, output_iids, start_mask, np.arange( - batch_size, dtype="int" - ) - start_mask.fill(0) # reset start masking - while end_mask.sum() > 0 and len(l_pool) > 0: - next_seq = l_pool.pop() - if len(next_seq) > 1: - idx = np.nonzero(end_mask)[0][0] - end_mask[idx] = 0 - start_mask[idx] = 1 - c_pool[idx] = next_seq - sizes[idx] = len(c_pool[idx]) - - valid_id = np.ones(batch_size, dtype="int") - while True: - for idx, size in enumerate(sizes): - if size == 1: - end_mask[idx] = 1 - valid_id[idx] = 0 - input_iids = uir_tuple[1][ - [ - mapped_ids[-sizes[idx]] - for idx, mapped_ids in enumerate(c_pool) - if sizes[idx] > 1 - ] - ] - output_iids = uir_tuple[1][ - [ - mapped_ids[-sizes[idx] + 1] - for idx, mapped_ids in enumerate(c_pool) - if sizes[idx] > 1 - ] - ] - sizes 
-= 1 - for idx, size in enumerate(sizes): - if size == 1: - end_mask[idx] = 1 - start_mask = start_mask[np.nonzero(valid_id)[0]] - end_mask = end_mask[np.nonzero(valid_id)[0]] - sizes = sizes[np.nonzero(valid_id)[0]] - c_pool = [_ for _, valid in zip(c_pool, valid_id) if valid > 0] - if n_sample > 0: - negative_samples = rng.choice( - item_indices, size=n_sample, replace=True, p=item_dist - ) - output_iids = np.concatenate([output_iids, negative_samples]) - yield input_iids, output_iids, start_mask, np.nonzero(valid_id)[0] - valid_id = np.ones(len(input_iids), dtype="int") - if end_mask.sum() == len(input_iids): - break - start_mask.fill(0) # reset start masking - - -def score(model, layers, device, history_items): model.eval() H = [] for i in range(len(layers)): @@ -493,6 +328,7 @@ def score(model, layers, device, history_items): (1, layers[i]), dtype=torch.float32, requires_grad=False, device=device ) ) + O = None for iid in history_items: O = model.forward( torch.tensor([iid], requires_grad=False, device=device), @@ -500,4 +336,6 @@ def score(model, layers, device, history_items): None, training=False, ) + if O is None: + return None return O.squeeze().cpu().detach().numpy() diff --git a/cornac/models/gru4rec/recom_gru4rec.py b/cornac/models/gru4rec/recom_gru4rec.py index 7e161d924..a753b7891 100644 --- a/cornac/models/gru4rec/recom_gru4rec.py +++ b/cornac/models/gru4rec/recom_gru4rec.py @@ -1,4 +1,4 @@ -# Copyright 2023 The Cornac Authors. All Rights Reserved. +# Copyright 2026 The Cornac Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,17 +13,41 @@ # limitations under the License. 
# ============================================================================ -import numpy as np -from tqdm.auto import trange from collections import Counter -from cornac.models.recommender import NextItemRecommender +import numpy as np +from tqdm.auto import trange from ...utils import get_rng - - -class GRU4Rec(NextItemRecommender): - """Session-based Recommendations with Recurrent Neural Networks +from ..recommender import SequentialRecommender +from ..seq_utils import io_iter, user_io_iter + +SUPPORTED_LOSSES = ( + "cross-entropy", + "xe_softmax", + "softmax", + "bpr", + "bpr-max", + "top1", + "bce", + "ce", +) + + +class GRU4Rec(SequentialRecommender): + """Session-based / Session-aware Recommendations with Recurrent Neural Networks. + + The model is the same GRU-based architecture in either mode; only the + way training data is iterated differs: + + - ``mode="session-based"``: items are processed per session and the + hidden state is reset at session boundaries. This matches the original + GRU4Rec (Hidasi et al., 2015). + - ``mode="session-aware"``: per-user chronological item sequences are + concatenated across sessions and processed as a single sequence. The + hidden state is reset at *user* boundaries instead. User IDs are not + used in scoring but the data layout allows the RNN to capture + cross-session dependencies. Parameters ---------- @@ -31,35 +55,44 @@ class GRU4Rec(NextItemRecommender): The name of the recommender model. layers: list of int, optional, default: [100] - The number of hidden units in each layer + The number of hidden units in each layer. loss: str, optional, default: 'cross-entropy' - Select the loss function. + Loss function. Supported: 'cross-entropy', 'xe_softmax', 'softmax', 'bpr', + 'bpr-max', 'top1', 'bce', 'ce'. + + mode: str, optional, default: 'session-based' + Training data iteration strategy. One of 'session-based' or + 'session-aware'. batch_size: int, optional, default: 512 - Batch size + Batch size.
dropout_p_embed: float, optional, default: 0.0 - Dropout ratio for embedding layers + Dropout ratio for embedding layer. dropout_p_hidden: float, optional, default: 0.0 - Dropout ratio for hidden layers + Dropout ratio for hidden layers. learning_rate: float, optional, default: 0.05 - Learning rate for the optimizer + Learning rate for the optimizer. momentum: float, optional, default: 0.0 - Momentum for adaptive learning rate + Momentum for the adaptive learning rate optimizer. sample_alpha: float, optional, default: 0.5 - Tradeoff factor controls the contribution of negative sample towards final loss + Tradeoff factor controls the contribution of negative samples + towards the final loss (popularity-based sampling exponent). n_sample: int, optional, default: 2048 - Number of negative samples + Number of additional shared negative samples per mini-batch. embedding: int, optional, default: 0 + Size of the separate input embedding. ``0`` means no separate + embedding; use ``"layersize"`` to set it to ``layers[0]``. constrained_embedding: bool, optional, default: True + Whether input and output item embeddings are tied. n_epochs: int, optional, default: 10 @@ -67,18 +100,19 @@ class GRU4Rec(NextItemRecommender): Regularization coefficient for 'bpr-max' loss. elu_param: float, optional, default: 0.5 - Elu param for 'bpr-max' loss + ELU parameter for 'bpr-max' loss. - logq: float, optional, default: 0, - LogQ correction to offset the sampling bias affecting 'cross-entropy' loss. + logq: float, optional, default: 0.0 + LogQ correction strength to offset sampling bias for the + cross-entropy loss. device: str, optional, default: 'cpu' Set to 'cuda' for GPU support. - trainable: boolean, optional, default: True - When False, the model will not be re-trained, and input of pre-trained parameters are required. + trainable: bool, optional, default: True + When False, the model will not be re-trained. 
- verbose: boolean, optional, default: True + verbose: bool, optional, default: False When True, running logs are displayed. seed: int, optional, default: None @@ -89,7 +123,6 @@ class GRU4Rec(NextItemRecommender): Hidasi, B., Karatzoglou, A., Baltrunas, L., & Tikk, D. (2015). Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939. - """ def __init__( @@ -97,6 +130,7 @@ def __init__( name="GRU4Rec", layers=[100], loss="cross-entropy", + mode="session-based", batch_size=512, dropout_p_embed=0.0, dropout_p_hidden=0.0, @@ -115,7 +149,11 @@ def __init__( verbose=False, seed=None, ): - super().__init__(name, trainable=trainable, verbose=verbose) + super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) + if loss not in SUPPORTED_LOSSES: + raise ValueError( + f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}" + ) self.layers = layers self.loss = loss self.batch_size = batch_size @@ -135,22 +173,42 @@ def __init__( self.seed = seed self.rng = get_rng(seed) + def _build_loss_kwargs(self): + return dict( + P0=self.P0, + logq=self.logq, + sample_alpha=self.sample_alpha, + batch_size=None, # filled per-batch + bpreg=self.bpreg, + elu_param=self.elu_param, + n_sample=self.n_sample, + ) + def fit(self, train_set, val_set=None): super().fit(train_set, val_set) + if not self.trainable: + return self + import torch - from .gru4rec import GRU4RecModel, IndexedAdagradM, io_iter + from .gru4rec import GRU4RecModel + from ..seq_utils.losses import get_loss_function + from ..seq_utils.optim import IndexedAdagradM item_freq = Counter(self.train_set.uir_tuple[1]) - P0 = torch.tensor( - [item_freq[iid] for (_, iid) in self.train_set.iid_map.items()], - dtype=torch.float32, - device=self.device, - ) if self.logq > 0 else None + self.P0 = ( + torch.tensor( + [item_freq[iid] for (_, iid) in self.train_set.iid_map.items()], + dtype=torch.float32, + device=self.device, + ) + if self.logq > 0 + else None + ) self.model = 
GRU4RecModel( n_items=self.total_items, - P0=P0, + P0=self.P0, layers=self.layers, dropout_p_embed=self.dropout_p_embed, dropout_p_hidden=self.dropout_p_hidden, @@ -162,60 +220,86 @@ def fit(self, train_set, val_set=None): elu_param=self.elu_param, loss=self.loss, ).to(self.device) - self.model._reset_weights_to_compatibility_mode() + loss_fn = get_loss_function(self.loss) + loss_kwargs = self._build_loss_kwargs() + opt = IndexedAdagradM( self.model.parameters(), self.learning_rate, self.momentum ) progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) for _ in progress_bar: - H = [] - for i in range(len(self.layers)): - H.append( - torch.zeros( - (self.batch_size, self.layers[i]), - dtype=torch.float32, - requires_grad=False, - device=self.device, - ) + H = [ + torch.zeros( + (self.batch_size, self.layers[i]), + dtype=torch.float32, + requires_grad=False, + device=self.device, ) - total_loss = 0 + for i in range(len(self.layers)) + ] + total_loss = 0.0 cnt = 0 - for inc, (in_iids, out_iids, start_mask, valid_id) in enumerate( - io_iter( - s_iter=self.train_set.s_iter, - uir_tuple=self.train_set.uir_tuple, - n_sample=self.n_sample, - sample_alpha=self.sample_alpha, - rng=self.rng, - batch_size=self.batch_size, - shuffle=True, - ) - ): + data_iter = self._build_data_iter() + for inc, batch in enumerate(data_iter): + # Unified 5-tuple from io_iter / user_io_iter: + # (in_uids, in_iids, out_iids, start_mask, valid_id) + _, in_iids, out_iids, start_mask, valid_id = batch for i in range(len(H)): H[i][np.nonzero(start_mask)[0], :] = 0 H[i].detach_() H[i] = H[i][valid_id] - in_iids = torch.tensor(in_iids, requires_grad=False, device=self.device) - out_iids = torch.tensor( - out_iids, requires_grad=False, device=self.device + in_iids_t = torch.tensor( + in_iids, dtype=torch.long, requires_grad=False, device=self.device + ) + out_iids_t = torch.tensor( + out_iids, dtype=torch.long, requires_grad=False, device=self.device ) self.model.zero_grad() - R = 
self.model.forward(in_iids, H, out_iids, training=True) - L = self.model.loss_function(R, out_iids, len(in_iids)) / len(in_iids) + R = self.model.forward(in_iids_t, H, out_iids_t, training=True) + loss_kwargs["batch_size"] = len(in_iids) + loss_kwargs["out_iids"] = out_iids_t + L = loss_fn(R, **loss_kwargs) L.backward() opt.step() total_loss += L.cpu().detach().numpy() * len(in_iids) cnt += len(in_iids) - if inc % 10 == 0: + if inc % 10 == 0 and cnt > 0: progress_bar.set_postfix(loss=(total_loss / cnt)) return self - def score(self, user_idx, history_items, **kwargs): - from .gru4rec import score + def _build_data_iter(self): + if self.mode == "session-aware": + return user_io_iter( + train_set=self.train_set, + n_sample=self.n_sample, + sample_alpha=self.sample_alpha, + rng=self.rng, + batch_size=self.batch_size, + shuffle=True, + ) + return io_iter( + s_iter=self.train_set.s_iter, + uir_tuple=self.train_set.uir_tuple, + n_sample=self.n_sample, + sample_alpha=self.sample_alpha, + rng=self.rng, + batch_size=self.batch_size, + shuffle=True, + ) - if len(history_items) > 0: - return score(self.model, self.layers, self.device, history_items) - return np.ones(self.total_items, dtype="float") + def score(self, user_idx, history_items, **kwargs): + from .gru4rec import score as _score + + flat_history = self._flatten_history(history_items) + if len(flat_history) == 0: + return np.ones(self.total_items, dtype="float") + scores = _score(self.model, self.layers, self.device, flat_history) + if scores is None: + return np.ones(self.total_items, dtype="float") + # The output embedding has +1 padding row; trim if present + if scores.shape[-1] == self.total_items + 1: + scores = scores[: self.total_items] + return scores diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py index 5d8514e95..ab9dccefc 100644 --- a/cornac/models/recommender.py +++ b/cornac/models/recommender.py @@ -764,3 +764,75 @@ def score(self, user_idx, history_items, **kwargs): """ raise 
NotImplementedError("The algorithm is not able to make score prediction!") + + +SUPPORTED_MODES = ("session-based", "session-aware") + + +class SequentialRecommender(NextItemRecommender): + """Generic class for a sequential next-item recommender model. + + Sequential recommenders consume a user's interaction history (either a + flat list of item ids or a list of session item lists) and operate in one + of two modes: + + - ``"session-based"``: only the most recent session is used as context; + the hidden state / history window resets at session boundaries. + - ``"session-aware"``: the user's chronological cross-session item + sequence is used as context. + + Subclasses should set the desired default in their own ``__init__`` and + forward ``mode`` to ``super().__init__``. + + Parameters + ---------- + name: str, required + Name of the recommender model. + + mode: str, optional, default: 'session-based' + One of ``'session-based'`` or ``'session-aware'``. + + trainable: boolean, optional, default: True + When False, the model is not trainable. + + verbose: boolean, optional, default: False + When True, running logs are displayed. + """ + + def __init__(self, name, mode="session-based", trainable=True, verbose=False): + if mode not in SUPPORTED_MODES: + raise ValueError( + f"mode='{mode}' not supported; choose from {SUPPORTED_MODES}" + ) + super().__init__(name=name, trainable=trainable, verbose=verbose) + self.mode = mode + + def _flatten_history(self, history_items): + """Coerce ``history_items`` into a flat list of item ids. + + Accepts both: + + - ``[i0, i1, i2, ...]`` (flat list, as produced by + :class:`cornac.eval_methods.NextItemEvaluation`). + - ``[[i0, i1], [i2, i3], ...]`` (list of session item lists, as + produced by the session-aware ``NextSessionEvaluation`` flow). + + For ``mode == "session-based"`` the list of sessions is collapsed to + just the items of the *last non-empty* session. 
For + ``mode == "session-aware"`` all sessions are concatenated + chronologically. + """ + if history_items is None or len(history_items) == 0: + return [] + first = history_items[0] + if hasattr(first, "__iter__") and not isinstance(first, (str, bytes)): + if self.mode == "session-based": + for sess in reversed(history_items): + if len(sess) > 0: + return list(sess) + return [] + flat = [] + for sess in history_items: + flat.extend(list(sess)) + return flat + return list(history_items) diff --git a/cornac/models/sasrec/__init__.py b/cornac/models/sasrec/__init__.py new file mode 100644 index 000000000..e163b9a7d --- /dev/null +++ b/cornac/models/sasrec/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from .recom_sasrec import SASRec diff --git a/cornac/models/sasrec/recom_sasrec.py b/cornac/models/sasrec/recom_sasrec.py new file mode 100644 index 000000000..b48a9f0c3 --- /dev/null +++ b/cornac/models/sasrec/recom_sasrec.py @@ -0,0 +1,246 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
SUPPORTED_LOSSES = (
    "bce",
    "ce",
    "bpr",
    "bpr-max",
    "softmax",
    "cross-entropy",
    "xe_softmax",
    "top1",
)


class SASRec(SequentialRecommender):
    """Self-attentive sequential recommender (SASRec).

    Parameters
    ----------
    name: string, default: 'SASRec'
        Name of the recommender model.

    embedding_dim: int, optional, default: 100
        Size of the item embeddings.

    mode: str, optional, default: 'session-aware'
        How training sequences are built: 'session-based' (one sequence per
        session) or 'session-aware' (one chronological cross-session
        sequence per user).

    loss: str, optional, default: 'ce'
        Name of the training loss; must be one of ``SUPPORTED_LOSSES``
        ('bce', 'ce', 'bpr', 'bpr-max', 'softmax' a.k.a. 'cross-entropy' /
        'xe_softmax', 'top1').

    batch_size: int, optional, default: 512
        Number of (history, target) pairs per mini-batch.

    learning_rate: float, optional, default: 0.001
        Learning rate handed to Adam.

    n_sample: int, optional, default: 2048
        Number of negative samples shared by all rows of a mini-batch.

    sample_alpha: float, optional, default: 0.5
        Exponent applied to item popularity when sampling negatives.

    n_epochs: int, optional, default: 10
        Number of passes over the training iterator.

    max_len: int, optional, default: 50
        Length of the left-padded history window fed to the model.

    num_blocks: int, optional, default: 2
        Number of self-attention blocks.

    num_heads: int, optional, default: 1
        Number of attention heads per block.

    dropout: float, optional, default: 0.2
        Dropout rate passed to the model.

    l2_reg: float, optional, default: 0.0
        Weight of the parameter-norm penalty added to the loss when > 0.

    bpreg: float, optional, default: 1.0
        Regularization coefficient for the 'bpr-max' loss.

    elu_param: float, optional, default: 0.5
        ELU parameter for the 'bpr-max' loss.

    device: str, optional, default: 'cpu'
        Torch device used for the model and the batch tensors.

    use_pos_emb: bool, optional, default: True
        Whether the model adds learned positional embeddings.

    trainable, verbose, seed: standard recommender parameters.

    References
    ----------
    Kang, W.-C., & McAuley, J. (2018). Self-attentive sequential
    recommendation. ICDM.
    """

    def __init__(
        self,
        name="SASRec",
        embedding_dim=100,
        mode="session-aware",
        loss="ce",
        batch_size=512,
        learning_rate=0.001,
        n_sample=2048,
        sample_alpha=0.5,
        n_epochs=10,
        max_len=50,
        num_blocks=2,
        num_heads=1,
        dropout=0.2,
        l2_reg=0.0,
        bpreg=1.0,
        elu_param=0.5,
        device="cpu",
        use_pos_emb=True,
        trainable=True,
        verbose=False,
        seed=None,
    ):
        super().__init__(name, mode=mode, trainable=trainable, verbose=verbose)
        if loss not in SUPPORTED_LOSSES:
            raise ValueError(
                f"loss='{loss}' not supported; choose from {SUPPORTED_LOSSES}"
            )

        # Architecture.
        self.embedding_dim = embedding_dim
        self.max_len = max_len
        self.num_blocks = num_blocks
        self.num_heads = num_heads
        self.dropout = dropout
        self.use_pos_emb = use_pos_emb

        # Objective.
        self.loss = loss
        self.l2_reg = l2_reg
        self.bpreg = bpreg
        self.elu_param = elu_param

        # Optimisation and sampling.
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.n_sample = n_sample
        self.sample_alpha = sample_alpha
        self.n_epochs = n_epochs
        self.device = device

        # Randomness.
        self.seed = seed
        self.rng = get_rng(seed)

    def _build_data_iter(self, pad_index):
        """Return a fresh mini-batch iterator over ``self.train_set``.

        'session-aware' mode uses ``user_seq_iter``; every other mode falls
        back to ``session_seq_iter``.
        """
        make_iter = user_seq_iter if self.mode == "session-aware" else session_seq_iter
        return make_iter(
            train_set=self.train_set,
            pad_index=pad_index,
            batch_size=self.batch_size,
            max_len=self.max_len,
            n_sample=self.n_sample,
            sample_alpha=self.sample_alpha,
            rng=self.rng,
            shuffle=True,
        )

    def fit(self, train_set, val_set=None):
        """Train the model on ``train_set`` and return ``self``.

        ``val_set`` is only forwarded to the parent ``fit``; nothing else in
        this method reads it.
        """
        super().fit(train_set, val_set)
        if not self.trainable:
            return self

        # Torch is imported lazily so the package imports without it.
        import torch

        from .sasrec import SASRecModel
        from ..seq_utils.losses import get_loss_function

        # A missing seed falls back to 0.
        torch.manual_seed(0 if self.seed is None else self.seed)

        # The padding id is the first index past the real items.
        self.pad_idx = self.total_items
        self.model = SASRecModel(
            item_num=self.total_items,
            embedding_dim=self.embedding_dim,
            maxlen=self.max_len,
            n_layers=self.num_blocks,
            n_heads=self.num_heads,
            use_pos_emb=self.use_pos_emb,
            dropout=self.dropout,
            pad_idx=self.pad_idx,
            device=self.device,
        )

        criterion = get_loss_function(self.loss)
        optimizer = torch.optim.Adam(
            self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98)
        )

        def _as_long(values):
            return torch.tensor(
                values, dtype=torch.long, device=self.device, requires_grad=False
            )

        epochs = trange(1, self.n_epochs + 1, disable=not self.verbose)
        for _ in epochs:
            self.model.train()
            loss_sum = 0.0
            n_seen = 0
            batches = self._build_data_iter(self.pad_idx)
            for step, (_uids, hist_iids, out_iids) in enumerate(batches):
                n_rows = len(hist_iids)
                if n_rows < 2:
                    # Batches with fewer than two rows are skipped.
                    continue
                history = _as_long(hist_iids)
                candidates = _as_long(out_iids)

                self.model.zero_grad()
                scores = self.model(None, history, candidates, return_hidden=False)

                batch_loss = criterion(
                    scores,
                    out_iids=candidates,
                    batch_size=n_rows,
                    bpreg=self.bpreg,
                    elu_param=self.elu_param,
                    n_sample=self.n_sample,
                )
                if self.l2_reg > 0:
                    for param in self.model.parameters():
                        batch_loss = batch_loss + self.l2_reg * torch.norm(param)

                batch_loss.backward()
                optimizer.step()

                loss_sum += batch_loss.cpu().detach().numpy() * n_rows
                n_seen += n_rows
                if step % 10 == 0 and n_seen > 0:
                    epochs.set_postfix(loss=(loss_sum / n_seen))
        return self

    def score(self, user_idx, history_items, **kwargs):
        """Score every item given the interaction history.

        An empty history yields a constant vector of ones; otherwise the
        flattened history is left-padded/truncated to ``max_len`` and passed
        to the model's ``predict``.
        """
        import torch

        items = self._flatten_history(history_items)
        if len(items) == 0:
            return np.ones(self.total_items, dtype="float")

        padding = [self.pad_idx] * (self.max_len - len(items))
        window = (padding + list(items))[-self.max_len :]
        batch = torch.tensor([window], dtype=torch.long, device=self.device)
        self.model.eval()
        return self.model.predict(user_idx, batch)
class PointWiseFeedForward(nn.Module):
    """Two kernel-size-1 convolutions with dropout/ReLU and a residual add.

    The input is transposed so the convolutions run over its last dimension
    as channels, then transposed back before the input is added.
    """

    def __init__(self, hidden_units, dropout_rate):
        super(PointWiseFeedForward, self).__init__()
        conv1 = nn.Conv1d(hidden_units, hidden_units, kernel_size=1)
        dropout1 = nn.Dropout(p=dropout_rate)
        relu = nn.ReLU()
        conv2 = nn.Conv1d(hidden_units, hidden_units, kernel_size=1)
        dropout2 = nn.Dropout(p=dropout_rate)
        self.process = nn.Sequential(conv1, dropout1, relu, conv2, dropout2)

    def forward(self, inputs):
        """Return ``process(inputs) + inputs`` with the input's layout."""
        outputs = self.process(inputs.transpose(-1, -2)).transpose(-1, -2)
        # Out-of-place residual add keeps the activation untouched.
        return outputs + inputs


class SASRecModel(nn.Module):
    """SASRec self-attention model (Kang & McAuley, 2018).

    Operates on sequences of past item ids, returning the last-position
    representation. Item ids are integers in ``[0, item_num)`` with
    ``item_num`` being used as the padding index by default.

    ``forward(_, hist_iids, out_iids, return_hidden=False)`` returns a
    ``(B, K)`` score matrix, one column per entry of ``out_iids``. Callers
    pass the ``B`` in-batch positives followed by ``N`` shared negatives so
    that the result is ``(B, B+N)``.

    Parameters
    ----------
    item_num: int
        Number of real items; the embedding tables get one extra row.
    embedding_dim: int, default: 100
    maxlen: int, default: 20
        Longest history length; the positional table has ``maxlen + 1`` rows.
    n_layers: int, default: 2
        Number of attention + feed-forward blocks.
    n_heads: int, default: 1
    use_pos_emb: bool, default: True
        Create and add learned positional embeddings.
    use_biases: bool, default: True
        Create a per-item bias table added to the scores.
    dropout: float, default: 0.2
    pad_idx: int, default: -1
        Padding id; a negative value means ``item_num``.
    init_std: float, default: 0.02
        Standard deviation of the normal weight initialisation.
    device: str, default: 'cpu'
        Device the module is moved to at construction time.
    """

    def __init__(
        self,
        item_num,
        embedding_dim=100,
        maxlen=20,
        n_layers=2,
        n_heads=1,
        use_pos_emb=True,
        use_biases=True,
        dropout=0.2,
        pad_idx=-1,
        init_std=0.02,
        device="cpu",
    ):
        super(SASRecModel, self).__init__()
        self.item_num = item_num
        self.pad_idx = pad_idx if pad_idx >= 0 else item_num
        self.maxlen = maxlen
        self.dev = device
        self.init_std = init_std

        # +1 row for the padding entry at pad_idx
        self.item_emb = nn.Embedding(
            self.item_num + 1, embedding_dim, padding_idx=self.pad_idx
        )
        if use_pos_emb:
            self.pos_emb = nn.Embedding(maxlen + 1, embedding_dim)
        if use_biases:
            self.item_biases = nn.Embedding(
                self.item_num + 1, 1, padding_idx=self.pad_idx
            )
        self.emb_dropout = nn.Dropout(p=dropout)

        self.attention_layernorms = nn.ModuleList()
        self.attention_layers = nn.ModuleList()
        self.forward_layernorms = nn.ModuleList()
        self.forward_layers = nn.ModuleList()
        self.last_layernorm = nn.LayerNorm(embedding_dim, eps=1e-8)

        for _ in range(n_layers):
            self.attention_layernorms.append(nn.LayerNorm(embedding_dim, eps=1e-8))
            self.attention_layers.append(
                nn.MultiheadAttention(embedding_dim, n_heads, dropout)
            )
            self.forward_layernorms.append(nn.LayerNorm(embedding_dim, eps=1e-8))
            self.forward_layers.append(PointWiseFeedForward(embedding_dim, dropout))

        self.apply(self._init_weights)
        self.to(device)

    def _init_weights(self, module):
        """Normal(0, init_std) weights, zero biases, identity LayerNorm."""
        if isinstance(module, (nn.Linear, nn.Conv1d)):
            module.weight.data.normal_(mean=0.0, std=self.init_std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.init_std)
            if module.padding_idx is not None:
                # Keep the padding row at exactly zero.
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _score_items(self, hidden, cand_items, biases=None):
        """Dot-product scores ``hidden @ cand_items.T`` plus optional biases."""
        scores = torch.mm(hidden, cand_items.T)
        if biases is not None:
            return scores + biases.T
        return scores

    def _encode(self, hist_iids):
        """Encode ``(B, T)`` histories into ``(B, D)`` last-position states."""
        seqs = self.item_emb(hist_iids) * (self.item_emb.embedding_dim**0.5)
        if hasattr(self, "pos_emb"):
            # Built directly on the target device; the (T, D) lookup is
            # broadcast over the batch instead of tiling indices on the host.
            positions = torch.arange(hist_iids.shape[1], device=seqs.device)
            seqs = seqs + self.pos_emb(positions).unsqueeze(0)
        seqs = self.emb_dropout(seqs)

        # 1.0 at real items, 0.0 at padding; used to zero padded positions.
        keep = (hist_iids != self.pad_idx).to(dtype=seqs.dtype).unsqueeze(-1)
        seqs = seqs * keep

        tl = seqs.shape[1]
        # True above the diagonal: a position may not attend to later ones.
        attention_mask = ~torch.tril(
            torch.ones((tl, tl), dtype=torch.bool, device=seqs.device)
        )

        for i in range(len(self.attention_layers)):
            # The attention layers are created without batch_first, hence
            # the (T, B, D) layout here.
            seqs_t = torch.transpose(seqs, 0, 1)
            Q = self.attention_layernorms[i](seqs_t)
            mha_out, _ = self.attention_layers[i](
                Q, seqs_t, seqs_t, attn_mask=attention_mask
            )
            seqs_t = Q + mha_out
            seqs = torch.transpose(seqs_t, 0, 1)
            seqs = self.forward_layernorms[i](seqs)
            seqs = self.forward_layers[i](seqs)
            seqs = seqs * keep

        log_feats = self.last_layernorm(seqs)
        return log_feats[:, -1, :]

    def forward(self, user_ids, hist_iids, out_iids, return_hidden=False):
        """Score ``out_iids`` for every history in the batch.

        ``user_ids`` is accepted for interface compatibility and not used.
        With ``return_hidden=True`` the tuple ``(hidden, item_emb, biases)``
        is returned instead of the score matrix.
        """
        hidden = self._encode(hist_iids)
        item_emb = self.item_emb(out_iids)
        biases = self.item_biases(out_iids) if hasattr(self, "item_biases") else None
        if return_hidden:
            return hidden, item_emb, biases
        return self._score_items(hidden, item_emb, biases)

    @torch.no_grad()
    def predict(self, user_ids, log_seqs, item_indices=None):
        """Score candidate items for padded sequence(s).

        Parameters
        ----------
        user_ids:
            Unused; kept for interface compatibility.
        log_seqs: tensor or array-like of item ids, ``(B, T)``
        item_indices: array-like of item ids, optional
            Candidates to score; defaults to all ``item_num`` real items
            (the padding row is never scored).

        Returns
        -------
        numpy.ndarray
            Shape ``(K,)`` for a single sequence (``B == 1``), where ``K``
            is the number of candidates, including ``K == 1``; shape
            ``(B, K)`` for a batch of sequences.
        """
        # Follow the weights rather than the constructor argument so the
        # indices stay on the right device after a later ``.to(...)``.
        device = self.item_emb.weight.device
        if item_indices is None:
            item_indices = torch.arange(self.item_num, device=device)
        else:
            item_indices = torch.as_tensor(
                item_indices, dtype=torch.long, device=device
            ).reshape(-1)
        log_seqs = torch.as_tensor(log_seqs, dtype=torch.long, device=device)
        hidden = self._encode(log_seqs)
        item_emb = self.item_emb(item_indices)
        biases = (
            self.item_biases(item_indices) if hasattr(self, "item_biases") else None
        )
        scores = self._score_items(hidden, item_emb, biases)
        # Drop only the batch axis: a bare squeeze() would also collapse a
        # single-candidate result to a 0-d array.
        return scores.squeeze(0).detach().cpu().numpy()
Only the numpy-only iterators are +re-exported here; the torch-using helpers must be imported explicitly from +their dedicated modules inside ``fit()``:: + + # at module top — safe, numpy only + from ..seq_utils import io_iter, user_io_iter, session_seq_iter, user_seq_iter + + def fit(self, train_set, val_set=None): + import torch + from ..seq_utils.losses import get_loss_function + from ..seq_utils.optim import IndexedAdagradM + ... + +Unified output contract +----------------------- + +* **Model forward output**: every sequential model returns an + ``item_scores`` matrix of shape ``(B, B + N)`` where the diagonal holds the + positive target scores and the remaining columns are in-batch + sampled + negatives. +* **Loss functions** (see :mod:`.losses`) all consume that ``(B, B+N)`` + matrix and return a scalar. +* **Iterators** (see :mod:`.iterators`) yield uniform tuples regardless of + mode: + + ========================= ================================================= + Iterator Yielded tuple + ========================= ================================================= + ``io_iter`` ``(in_uids, in_iids, out_iids, start_mask, + valid_id)`` — per-item RNN, session-based. + ``user_io_iter`` ``(in_uids, in_iids, out_iids, start_mask, + valid_id)`` — per-item RNN, session-aware. + ``session_seq_iter`` ``(in_uids, hist_iids, out_iids)`` — sequence + models, session-based. + ``user_seq_iter`` ``(in_uids, hist_iids, out_iids)`` — sequence + models, session-aware. 
+ ========================= ================================================= +""" + +from .iterators import ( + io_iter, + user_io_iter, + session_seq_iter, + user_seq_iter, +) + +__all__ = [ + "io_iter", + "user_io_iter", + "session_seq_iter", + "user_seq_iter", +] diff --git a/cornac/models/seq_utils/iterators.py b/cornac/models/seq_utils/iterators.py new file mode 100644 index 000000000..73c4f4dfc --- /dev/null +++ b/cornac/models/seq_utils/iterators.py @@ -0,0 +1,390 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Mini-batch iterators for session-based and session-aware training. + +This module is intentionally numpy-only so it can be imported from the +top of any ``recom_*.py`` file without pulling in :mod:`torch`. 
def _build_neg_sampler(uir_tuple, sample_alpha):
    """Precompute the popularity-based sampling distribution over items.

    Returns ``(item_indices, item_dist)``: the distinct item ids ordered by
    decreasing count, and their counts raised to ``sample_alpha`` and
    normalised to sum to one.
    """
    # Rank once so ids and counts are guaranteed to be aligned.
    ranked = Counter(uir_tuple[1]).most_common()
    item_indices = np.array([iid for iid, _ in ranked], dtype="int")
    item_dist = np.array([cnt for _, cnt in ranked], dtype="float") ** sample_alpha
    return item_indices, item_dist / item_dist.sum()


def _append_negatives(output_iids, n_sample, rng, sampler):
    """Append ``n_sample`` sampled negatives to ``output_iids`` (no-op if 0)."""
    if n_sample <= 0:
        return output_iids
    item_indices, item_dist = sampler
    negatives = rng.choice(item_indices, size=n_sample, replace=True, p=item_dist)
    return np.concatenate([output_iids, negatives])


def io_iter(s_iter, uir_tuple, n_sample=0, sample_alpha=0, rng=None, batch_size=1, shuffle=False):
    """Session-based per-item iterator (parallel sessions).

    Yields per training step a 5-tuple
    ``(in_uids, in_iids, out_iids, start_mask, valid_id)`` where:

    - ``in_uids``: user id owning each parallel slot's current session.
      Shape ``(B',)``.
    - ``in_iids``: current input item id in each slot. Shape ``(B',)``.
    - ``out_iids``: target item ids (followed by ``n_sample`` shared
      negatives). Shape ``(B' + N,)``.
    - ``start_mask``: 1 in slots that just started a new session (used to
      reset RNN hidden state). Shape ``(B',)``.
    - ``valid_id``: indices, relative to the previous step's slots, of the
      slots that are still active (used to trim the hidden state when
      sessions end at different times). Shape ``(B',)``.

    ``B'`` equals ``batch_size`` while every slot can be kept busy and
    shrinks during the drain phase as sessions are exhausted. Slots that
    were never filled (fewer usable sessions than ``batch_size``) are
    dropped on the first drain step, so all yielded arrays stay aligned.

    Parameters
    ----------
    s_iter: callable
        Called as ``s_iter(batch_size, shuffle)``; must yield pairs whose
        second element is a list of sessions, each a list of row indices
        into ``uir_tuple``.
    uir_tuple: tuple of arrays
        ``uir_tuple[0]`` holds user ids and ``uir_tuple[1]`` item ids.
    n_sample, sample_alpha:
        Number of shared negatives and popularity exponent of the sampler.
    rng: numpy random generator, optional
    batch_size: int
    shuffle: bool
        Forwarded to ``s_iter``.

    The 5-tuple matches :func:`user_io_iter`.
    """
    rng = rng if rng is not None else get_rng(None)
    start_mask = np.zeros(batch_size, dtype="int")
    end_mask = np.ones(batch_size, dtype="int")
    l_pool = []  # pending sessions (list of mapped-id lists)
    c_pool = [None for _ in range(batch_size)]
    u_pool = np.zeros(batch_size, dtype="int")
    sizes = np.zeros(batch_size, dtype="int")
    sampler = _build_neg_sampler(uir_tuple, sample_alpha) if n_sample > 0 else None

    def _user_of(mapped_ids):
        # Any row in the session shares the same user id.
        return int(uir_tuple[0][mapped_ids[0]])

    for _, batch_mapped_ids in s_iter(batch_size, shuffle):
        l_pool += batch_mapped_ids
        while len(l_pool) > 0:
            if end_mask.sum() == 0:
                # Every slot holds a running session: emit one step.
                input_iids = uir_tuple[1][[ids[-sizes[idx]] for idx, ids in enumerate(c_pool)]]
                output_iids = uir_tuple[1][[ids[-sizes[idx] + 1] for idx, ids in enumerate(c_pool)]]
                sizes -= 1
                end_mask[sizes == 1] = 1
                output_iids = _append_negatives(output_iids, n_sample, rng, sampler)
                yield (
                    u_pool.copy(),
                    input_iids,
                    output_iids,
                    start_mask.copy(),
                    np.arange(batch_size, dtype="int"),
                )
                start_mask.fill(0)
            while end_mask.sum() > 0 and len(l_pool) > 0:
                # Refill free slots; sessions with a single item are dropped.
                next_seq = l_pool.pop()
                if len(next_seq) > 1:
                    idx = np.nonzero(end_mask)[0][0]
                    end_mask[idx] = 0
                    start_mask[idx] = 1
                    c_pool[idx] = next_seq
                    u_pool[idx] = _user_of(next_seq)
                    sizes[idx] = len(c_pool[idx])

    # Drain phase: no more sessions to load, slots retire one by one.
    valid_id = np.ones(batch_size, dtype="int")
    while True:
        # size == 1: session consumed; size <= 0: slot was never filled.
        finished = sizes <= 1
        end_mask[finished] = 1
        valid_id[finished] = 0
        keep = np.nonzero(valid_id)[0]
        if len(keep) == 0:
            break
        input_iids = uir_tuple[1][[c_pool[idx][-sizes[idx]] for idx in keep]]
        output_iids = uir_tuple[1][[c_pool[idx][-sizes[idx] + 1] for idx in keep]]
        sizes -= 1
        end_mask[sizes == 1] = 1
        start_mask = start_mask[keep]
        end_mask = end_mask[keep]
        sizes = sizes[keep]
        c_pool = [c_pool[idx] for idx in keep]
        u_pool = u_pool[keep]
        output_iids = _append_negatives(output_iids, n_sample, rng, sampler)
        yield u_pool.copy(), input_iids, output_iids, start_mask.copy(), keep
        valid_id = np.ones(len(keep), dtype="int")
        if end_mask.sum() == len(keep):
            break
        start_mask.fill(0)


# NOTE: A per-RNN-step session-aware iterator with a max_len history window
# (the "uio_iter" from CoVE_models) is intentionally not provided here: for
# sequence/transformer models the cleaner ``session_seq_iter`` /
# ``user_seq_iter`` below are used instead. For RNN-style GRU4Rec see
# ``user_io_iter`` for session-aware per-item iteration over user sequences.


def _user_chrono_sequences(train_set):
    """Build per-user chronological item sequences across sessions.

    Returns a list of ``(user_idx, item_sequence)`` tuples. Within a user,
    sessions are ordered by the timestamp of their first interaction when
    ``train_set.timestamps`` is available (otherwise in stored order), and
    items keep their in-session order. Users with fewer than two items are
    left out.
    """
    uir_tuple = train_set.uir_tuple
    sessions = train_set.sessions
    user_sessions = train_set.user_session_data
    timestamps = train_set.timestamps

    sequences = []
    for uid, sids in user_sessions.items():
        if timestamps is not None:
            order = sorted(sids, key=lambda sid: timestamps[sessions[sid][0]])
        else:
            order = list(sids)
        items = []
        for sid in order:
            items.extend(int(i) for i in uir_tuple[1][sessions[sid]])
        if len(items) > 1:
            sequences.append((int(uid), items))
    return sequences


def user_io_iter(train_set, n_sample=0, sample_alpha=0, rng=None, batch_size=1, shuffle=False):
    """Session-aware per-item iterator (parallel users).

    Like :func:`io_iter` but the parallel "slots" hold *user* chronological
    item sequences (concatenated across sessions) instead of single sessions,
    so ``start_mask`` marks *user* boundaries rather than *session*
    boundaries. Yields the same 5-tuple
    ``(in_uids, in_iids, out_iids, start_mask, valid_id)``.

    When there are fewer usable user sequences than ``batch_size`` the
    iterator goes straight to the drain phase and still yields every
    transition; it yields nothing only when no user has two or more items.
    """
    rng = rng if rng is not None else get_rng(None)
    sequences = _user_chrono_sequences(train_set)
    if shuffle:
        rng.shuffle(sequences)
    uir_tuple = train_set.uir_tuple
    sampler = _build_neg_sampler(uir_tuple, sample_alpha) if n_sample > 0 else None

    start_mask = np.zeros(batch_size, dtype="int")
    end_mask = np.ones(batch_size, dtype="int")
    c_pool = [None for _ in range(batch_size)]
    u_pool = [None for _ in range(batch_size)]
    sizes = np.zeros(batch_size, dtype="int")
    pool_iter = iter(sequences)

    def _refill():
        """Load sequences into free slots; False once the pool runs dry."""
        while end_mask.sum() > 0:
            try:
                uid, seq = next(pool_iter)
            except StopIteration:
                return False
            if len(seq) <= 1:
                continue
            idx = np.nonzero(end_mask)[0][0]
            end_mask[idx] = 0
            start_mask[idx] = 1
            u_pool[idx] = uid
            c_pool[idx] = seq
            sizes[idx] = len(seq)
        return True

    # Main phase: runs only while every slot can be kept busy.
    slots_full = _refill()
    while slots_full:
        input_uids = np.array(u_pool, dtype="int")
        input_iids = np.array([seq[-size] for seq, size in zip(c_pool, sizes)], dtype="int")
        output_iids = np.array([seq[-size + 1] for seq, size in zip(c_pool, sizes)], dtype="int")
        sizes -= 1
        end_mask[sizes == 1] = 1
        output_iids = _append_negatives(output_iids, n_sample, rng, sampler)
        yield input_uids, input_iids, output_iids, start_mask.copy(), np.arange(batch_size, dtype="int")
        start_mask.fill(0)
        if end_mask.sum() > 0:
            slots_full = _refill()

    # Drain phase: consume what is left in the (possibly partly filled) slots.
    valid_id = np.ones(batch_size, dtype="int")
    while True:
        # size == 1: sequence consumed; size <= 0: slot was never filled.
        finished = sizes <= 1
        end_mask[finished] = 1
        valid_id[finished] = 0
        keep = np.nonzero(valid_id)[0]
        if len(keep) == 0:
            break
        input_uids = np.array([u_pool[idx] for idx in keep], dtype="int")
        input_iids = np.array([c_pool[idx][-sizes[idx]] for idx in keep], dtype="int")
        output_iids = np.array([c_pool[idx][-sizes[idx] + 1] for idx in keep], dtype="int")
        sizes -= 1
        end_mask[sizes == 1] = 1
        start_mask = start_mask[keep]
        end_mask = end_mask[keep]
        sizes = sizes[keep]
        c_pool = [c_pool[idx] for idx in keep]
        u_pool = [u_pool[idx] for idx in keep]
        output_iids = _append_negatives(output_iids, n_sample, rng, sampler)
        yield input_uids, input_iids, output_iids, start_mask.copy(), keep
        valid_id = np.ones(len(keep), dtype="int")
        if end_mask.sum() == len(keep):
            break
        start_mask.fill(0)


def _prefix_batches(sequences, pad_index, batch_size, max_len, n_sample, rng, sampler):
    """Batch every (left-padded prefix, next item) pair of ``sequences``.

    ``sequences`` is an iterable of ``(uid, items)``. Full batches are
    yielded as soon as ``batch_size`` pairs are buffered; a trailing partial
    batch is yielded only if it holds at least two pairs.
    """

    def _pack(uids, hists, targets):
        out_iids = _append_negatives(np.array(targets, dtype="int"), n_sample, rng, sampler)
        return np.array(uids, dtype="int"), np.array(hists, dtype="int"), out_iids

    uids, hists, targets = [], [], []
    for uid, items in sequences:
        for t in range(1, len(items)):
            prefix = items[:t][-max_len:]
            hists.append([pad_index] * (max_len - len(prefix)) + list(prefix))
            uids.append(uid)
            targets.append(items[t])
            if len(uids) == batch_size:
                yield _pack(uids, hists, targets)
                uids, hists, targets = [], [], []
    if len(uids) > 1:
        yield _pack(uids, hists, targets)


def session_seq_iter(
    train_set,
    pad_index,
    batch_size=64,
    max_len=20,
    n_sample=2048,
    sample_alpha=0.5,
    rng=None,
    shuffle=True,
):
    """Session-based sequence iterator for transformer/seq models.

    Each session is one training sequence. A session ``[i0, i1, ..., iT]``
    contributes ``T`` training triples ``(uid, hist, target)``: for every
    ``t >= 1`` the history is the prefix ``[i0, ..., i(t-1)]`` truncated to
    its last ``max_len`` items and left-padded with ``pad_index``, and the
    target is ``it``. Sessions with fewer than two items are skipped.

    Yields ``(in_uids, hist_iids, out_iids)`` with shapes ``(B,)``,
    ``(B, max_len)`` and ``(B + n_sample,)``; ``out_iids`` holds the targets
    followed by the shared negatives.
    """
    rng = rng if rng is not None else get_rng(None)
    uir_tuple = train_set.uir_tuple
    sessions = train_set.sessions
    sids = list(sessions.keys())
    if shuffle:
        rng.shuffle(sids)
    sampler = _build_neg_sampler(uir_tuple, sample_alpha) if n_sample > 0 else None

    def _session_sequences():
        for sid in sids:
            mapped_ids = sessions[sid]
            items = list(uir_tuple[1][mapped_ids])
            if len(items) < 2:
                continue
            # Any row in the session shares the same user id.
            yield int(uir_tuple[0][mapped_ids[0]]), items

    yield from _prefix_batches(
        _session_sequences(), pad_index, batch_size, max_len, n_sample, rng, sampler
    )


def user_seq_iter(
    train_set,
    pad_index,
    batch_size=64,
    max_len=20,
    n_sample=2048,
    sample_alpha=0.5,
    rng=None,
    shuffle=True,
):
    """Session-aware sequence iterator for transformer/seq models.

    Iterates over users; for each user the chronological cross-session item
    sequence is built and turned into per-step ``(uid, hist, target)``
    triples exactly as in :func:`session_seq_iter`, so a history may span
    several sessions. Yields ``(in_uids, hist_iids, out_iids)``.
    """
    rng = rng if rng is not None else get_rng(None)
    sequences = _user_chrono_sequences(train_set)
    if shuffle:
        rng.shuffle(sequences)
    uir_tuple = train_set.uir_tuple
    sampler = _build_neg_sampler(uir_tuple, sample_alpha) if n_sample > 0 else None

    yield from _prefix_batches(
        sequences, pad_index, batch_size, max_len, n_sample, rng, sampler
    )
def softmax_neg(X):
    """Row-wise softmax restricted to the off-diagonal (negative) entries.

    Diagonal entries (the positives) get weight zero. A single-row input is
    returned un-normalised.
    """
    off_diag = 1.0 - torch.eye(*X.shape, out=torch.empty_like(X))
    masked = X * off_diag
    row_max = masked.max(dim=1, keepdim=True)[0]
    weights = torch.exp(masked - row_max) * off_diag
    if weights.size(0) == 1:
        return weights
    return weights / (weights.sum(dim=1, keepdim=True) + 1e-24)


def bpr_loss(item_scores, **kwargs):
    """BPR pairwise log-sigmoid loss against all other columns.

    Parameters
    ----------
    item_scores: torch.Tensor of shape (B, B+N)
        Score matrix with positives on the diagonal.

    Returns
    -------
    Scalar tensor: the summed off-diagonal loss divided by the number of
    rows and by ``max(columns - 1, 1)``.
    """
    n_rows, n_cols = item_scores.size(0), item_scores.size(1)
    positives = torch.diag(item_scores).reshape(n_rows, -1)
    pairwise = F.logsigmoid(positives - item_scores)
    off_diag = 1.0 - torch.eye(*pairwise.shape, out=torch.empty_like(pairwise))
    total = -torch.sum(pairwise * off_diag)
    return total / n_rows / max(n_cols - 1, 1)


def top1_loss(item_scores, n_sample=0, **kwargs):
    """TOP1 ranking loss from Hidasi et al. (2015)."""
    n_rows = item_scores.size(0)
    target = torch.diag(item_scores).reshape(n_rows, -1)
    ranking = torch.sigmoid(item_scores - target) + torch.sigmoid(item_scores**2)
    per_row = torch.mean(ranking, dim=1)
    correction = torch.sigmoid(target**2) / (n_rows + n_sample)
    # per_row is (B,) and correction is (B, 1): the subtraction broadcasts
    # to (B, B) before the sum. This is kept exactly as is.
    return torch.sum(per_row - correction) / n_rows


def xe_softmax_loss(item_scores, out_iids=None, P0=None, logq=0.0, sample_alpha=0.5, batch_size=None, **kwargs):
    """Cross-entropy with softmax over in-batch + sampled negatives.

    Supports an optional logQ correction (Hidasi & Karatzoglou, 2018): it is
    applied only when ``logq > 0`` and ``P0`` (item popularity prior),
    ``out_iids`` and ``batch_size`` are all provided.
    """
    use_logq = logq > 0 and P0 is not None and out_iids is not None and batch_size is not None
    if use_logq:
        in_batch_prior = P0[out_iids[:batch_size]]
        sampled_prior = P0[out_iids[batch_size:]] ** sample_alpha
        item_scores = item_scores - logq * torch.log(torch.cat([in_batch_prior, sampled_prior]))
    shifted = item_scores - item_scores.max(dim=1, keepdim=True)[0]
    probs = torch.exp(shifted)
    probs = probs / (probs.sum(dim=1, keepdim=True) + 1e-24)
    return -torch.sum(torch.log(torch.diag(probs) + 1e-24)) / item_scores.size(0)


def bpr_max_loss(item_scores, bpreg=1.0, elu_param=0.5, **kwargs):
    """BPR-max with softmax-weighted negatives and L2 regularisation on scores."""
    if elu_param > 0:
        item_scores = F.elu(item_scores, elu_param)
    neg_weights = softmax_neg(item_scores)
    n_rows = item_scores.size(0)
    target = torch.diag(item_scores).reshape(n_rows, -1)
    ranking = torch.sum(torch.sigmoid(target - item_scores) * neg_weights, dim=1)
    penalty = torch.sum((item_scores**2) * neg_weights, dim=1)
    return torch.sum(-torch.log(ranking + 1e-24) + bpreg * penalty) / n_rows


def bce_loss(item_scores, **kwargs):
    """Binary cross-entropy on logits: diagonal entries are the positives,
    every other column (in-batch and sampled negatives) is a negative.
    """
    n_rows, _ = item_scores.shape
    diag = torch.arange(n_rows)
    labels = torch.zeros_like(item_scores)
    labels[diag, diag] = 1.0
    return F.binary_cross_entropy_with_logits(item_scores, labels)


def ce_loss(item_scores, **kwargs):
    """Standard cross-entropy where row ``i``'s target class is column ``i``."""
    n_rows = item_scores.size(0)
    labels = torch.arange(n_rows, device=item_scores.device, dtype=torch.long)
    return F.cross_entropy(item_scores, labels)


LOSS_FUNCTIONS = {
    "bpr": bpr_loss,
    "top1": top1_loss,
    "cross-entropy": xe_softmax_loss,
    "xe_softmax": xe_softmax_loss,
    "softmax": xe_softmax_loss,
    "bpr-max": bpr_max_loss,
    "bce": bce_loss,
    "ce": ce_loss,
}


def get_loss_function(name):
    """Return the loss registered under ``name``; ``ValueError`` if unknown."""
    try:
        return LOSS_FUNCTIONS[name]
    except KeyError:
        raise ValueError(f"Unknown loss '{name}'. Supported: {sorted(set(LOSS_FUNCTIONS))}") from None
class IndexedAdagradM(Optimizer):
    """Adagrad with optional momentum that handles sparse gradients row-wise.

    Parameters
    ----------
    params: iterable of parameters or parameter groups
    lr: float, default: 0.05
    momentum: float, default: 0.0
        A momentum buffer is kept only when this is > 0.
    eps: float, default: 1e-6
        Added to the squared-gradient accumulator before the square root.

    Raises
    ------
    ValueError
        If ``lr <= 0``, ``momentum < 0`` or ``eps <= 0``.
    """

    def __init__(self, params, lr=0.05, momentum=0.0, eps=1e-6):
        if lr <= 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if eps <= 0.0:
            raise ValueError("Invalid epsilon value: {}".format(eps))

        super().__init__(params, dict(lr=lr, momentum=momentum, eps=eps))

        # State is allocated eagerly, one zero buffer per parameter.
        for group in self.param_groups:
            for param in group["params"]:
                buffers = self.state[param]
                buffers["acc"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                if momentum > 0:
                    buffers["mom"] = torch.zeros_like(param, memory_format=torch.preserve_format)

    def share_memory(self):
        """Move every state buffer into shared memory."""
        for group in self.param_groups:
            has_momentum = group["momentum"] > 0
            for param in group["params"]:
                buffers = self.state[param]
                buffers["acc"].share_memory_()
                if has_momentum:
                    buffers["mom"].share_memory_()

    @torch.no_grad()
    def step(self, closure=None):
        """Apply one update; returns the closure's loss (or ``None``)."""
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group["lr"]
            momentum = group["momentum"]
            eps = group["eps"]
            for param in group["params"]:
                grad = param.grad
                if grad is None:
                    continue
                buffers = self.state[param]

                if not grad.is_sparse:
                    buffers["acc"].add_(grad.pow(2))
                    denom = buffers["acc"].add(eps).sqrt_()
                    if momentum > 0:
                        velocity = buffers["mom"]
                        velocity.mul_(momentum).addcdiv_(grad, denom, value=-lr)
                        param.add_(velocity)
                    else:
                        param.addcdiv_(grad, denom, value=-lr)
                    continue

                # Sparse gradient: touch only the rows that received one.
                grad = grad.coalesce()
                rows = grad._indices()[0]
                values = grad._values()
                acc_rows = buffers["acc"][rows] + values.pow(2)
                buffers["acc"].index_copy_(0, rows, acc_rows)
                # Dividing by the negative scaled root folds the sign and
                # the learning rate into the update.
                delta = values / ((acc_rows + eps).sqrt() * (-1 / lr))
                if momentum > 0:
                    velocity_rows = buffers["mom"][rows]
                    velocity_rows.mul_(momentum).add_(delta)
                    buffers["mom"].index_copy_(0, rows, velocity_rows)
                    param.index_add_(0, rows, velocity_rows)
                else:
                    param.index_add_(0, rows, delta)
        return loss
progress_bar: self.model.train() total_loss = 0.0 cnt = 0 @@ -177,6 +191,29 @@ def fit(self, train_set, val_set=None): cnt += len(hist_iids) if inc % 10 == 0 and cnt > 0: progress_bar.set_postfix(loss=(total_loss / cnt)) + + if ( + self.model_selection == "best" + and val_set is not None + and epoch_id % self.val_eval_every == 0 + ): + val_score = self._val_score( + val_set, metric=self.val_metric, k=self.val_k + ) + if val_score is not None and val_score > best_val: + best_val = val_score + best_state = { + n: p.detach().clone() for n, p in self.model.state_dict().items() + } + if self.verbose: + progress_bar.set_postfix( + loss=(total_loss / max(cnt, 1)), + val_recall=val_score, + best=best_val, + ) + + if self.model_selection == "best" and best_state is not None: + self.model.load_state_dict(best_state) return self def score(self, user_idx, history_items, **kwargs): diff --git a/cornac/models/gpt2rec/recom_gpt2rec.py b/cornac/models/gpt2rec/recom_gpt2rec.py index 0ed2f754a..6a76bbede 100644 --- a/cornac/models/gpt2rec/recom_gpt2rec.py +++ b/cornac/models/gpt2rec/recom_gpt2rec.py @@ -60,6 +60,10 @@ def __init__( trainable=True, verbose=False, seed=None, + model_selection="last", + val_eval_every=5, + val_k=20, + val_metric="recall", ): super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) if loss not in SUPPORTED_LOSSES: @@ -83,6 +87,14 @@ def __init__( self.device = device self.seed = seed self.rng = get_rng(seed) + if model_selection not in ("last", "best"): + raise ValueError( + f"model_selection='{model_selection}' not supported; choose 'last' or 'best'" + ) + self.model_selection = model_selection + self.val_eval_every = val_eval_every + self.val_k = val_k + self.val_metric = val_metric def _build_data_iter(self, pad_index): kwargs = dict( @@ -132,8 +144,10 @@ def fit(self, train_set, val_set=None): self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98) ) + best_val = -float("inf") + best_state = None progress_bar = trange(1, 
self.n_epochs + 1, disable=not self.verbose) - for _ in progress_bar: + for epoch_id in progress_bar: self.model.train() total_loss = 0.0 cnt = 0 @@ -171,6 +185,29 @@ def fit(self, train_set, val_set=None): cnt += len(hist_iids) if inc % 10 == 0 and cnt > 0: progress_bar.set_postfix(loss=(total_loss / cnt)) + + if ( + self.model_selection == "best" + and val_set is not None + and epoch_id % self.val_eval_every == 0 + ): + val_score = self._val_score( + val_set, metric=self.val_metric, k=self.val_k + ) + if val_score is not None and val_score > best_val: + best_val = val_score + best_state = { + n: p.detach().clone() for n, p in self.model.state_dict().items() + } + if self.verbose: + progress_bar.set_postfix( + loss=(total_loss / max(cnt, 1)), + val_recall=val_score, + best=best_val, + ) + + if self.model_selection == "best" and best_state is not None: + self.model.load_state_dict(best_state) return self def score(self, user_idx, history_items, **kwargs): diff --git a/cornac/models/gru4rec/recom_gru4rec.py b/cornac/models/gru4rec/recom_gru4rec.py index a753b7891..4942abb49 100644 --- a/cornac/models/gru4rec/recom_gru4rec.py +++ b/cornac/models/gru4rec/recom_gru4rec.py @@ -148,6 +148,10 @@ def __init__( trainable=True, verbose=False, seed=None, + model_selection="last", + val_eval_every=5, + val_k=20, + val_metric="recall", ): super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) if loss not in SUPPORTED_LOSSES: @@ -172,6 +176,14 @@ def __init__( self.device = device self.seed = seed self.rng = get_rng(seed) + if model_selection not in ("last", "best"): + raise ValueError( + f"model_selection='{model_selection}' not supported; choose 'last' or 'best'" + ) + self.model_selection = model_selection + self.val_eval_every = val_eval_every + self.val_k = val_k + self.val_metric = val_metric def _build_loss_kwargs(self): return dict( @@ -229,8 +241,10 @@ def fit(self, train_set, val_set=None): self.model.parameters(), self.learning_rate, self.momentum ) + 
best_val = -float("inf") + best_state = None progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) - for _ in progress_bar: + for epoch_id in progress_bar: H = [ torch.zeros( (self.batch_size, self.layers[i]), @@ -268,6 +282,29 @@ def fit(self, train_set, val_set=None): cnt += len(in_iids) if inc % 10 == 0 and cnt > 0: progress_bar.set_postfix(loss=(total_loss / cnt)) + + if ( + self.model_selection == "best" + and val_set is not None + and epoch_id % self.val_eval_every == 0 + ): + val_score = self._val_score( + val_set, metric=self.val_metric, k=self.val_k + ) + if val_score is not None and val_score > best_val: + best_val = val_score + best_state = { + n: p.detach().clone() for n, p in self.model.state_dict().items() + } + if self.verbose: + progress_bar.set_postfix( + loss=(total_loss / max(cnt, 1)), + val_recall=val_score, + best=best_val, + ) + + if self.model_selection == "best" and best_state is not None: + self.model.load_state_dict(best_state) return self def _build_data_iter(self): diff --git a/cornac/models/sasrec/recom_sasrec.py b/cornac/models/sasrec/recom_sasrec.py index b48a9f0c3..30cb473bb 100644 --- a/cornac/models/sasrec/recom_sasrec.py +++ b/cornac/models/sasrec/recom_sasrec.py @@ -116,6 +116,10 @@ def __init__( trainable=True, verbose=False, seed=None, + model_selection="last", + val_eval_every=5, + val_k=20, + val_metric="recall", ): super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) if loss not in SUPPORTED_LOSSES: @@ -140,6 +144,14 @@ def __init__( self.use_pos_emb = use_pos_emb self.seed = seed self.rng = get_rng(seed) + if model_selection not in ("last", "best"): + raise ValueError( + f"model_selection='{model_selection}' not supported; choose 'last' or 'best'" + ) + self.model_selection = model_selection + self.val_eval_every = val_eval_every + self.val_k = val_k + self.val_metric = val_metric def _build_data_iter(self, pad_index): kwargs = dict( @@ -192,8 +204,10 @@ def fit(self, train_set, 
val_set=None): self.model.parameters(), lr=self.learning_rate, betas=(0.9, 0.98) ) + best_val = -float("inf") + best_state = None progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) - for _ in progress_bar: + for epoch_id in progress_bar: self.model.train() total_loss = 0.0 cnt = 0 @@ -231,6 +245,29 @@ def fit(self, train_set, val_set=None): cnt += len(hist_iids) if inc % 10 == 0 and cnt > 0: progress_bar.set_postfix(loss=(total_loss / cnt)) + + if ( + self.model_selection == "best" + and val_set is not None + and epoch_id % self.val_eval_every == 0 + ): + val_score = self._val_score( + val_set, metric=self.val_metric, k=self.val_k + ) + if val_score is not None and val_score > best_val: + best_val = val_score + best_state = { + n: p.detach().clone() for n, p in self.model.state_dict().items() + } + if self.verbose: + progress_bar.set_postfix( + loss=(total_loss / max(cnt, 1)), + val_recall=val_score, + best=best_val, + ) + + if self.model_selection == "best" and best_state is not None: + self.model.load_state_dict(best_state) return self def score(self, user_idx, history_items, **kwargs): From 77470780ae538592d2702377c6de6913bb7b6392 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 21:59:24 +0800 Subject: [PATCH 03/10] migrate NextItemRecommender to SequentialRecommender --- cornac/eval_methods/__init__.py | 4 +- ...evaluation.py => sequential_evaluation.py} | 317 ++++++------------ cornac/models/__init__.py | 1 - cornac/models/recommender.py | 172 ++++++---- cornac/models/seq_utils/__init__.py | 9 +- cornac/models/seq_utils/iterators.py | 102 +++++- cornac/models/spop/recom_spop.py | 20 +- examples/gru4rec_yoochoose.py | 6 +- examples/spop_yoochoose.py | 8 +- .../eval_methods/test_next_item_evaluation.py | 67 ---- .../test_sequential_evaluation.py | 89 +++++ tests/cornac/models/test_recommender.py | 19 +- 12 files changed, 440 insertions(+), 374 deletions(-) rename cornac/eval_methods/{next_item_evaluation.py => 
sequential_evaluation.py} (51%) delete mode 100644 tests/cornac/eval_methods/test_next_item_evaluation.py create mode 100644 tests/cornac/eval_methods/test_sequential_evaluation.py diff --git a/cornac/eval_methods/__init__.py b/cornac/eval_methods/__init__.py index 74979c804..b32bfffb9 100644 --- a/cornac/eval_methods/__init__.py +++ b/cornac/eval_methods/__init__.py @@ -22,7 +22,7 @@ from .timestamp_split import TimestampSplit from .cross_validation import CrossValidation from .next_basket_evaluation import NextBasketEvaluation -from .next_item_evaluation import NextItemEvaluation +from .sequential_evaluation import SequentialEvaluation from .propensity_stratified_evaluation import PropensityStratifiedEvaluation __all__ = [ @@ -32,6 +32,6 @@ "TimestampSplit", "CrossValidation", "NextBasketEvaluation", - "NextItemEvaluation", + "SequentialEvaluation", "PropensityStratifiedEvaluation", ] diff --git a/cornac/eval_methods/next_item_evaluation.py b/cornac/eval_methods/sequential_evaluation.py similarity index 51% rename from cornac/eval_methods/next_item_evaluation.py rename to cornac/eval_methods/sequential_evaluation.py index 50a22244f..6a338cd1f 100644 --- a/cornac/eval_methods/next_item_evaluation.py +++ b/cornac/eval_methods/sequential_evaluation.py @@ -1,4 +1,4 @@ -# Copyright 2023 The Cornac Authors. All Rights Reserved. +# Copyright 2026 The Cornac Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,173 +22,138 @@ from ..data import SequentialDataset from ..experiment.result import Result -from ..models import NextItemRecommender +from ..models import SequentialRecommender from . 
import BaseMethod -EVALUATION_MODES = frozenset([ - "last", - "next", -]) +EVALUATION_MODES = frozenset(["any", "first", "last"]) + def ranking_eval( model, metrics, train_set, test_set, - user_based=False, + user_based=True, exclude_unknowns=True, - mode="last", + mode="any", verbose=False, ): - """Evaluate model on provided ranking metrics. - - Parameters - ---------- - model: :obj:`cornac.models.NextItemRecommender`, required - NextItemRecommender model to be evaluated. - - metrics: :obj:`iterable`, required - List of rating metrics :obj:`cornac.metrics.RankingMetric`. - - train_set: :obj:`cornac.data.SequentialDataset`, required - SequentialDataset to be used for model training. This will be used to exclude - observations already appeared during training. - - test_set: :obj:`cornac.data.SequentialDataset`, required - SequentialDataset to be used for evaluation. - - exclude_unknowns: bool, optional, default: True - Ignore unknown users and items during evaluation. - - verbose: bool, optional, default: False - Output evaluation progress. - - Returns - ------- - res: (List, List) - Tuple of two lists: - - average result for each of the metrics - - average result per user for each of the metrics - + """Session-aware ranking evaluation. + + Iterates the ``test_set`` by user (``usi_iter``) so that a model sees a + user's *prior* sessions as history when scoring the held-out session. + The session list passed as ``history_items`` is nested + (``list[list[int]]``), and is consumed by + :meth:`cornac.models.SequentialRecommender._flatten_history` according + to the model's ``mode``. + + Modes + ----- + - ``"any"``: all items in the last session are positives. + - ``"first"``: only the first item of the last session is the positive. + - ``"last"``: only the last item of the last session is the positive; + the rest of that session is appended to ``history_items``. 
""" - if len(metrics) == 0: return [], [] avg_results = [] - session_results = [defaultdict(list) for _ in enumerate(metrics)] user_results = [defaultdict(list) for _ in enumerate(metrics)] user_sessions = defaultdict(list) - session_ids = [] - for [sid], [mapped_ids], [session_items] in tqdm( - test_set.si_iter(batch_size=1, shuffle=False), - total=len(test_set.sessions), + for [user_idx], [sids], [mapped_ids], [session_items] in tqdm( + test_set.usi_iter(batch_size=1, shuffle=False), + total=test_set.num_users, desc="Ranking", disable=not verbose, miniters=100, ): - if len(session_items) < 2: # exclude all session with size smaller than 2 + if len(session_items) == 0 or len(session_items[-1]) == 0: continue - user_idx = test_set.uir_tuple[0][mapped_ids[0]] - if user_based: - user_sessions[user_idx].append(sid) - session_ids.append(sid) - - start_pos = 1 if mode == "next" else len(session_items) - 1 - for test_pos in range(start_pos, len(session_items), 1): - test_pos_items = session_items[test_pos] - - # binary mask for ground-truth positive items - u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int") - u_gt_pos_mask[test_pos_items] = 1 - - # binary mask for ground-truth negative items, removing all positive items - u_gt_neg_mask = np.ones(test_set.num_items, dtype="int") - u_gt_neg_mask[test_pos_items] = 0 - - # filter items being considered for evaluation - if exclude_unknowns: - u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items] - u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items] - - u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0] - u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0] - item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0] - - item_rank, item_scores = model.rank( - user_idx, - item_indices, - history_items=session_items[:test_pos], - history_mapped_ids=mapped_ids[:test_pos], - sessions=test_set.sessions, - session_indices=test_set.session_indices, - extra_data=test_set.extra_data, - ) + user_sessions[user_idx].append(sids[-1]) - for i, 
mt in enumerate(metrics): - mt_score = mt.compute( - gt_pos=u_gt_pos_items, - gt_neg=u_gt_neg_items, - pd_rank=item_rank, - pd_scores=item_scores, - item_indices=item_indices, - ) - if user_based: - user_results[i][user_idx].append(mt_score) - else: - session_results[i][sid].append(mt_score) - - # avg results of ranking metrics - for i, mt in enumerate(metrics): - if user_based: - user_ids = list(user_sessions.keys()) - user_avg_results = [np.mean(user_results[i][user_idx]) for user_idx in user_ids] - avg_results.append(np.mean(user_avg_results)) - else: - session_result = [score for sid in session_ids for score in session_results[i][sid]] - avg_results.append(np.mean(session_result)) - return avg_results, user_results + if mode == "any": + test_pos_items = session_items[-1] + elif mode == "first": + test_pos_items = session_items[-1][0:1] + else: # "last" + test_pos_items = session_items[-1][-1:] + # ground-truth masks over the test_set item space + u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int") + u_gt_pos_mask[test_pos_items] = 1 -class NextItemEvaluation(BaseMethod): - """Next Item Recommendation Evaluation method + u_gt_neg_mask = np.ones(test_set.num_items, dtype="int") + u_gt_neg_mask[test_pos_items] = 0 - Parameters - ---------- - data: list, required - Raw preference data in the tuple format [(user_id, sessions)]. + if exclude_unknowns: + u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items] + u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items] - test_size: float, optional, default: 0.2 - The proportion of the test set, \ - if > 1 then it is treated as the size of the test set. + u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0] + u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0] + item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0] - val_size: float, optional, default: 0.0 - The proportion of the validation set, \ - if > 1 then it is treated as the size of the validation set. 
+ if mode == "last": + history_items = list(session_items[:-1]) + [session_items[-1][:-1]] + else: + history_items = list(session_items[:-1]) + + item_rank, item_scores = model.rank( + user_idx, + item_indices, + history_items=history_items, + history_mapped_ids=mapped_ids[:-1], + sessions=test_set.sessions, + session_indices=test_set.session_indices, + extra_data=test_set.extra_data, + mode=mode, + ) - fmt: str, default: 'SIT' - Format of the input data. Currently, we are supporting: + for i, mt in enumerate(metrics): + mt_score = mt.compute( + gt_pos=u_gt_pos_items, + gt_neg=u_gt_neg_items, + pd_rank=item_rank, + pd_scores=item_scores, + item_indices=item_indices, + ) + user_results[i][user_idx].append(mt_score) - 'SIT': Session, Item, Timestamp - 'USIT': User, Session, Item, Timestamp - 'SITJson': Session, Item, Timestamp, Json - 'USITJson': User, Session, Item, Timestamp, Json + # average across users (user-based) + for i in range(len(metrics)): + user_ids = list(user_sessions.keys()) + if len(user_ids) == 0: + avg_results.append(0.0) + continue + per_user = [np.mean(user_results[i][uid]) for uid in user_ids] + avg_results.append(np.mean(per_user)) - seed: int, optional, default: None - Random seed for reproducibility. + return avg_results, user_results - mode: str, optional, default: 'last' - Evaluation mode is either 'next' or 'last'. - If 'last', only evaluate the last item. - If 'next', evaluate every next item in the sequence, - exclude_unknowns: bool, optional, default: True - If `True`, unknown items will be ignored during model evaluation. +class SequentialEvaluation(BaseMethod): + """Next Session Recommendation Evaluation method. - verbose: bool, optional, default: False - Output running log. + Iterates the test set by user and feeds the model the user's prior + sessions as history when ranking items in the held-out last session. + Parameters + ---------- + data: list, optional + Raw preference data in tuple format. Format defined by ``fmt``. 
+ + test_size: float, default: 0.2 + val_size: float, default: 0.0 + fmt: str, default: 'USIT' + One of 'SIT', 'USIT', 'SITJson', 'USITJson'. User information is + required for ``mode="session-aware"`` on the model side; pure SIT + formats only support session-based models. + seed: int, optional + mode: str, default: 'any' + One of 'any', 'first', 'last'. + exclude_unknowns: bool, default: True + verbose: bool, default: False """ def __init__( @@ -196,9 +161,9 @@ def __init__( data=None, test_size=0.2, val_size=0.0, - fmt="SIT", + fmt="USIT", seed=None, - mode="last", + mode="any", exclude_unknowns=True, verbose=False, **kwargs, @@ -215,10 +180,10 @@ def __init__( mode=mode, **kwargs, ) - + if mode not in EVALUATION_MODES: raise ValueError(f"{mode} is not supported. ({EVALUATION_MODES})") - + self.mode = mode self.global_sid_map = kwargs.get("global_sid_map", OrderedDict()) @@ -263,6 +228,7 @@ def _build_datasets(self, train_data, test_data, val_data=None): fmt=self.fmt, global_uid_map=self.global_uid_map, global_iid_map=self.global_iid_map, + global_sid_map=self.global_sid_map, seed=self.seed, exclude_unknowns=self.exclude_unknowns, ) @@ -288,9 +254,9 @@ def eval( test_set, exclude_unknowns, ranking_metrics, - user_based=False, + user_based=True, verbose=False, - mode="last", + mode="any", **kwargs, ): metric_avg_results = OrderedDict() @@ -314,29 +280,8 @@ def eval( return Result(model.name, metric_avg_results, metric_user_results) def evaluate(self, model, metrics, user_based, show_validation=True): - """Evaluate given models according to given metrics. Supposed to be called by Experiment. - - Parameters - ---------- - model: :obj:`cornac.models.NextItemRecommender` - NextItemRecommender model to be evaluated. - - metrics: :obj:`iterable` - List of metrics. - - user_based: bool, required - Evaluation strategy for the rating metrics. Whether results - are averaging based on number of users or number of ratings. 
- - show_validation: bool, optional, default: True - Whether to show the results on validation set (if exists). - - Returns - ------- - res: :obj:`cornac.experiment.Result` - """ - if not isinstance(model, NextItemRecommender): - raise ValueError("model must be a NextItemRecommender but '%s' is provided" % type(model)) + if not isinstance(model, SequentialRecommender): + raise ValueError("model must be a SequentialRecommender but '%s' is provided" % type(model)) if self.train_set is None: raise ValueError("train_set is required but None!") @@ -345,9 +290,6 @@ def evaluate(self, model, metrics, user_based, show_validation=True): self._reset() - ########### - # FITTING # - ########### if self.verbose: print("\n[{}] Training started!".format(model.name)) @@ -355,15 +297,15 @@ def evaluate(self, model, metrics, user_based, show_validation=True): model.fit(self.train_set, self.val_set) train_time = time.time() - start - ############## - # EVALUATION # - ############## if self.verbose: print("\n[{}] Evaluation started!".format(model.name)) rating_metrics, ranking_metrics = self.organize_metrics(metrics) if len(rating_metrics) > 0: - warnings.warn("NextItemEvaluation only supports ranking metrics. The given rating metrics {} will be ignored!".format([mt.name for mt in rating_metrics])) + warnings.warn( + "SequentialEvaluation only supports ranking metrics. " + "The given rating metrics {} will be ignored!".format([mt.name for mt in rating_metrics]) + ) start = time.time() model.transform(self.test_set) @@ -408,51 +350,12 @@ def from_splits( train_data, test_data, val_data=None, - fmt="SIT", + fmt="USIT", exclude_unknowns=False, seed=None, verbose=False, **kwargs, ): - """Constructing evaluation method given data. - - Parameters - ---------- - train_data: array-like - Training data - - test_data: array-like - Test data - - val_data: array-like, optional, default: None - Validation data - - fmt: str, default: 'SIT' - Format of the input data. 
Currently, we are supporting: - - 'SIT': Session, Item, Timestamp - 'USIT': User, Session, Item, Timestamp - 'SITJson': Session, Item, Timestamp, Json - 'USITJson': User, Session, Item, Timestamp, Json - - rating_threshold: float, default: 1.0 - Threshold to decide positive or negative preferences. - - exclude_unknowns: bool, default: False - Whether to exclude unknown users/items in evaluation. - - seed: int, optional, default: None - Random seed for reproduce the splitting. - - verbose: bool, default: False - The verbosity flag. - - Returns - ------- - method: :obj:`` - Evaluation method object. - - """ method = cls( fmt=fmt, exclude_unknowns=exclude_unknowns, diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py index 643b25258..085eea298 100644 --- a/cornac/models/__init__.py +++ b/cornac/models/__init__.py @@ -15,7 +15,6 @@ from .recommender import Recommender from .recommender import NextBasketRecommender -from .recommender import NextItemRecommender from .recommender import SequentialRecommender from .amr import AMR diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py index ab9dccefc..ea86e060d 100644 --- a/cornac/models/recommender.py +++ b/cornac/models/recommender.py @@ -709,80 +709,38 @@ def score(self, user_idx, history_baskets, **kwargs): raise NotImplementedError("The algorithm is not able to make score prediction!") -class NextItemRecommender(Recommender): - """Generic class for a next item recommender model. All next item recommendation models should inherit from this class. - - Parameters - ---------------- - name: str, required - Name of the recommender model. - - trainable: boolean, optional, default: True - When False, the model is not trainable. - - verbose: boolean, optional, default: False - When True, running logs are displayed. - - Attributes - ---------- - num_users: int - Number of users in training data. - - num_items: int - Number of items in training data. 
- - total_users: int - Number of users in training, validation, and test data. - In other words, this includes unknown/unseen users. - - total_items: int - Number of items in training, validation, and test data. - In other words, this includes unknown/unseen items. - - uid_map: int - Global mapping of user ID-index. - - iid_map: int - Global mapping of item ID-index. - """ - - def __init__(self, name, trainable=True, verbose=False): - super().__init__(name=name, trainable=trainable, verbose=verbose) - - def score(self, user_idx, history_items, **kwargs): - """Predict the scores for all items based on input history items - - Parameters - ---------- - history_items: list of lists - The list of history items in sequential manner for next-item prediction. +SUPPORTED_MODES = ("session-based", "session-aware") +SUPPORTED_INPUT_FORMATS = ("flat", "hierarchical") - Returns - ------- - res : a Numpy array - Relative scores of all known items - """ - raise NotImplementedError("The algorithm is not able to make score prediction!") +class SequentialRecommender(Recommender): + """Generic class for a sequential next-item recommender model. + Sequential recommenders are characterised by two orthogonal properties: -SUPPORTED_MODES = ("session-based", "session-aware") + 1. **The user's choice of evaluation/training mode** (``self.mode``): + - ``"session-based"``: only the most recent session is used as + context; the hidden state / history window resets at session + boundaries. + - ``"session-aware"``: the user's chronological cross-session item + sequence is used as context. -class SequentialRecommender(NextItemRecommender): - """Generic class for a sequential next-item recommender model. + 2. 
**The model's input format** (class attribute ``INPUT_FORMAT``): - Sequential recommenders consume a user's interaction history (either a - flat list of item ids or a list of session item lists) and operate in one - of two modes: + - ``"flat"``: the model consumes a flat sequence of item ids + (GRU4Rec, SASRec, BERT4Rec, GPT2Rec, FPMC). + - ``"hierarchical"``: the model consumes a nested ``(prior_sessions, + current_prefix, target)`` structure (HGRU4Rec, SHAN — none in the + current codebase but the dispatch is ready). - - ``"session-based"``: only the most recent session is used as context; - the hidden state / history window resets at session boundaries. - - ``"session-aware"``: the user's chronological cross-session item - sequence is used as context. + The base class wires ``mode`` + ``INPUT_FORMAT`` to the right iterator + via :meth:`_build_data_iter`. Subclasses normally don't need to override + that method; they just set ``INPUT_FORMAT`` and pick up the right + iterator automatically. - Subclasses should set the desired default in their own ``__init__`` and - forward ``mode`` to ``super().__init__``. + Subclasses should set their default ``mode`` in their own ``__init__`` + and forward ``mode`` to ``super().__init__``. Parameters ---------- @@ -799,23 +757,46 @@ class SequentialRecommender(NextItemRecommender): When True, running logs are displayed. """ + INPUT_FORMAT = "flat" + def __init__(self, name, mode="session-based", trainable=True, verbose=False): if mode not in SUPPORTED_MODES: raise ValueError( f"mode='{mode}' not supported; choose from {SUPPORTED_MODES}" ) + if self.INPUT_FORMAT not in SUPPORTED_INPUT_FORMATS: + raise ValueError( + f"{type(self).__name__}.INPUT_FORMAT='{self.INPUT_FORMAT}' " + f"not supported; choose from {SUPPORTED_INPUT_FORMATS}" + ) super().__init__(name=name, trainable=trainable, verbose=verbose) self.mode = mode + def fit(self, train_set, val_set=None): + """Validate data/mode compatibility before subclass training. 
+ + ``mode="session-aware"`` requires per-user identity (USIT-like + formats). Pure SIT datasets collapse every row to a single + anonymous user, which makes session-aware iteration meaningless. + We catch this early with a clear error rather than producing a + silently broken iterator. + """ + if self.mode == "session-aware" and getattr(train_set, "num_users", 0) <= 1: + raise ValueError( + f"{type(self).__name__}: mode='session-aware' requires a " + f"dataset with user identity (e.g. fmt='USIT'); " + f"train_set.num_users={getattr(train_set, 'num_users', 'N/A')}." + ) + return super().fit(train_set, val_set) + def _flatten_history(self, history_items): """Coerce ``history_items`` into a flat list of item ids. Accepts both: - - ``[i0, i1, i2, ...]`` (flat list, as produced by - :class:`cornac.eval_methods.NextItemEvaluation`). + - ``[i0, i1, i2, ...]`` (flat list of item ids). - ``[[i0, i1], [i2, i3], ...]`` (list of session item lists, as - produced by the session-aware ``NextSessionEvaluation`` flow). + produced by :class:`cornac.eval_methods.SequentialEvaluation`). For ``mode == "session-based"`` the list of sessions is collapsed to just the items of the *last non-empty* session. For @@ -836,3 +817,58 @@ def _flatten_history(self, history_items): flat.extend(list(sess)) return flat return list(history_items) + + def _val_score(self, val_set, metric="recall", k=20, mode="last"): + """Compute a session-aware ranking metric on ``val_set`` for + best-on-val selection during training. + + Iterates val_set via the same logic as + :func:`cornac.eval_methods.sequential_evaluation.ranking_eval` + so the val score matches the eventual test protocol. + + Parameters + ---------- + metric : str + One of ``"recall"``, ``"ndcg"``, ``"auc"``, ``"mrr"`` + (case-insensitive). ``k`` is ignored for ``auc`` and ``mrr``. + + Returns ``None`` if ``val_set`` is ``None``. 
+ """ + if val_set is None: + return None + from ..eval_methods.sequential_evaluation import ranking_eval + from ..metrics import AUC, MRR, NDCG, Recall + + name = metric.lower() + if name == "recall": + m = Recall(k=k) + elif name == "ndcg": + m = NDCG(k=k) + elif name == "auc": + m = AUC() + elif name == "mrr": + m = MRR() + else: + raise ValueError( + f"val_metric='{metric}' not supported; " + f"choose from recall/ndcg/auc/mrr" + ) + + inner = getattr(self, "model", None) + was_training = bool(getattr(inner, "training", False)) + if inner is not None and hasattr(inner, "eval"): + inner.eval() + try: + avg, _ = ranking_eval( + model=self, + metrics=[m], + train_set=self.train_set, + test_set=val_set, + mode=mode, + exclude_unknowns=True, + verbose=False, + ) + finally: + if inner is not None and was_training and hasattr(inner, "train"): + inner.train() + return float(avg[0]) if avg else 0.0 diff --git a/cornac/models/seq_utils/__init__.py b/cornac/models/seq_utils/__init__.py index 67a9be9a6..ba9a3945f 100644 --- a/cornac/models/seq_utils/__init__.py +++ b/cornac/models/seq_utils/__init__.py @@ -50,7 +50,12 @@ def fit(self, train_set, val_set=None): ``session_seq_iter`` ``(in_uids, hist_iids, out_iids)`` — sequence models, session-based. ``user_seq_iter`` ``(in_uids, hist_iids, out_iids)`` — sequence - models, session-aware. + models, session-aware (flat history, skips + cross-session target tuples). + ``user_hier_seq_iter`` ``(uid, prior_sessions, current_prefix, target)`` + — per-row hierarchical iterator for future + session-aware models (HGRU4Rec, SHAN). No + current consumer. 
========================= ================================================= """ @@ -59,6 +64,7 @@ def fit(self, train_set, val_set=None): user_io_iter, session_seq_iter, user_seq_iter, + user_hier_seq_iter, ) __all__ = [ @@ -66,4 +72,5 @@ def fit(self, train_set, val_set=None): "user_io_iter", "session_seq_iter", "user_seq_iter", + "user_hier_seq_iter", ] diff --git a/cornac/models/seq_utils/iterators.py b/cornac/models/seq_utils/iterators.py index 73c4f4dfc..918603e3d 100644 --- a/cornac/models/seq_utils/iterators.py +++ b/cornac/models/seq_utils/iterators.py @@ -146,9 +146,15 @@ def _user_of(mapped_ids): def _user_chrono_sequences(train_set): """Build per-user chronological item sequences across sessions. - Returns a list of (user_idx, item_sequence) tuples. The order within a - user follows session order (by first-timestamp in session, if available) - and then in-session interaction order. + Returns a list of ``(user_idx, item_sequence, session_starts)`` tuples. + The order within a user follows session order (by first-timestamp in + session, if available) and then in-session interaction order. + + ``session_starts`` is a sorted list of indices into ``item_sequence`` + marking the first item of each session. The first entry is always 0. + For example, sessions ``[[a,b,c],[d,e,f]]`` produce + ``items=[a,b,c,d,e,f]`` and ``session_starts=[0, 3]``. Targets at these + indices (except 0) are cross-session boundary predictions. 
""" uir_tuple = train_set.uir_tuple sessions = train_set.sessions @@ -162,10 +168,12 @@ def _user_chrono_sequences(train_set): else: order = list(sids) items = [] + session_starts = [] for sid in order: + session_starts.append(len(items)) items.extend(int(i) for i in uir_tuple[1][sessions[sid]]) if len(items) > 1: - sequences.append((int(uid), items)) + sequences.append((int(uid), items, session_starts)) return sequences @@ -176,6 +184,13 @@ def user_io_iter(train_set, n_sample=0, sample_alpha=0, rng=None, batch_size=1, item sequences (concatenated across sessions) instead of single sessions. The hidden state is therefore reset at *user* boundaries rather than *session* boundaries. + + Note + ---- + Unlike :func:`user_seq_iter`, this iterator does NOT mask cross-session + target transitions. Per-row loss masking would require changes to every + loss function in :mod:`losses`; for now GRU4Rec session-aware training + accepts the cross-session targets as a small amount of label noise. """ rng = rng if rng is not None else get_rng(None) sequences = _user_chrono_sequences(train_set) @@ -196,7 +211,7 @@ def _refill(): nonlocal end_mask while end_mask.sum() > 0: try: - uid, seq = next(pool_iter) + uid, seq, _session_starts = next(pool_iter) except StopIteration: return False if len(seq) <= 1: @@ -338,12 +353,19 @@ def user_seq_iter( sample_alpha=0.5, rng=None, shuffle=True, + skip_cross_session_targets=True, ): """Session-aware sequence iterator for transformer/seq models. Iterates over users; for each user constructs the chronological cross-session item sequence and yields per-step ``(uid, hist[max_len], target)`` triples. The history therefore spans across sessions. + + When ``skip_cross_session_targets`` is True (default), training tuples + whose target is the first item of a new session are dropped so the loss + is not penalized for predicting items across session boundaries. 
The + cross-session items are still available as *history* in subsequent + within-session steps. """ rng = rng if rng is not None else get_rng(None) sequences = _user_chrono_sequences(train_set) @@ -354,10 +376,15 @@ def user_seq_iter( item_indices, item_dist = _build_neg_sampler(uir_tuple, sample_alpha) buffer_uids, buffer_hist, buffer_target = [], [], [] - for uid, items in sequences: + for uid, items, session_starts in sequences: if len(items) < 2: continue + boundary_targets = ( + set(session_starts[1:]) if skip_cross_session_targets else set() + ) for t in range(1, len(items)): + if t in boundary_targets: + continue hist = items[:t][-max_len:] hist = [pad_index] * (max_len - len(hist)) + list(hist) buffer_uids.append(uid) @@ -388,3 +415,66 @@ def user_seq_iter( np.array(buffer_hist, dtype="int"), out_iids, ) + + +def user_hier_seq_iter( + train_set, + rng=None, + shuffle=True, +): + """Session-aware *hierarchical* iterator for future HGRU/SHAN-style models. + + For every within-session step ``t`` of session ``S_k`` (k >= 0, t >= 1) + yields one row: + + ``(uid, prior_sessions, current_prefix, target)`` + + where: + + - ``uid`` (int): user id. + - ``prior_sessions`` (``list[list[int]]``): the user's earlier sessions + (``S_0, ..., S_{k-1}``) in chronological order, each as a list of item + ids. Empty list for the user's first session. + - ``current_prefix`` (``list[int]``): ``S_k[:t]`` — the in-progress + session up to (but not including) the target step. + - ``target`` (int): ``S_k[t]`` — the held-out next item. + + Padding and batching are intentionally left to the consuming model so + each architecture can pick its own ``max_history_sessions`` / + ``max_session_len`` budgets. + + This iterator has no consumer in the current codebase; it exists to + complete the dispatch contract for ``mode="session-aware"`` + + ``INPUT_FORMAT="hierarchical"`` so that adding HGRU4Rec/SHAN later does + not require touching the base class. 
+ """ + rng = rng if rng is not None else get_rng(None) + uir_tuple = train_set.uir_tuple + sessions = train_set.sessions + user_sessions = train_set.user_session_data + timestamps = train_set.timestamps + + user_ids = list(user_sessions.keys()) + if shuffle: + rng.shuffle(user_ids) + + for uid in user_ids: + sids = user_sessions[uid] + if timestamps is not None: + order = sorted(sids, key=lambda sid: timestamps[sessions[sid][0]]) + else: + order = list(sids) + per_session_items = [ + [int(i) for i in uir_tuple[1][sessions[sid]]] for sid in order + ] + prior = [] + for sess_items in per_session_items: + if len(sess_items) >= 2: + for t in range(1, len(sess_items)): + yield ( + int(uid), + [list(s) for s in prior], + list(sess_items[:t]), + int(sess_items[t]), + ) + prior.append(sess_items) diff --git a/cornac/models/spop/recom_spop.py b/cornac/models/spop/recom_spop.py index 1910c6073..558cbc130 100644 --- a/cornac/models/spop/recom_spop.py +++ b/cornac/models/spop/recom_spop.py @@ -17,10 +17,10 @@ import numpy as np -from ..recommender import NextItemRecommender +from ..recommender import SequentialRecommender -class SPop(NextItemRecommender): +class SPop(SequentialRecommender): """Recommend most popular items of the current session. Parameters @@ -28,8 +28,15 @@ class SPop(NextItemRecommender): name: string, default: 'SPop' The name of the recommender model. + mode: str, optional, default: 'session-based' + One of 'session-based' or 'session-aware'. SPop's popularity logic + is the same in both; the only difference is how + :meth:`_flatten_history` collapses the nested ``history_items`` + coming from :class:`cornac.eval_methods.SequentialEvaluation`. + use_session_popularity: boolean, optional, default: True - When False, no item frequency from history items in current session are being used. + When False, no item frequency from history items in current session + are used; only the global training popularity is returned. 
References ---------- @@ -37,8 +44,8 @@ class SPop(NextItemRecommender): Session-based Recommendations with Recurrent Neural Networks, ICLR 2016 """ - def __init__(self, name="SPop", use_session_popularity=True): - super().__init__(name=name, trainable=False) + def __init__(self, name="SPop", mode="session-based", use_session_popularity=True): + super().__init__(name=name, mode=mode, trainable=False) self.use_session_popularity = use_session_popularity self.item_freq = Counter() @@ -53,7 +60,8 @@ def score(self, user_idx, history_items, **kwargs): for iid, freq in self.item_freq.items(): item_scores[iid] = freq / max_item_freq if self.use_session_popularity: - s_item_freq = Counter([iid for iid in history_items]) + flat_history = self._flatten_history(history_items) + s_item_freq = Counter(flat_history) for iid, cnt in s_item_freq.most_common(): item_scores[iid] += cnt return item_scores diff --git a/examples/gru4rec_yoochoose.py b/examples/gru4rec_yoochoose.py index 1d8b6f261..307e4155b 100644 --- a/examples/gru4rec_yoochoose.py +++ b/examples/gru4rec_yoochoose.py @@ -17,7 +17,7 @@ import cornac from cornac.data import Reader from cornac.datasets import yoochoose -from cornac.eval_methods import NextItemEvaluation +from cornac.eval_methods import SequentialEvaluation from cornac.metrics import MRR, NDCG, Recall from cornac.models import GRU4Rec, SPop @@ -29,7 +29,7 @@ test_data = yoochoose.load_test(reader=Reader(min_sequence_size=2, item_set=item_set)) print("test data loaded") -next_item_eval = NextItemEvaluation.from_splits( +eval_method = SequentialEvaluation.from_splits( train_data=buy_data, test_data=test_data[:10000], # illustration purpose only, subset of test data for faster experiment exclude_unknowns=True, @@ -63,7 +63,7 @@ ] cornac.Experiment( - eval_method=next_item_eval, + eval_method=eval_method, models=models, metrics=metrics, ).run() diff --git a/examples/spop_yoochoose.py b/examples/spop_yoochoose.py index eb4f785fd..2a4c77176 100644 --- 
a/examples/spop_yoochoose.py +++ b/examples/spop_yoochoose.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -"""Example of a next-item recommendation model based on item popularity""" +"""Example of a session-based recommendation model based on item popularity""" import cornac from cornac.datasets import yoochoose -from cornac.eval_methods import NextItemEvaluation +from cornac.eval_methods import SequentialEvaluation from cornac.metrics import MRR, NDCG, Recall from cornac.models import SPop @@ -25,7 +25,7 @@ test_data = yoochoose.load_test() print("test data loaded") -next_item_eval = NextItemEvaluation.from_splits( +eval_method = SequentialEvaluation.from_splits( train_data=buy_data, test_data=test_data[:10000], # illustration purpose only, subset of test data for faster experiment verbose=True, @@ -46,7 +46,7 @@ ] cornac.Experiment( - eval_method=next_item_eval, + eval_method=eval_method, models=models, metrics=metrics, ).run() diff --git a/tests/cornac/eval_methods/test_next_item_evaluation.py b/tests/cornac/eval_methods/test_next_item_evaluation.py deleted file mode 100644 index 31ceb8d9b..000000000 --- a/tests/cornac/eval_methods/test_next_item_evaluation.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2023 The Cornac Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import unittest - -from cornac.eval_methods import NextItemEvaluation -from cornac.data import Reader -from cornac.models import SPop -from cornac.metrics import HitRatio, Recall - - -class TestNextItemEvaluation(unittest.TestCase): - def setUp(self): - self.data = Reader().read("./tests/sequence.txt", fmt="USIT", sep=" ") - - def test_from_splits(self): - next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT") - - self.assertTrue(next_item_eval.train_set != None) - self.assertTrue(next_item_eval.test_set != None) - self.assertTrue(next_item_eval.val_set == None) - self.assertTrue(next_item_eval.total_sessions == 16) - - def test_evaluate(self): - next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT") - result = next_item_eval.evaluate( - SPop(), [HitRatio(k=2), Recall(k=2)], user_based=False - ) - self.assertEqual(result[0].metric_avg_results.get('HitRatio@2'), 0) - self.assertEqual(result[0].metric_avg_results.get('Recall@2'), 0) - - next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT") - result = next_item_eval.evaluate( - SPop(), [HitRatio(k=5), Recall(k=5)], user_based=True - ) - self.assertEqual(result[0].metric_avg_results.get('HitRatio@5'), 2/3) - self.assertEqual(result[0].metric_avg_results.get('Recall@5'), 2/3) - - next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT", mode="next") - result = next_item_eval.evaluate( - SPop(), [HitRatio(k=2), Recall(k=2)], user_based=False - ) - - self.assertEqual(result[0].metric_avg_results.get('HitRatio@2'), 1/8) - self.assertEqual(result[0].metric_avg_results.get('Recall@2'), 1/8) - - next_item_eval = NextItemEvaluation.from_splits(train_data=self.data[:50], test_data=self.data[50:], fmt="USIT", mode="next") 
- result = next_item_eval.evaluate( - SPop(), [HitRatio(k=5), Recall(k=5)], user_based=True - ) - self.assertEqual(result[0].metric_avg_results.get('HitRatio@5'), 3/4) - self.assertEqual(result[0].metric_avg_results.get('Recall@5'), 3/4) - -if __name__ == "__main__": - unittest.main() diff --git a/tests/cornac/eval_methods/test_sequential_evaluation.py b/tests/cornac/eval_methods/test_sequential_evaluation.py new file mode 100644 index 000000000..511a3aad0 --- /dev/null +++ b/tests/cornac/eval_methods/test_sequential_evaluation.py @@ -0,0 +1,89 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import unittest + +from cornac.data import Reader +from cornac.eval_methods import SequentialEvaluation +from cornac.metrics import MRR, NDCG, Recall +from cornac.models import SPop + + +class TestSequentialEvaluation(unittest.TestCase): + def setUp(self): + self.data = Reader().read("./tests/sequence.txt", fmt="USIT", sep=" ") + + def test_from_splits(self): + eval_method = SequentialEvaluation.from_splits( + train_data=self.data[:50], test_data=self.data[50:], fmt="USIT" + ) + self.assertIsNotNone(eval_method.train_set) + self.assertIsNotNone(eval_method.test_set) + self.assertIsNone(eval_method.val_set) + self.assertEqual( + eval_method.total_sessions, + eval_method.train_set.num_sessions + eval_method.test_set.num_sessions, + ) + + def test_invalid_mode_raises(self): + with self.assertRaises(ValueError): + SequentialEvaluation.from_splits( + train_data=self.data[:50], + test_data=self.data[50:], + fmt="USIT", + mode="bogus", + ) + + def _run_with_mode(self, mode): + eval_method = SequentialEvaluation.from_splits( + train_data=self.data[:50], + test_data=self.data[50:], + fmt="USIT", + mode=mode, + ) + test_result, _ = eval_method.evaluate( + SPop(), + [Recall(k=5), NDCG(k=5), MRR()], + user_based=True, + ) + return test_result.metric_avg_results + + def test_evaluate_mode_last(self): + results = self._run_with_mode("last") + for name, value in results.items(): + if name in ("Train (s)", "Test (s)"): + continue + self.assertGreaterEqual(value, 0.0) + self.assertLessEqual(value, 1.0) + + def test_evaluate_mode_first(self): + results = self._run_with_mode("first") + for name, value in results.items(): + if name in ("Train (s)", "Test (s)"): + continue + self.assertGreaterEqual(value, 0.0) + self.assertLessEqual(value, 1.0) + + def test_evaluate_mode_any(self): + results = self._run_with_mode("any") + for name, value in results.items(): + if name in ("Train (s)", "Test (s)"): + 
continue + self.assertGreaterEqual(value, 0.0) + self.assertLessEqual(value, 1.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cornac/models/test_recommender.py b/tests/cornac/models/test_recommender.py index 2006064a0..2b40c2321 100644 --- a/tests/cornac/models/test_recommender.py +++ b/tests/cornac/models/test_recommender.py @@ -16,9 +16,10 @@ import unittest import numpy as np +import numpy.testing as npt -from cornac.data import BasketDataset, Dataset, SequentialDataset, Reader -from cornac.models import MF, GPTop, SPop, NextBasketRecommender, NextItemRecommender +from cornac.data import BasketDataset, Dataset, Reader, SequentialDataset +from cornac.models import MF, GPTop, NextBasketRecommender, SequentialRecommender, SPop class TestRecommender(unittest.TestCase): @@ -71,26 +72,26 @@ def test_fit(self): model.rank(0, history_baskets=[[]]) -class TestNextItemRecommender(unittest.TestCase): +class TestSequentialRecommender(unittest.TestCase): def setUp(self): self.data = Reader().read("./tests/sequence.txt", fmt="USIT", sep=" ") def test_init(self): - model = NextItemRecommender("test") + model = SequentialRecommender("test") self.assertTrue(model.name == "test") def test_fit(self): dataset = SequentialDataset.from_usit(self.data) - model = NextItemRecommender("") + model = SequentialRecommender("") model.fit(dataset) model = SPop() model.fit(dataset) model.score(0, []) result = model.rank(0, history_items=[]) - self.assertTrue((result[0][0:3] == [3, 2, 4]).all()) - self.assertTrue((np.sort(result[0][3:5]) == [0, 1]).all()) # identical scores, sorting may affect the ordering - self.assertTrue((result[0][5:6] == [5]).all()) - self.assertTrue((np.sort(result[0][6:9]) == [6, 7, 8]).all()) # identical scores, sorting may affect the ordering + npt.assert_array_equal(result[0][0:3], [3, 2, 4]) + npt.assert_array_equal(np.sort(result[0][3:5]), [0, 1]) # identical scores, sorting may affect the ordering + npt.assert_array_equal(result[0][5:6], 
[5]) + npt.assert_array_equal(np.sort(result[0][6:9]), [6, 7, 8]) # identical scores, sorting may affect the ordering if __name__ == "__main__": From e91947ce46d1634071d0e9af9b4d69d0e56f5e7c Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 22:39:59 +0800 Subject: [PATCH 04/10] add alias for SBR and SAR --- cornac/models/recommender.py | 6 +++++- tests/cornac/models/test_recommender.py | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cornac/models/recommender.py b/cornac/models/recommender.py index ea86e060d..75555f881 100644 --- a/cornac/models/recommender.py +++ b/cornac/models/recommender.py @@ -760,9 +760,13 @@ class SequentialRecommender(Recommender): INPUT_FORMAT = "flat" def __init__(self, name, mode="session-based", trainable=True, verbose=False): + mode = {"sbr": "session-based", "sar": "session-aware"}.get( + mode.lower() if isinstance(mode, str) else mode, mode + ) if mode not in SUPPORTED_MODES: raise ValueError( - f"mode='{mode}' not supported; choose from {SUPPORTED_MODES}" + f"mode='{mode}' not supported; choose from {SUPPORTED_MODES} " + f"(aliases 'sbr'/'sar' also accepted, case-insensitive)" ) if self.INPUT_FORMAT not in SUPPORTED_INPUT_FORMATS: raise ValueError( diff --git a/tests/cornac/models/test_recommender.py b/tests/cornac/models/test_recommender.py index 2b40c2321..45448b81c 100644 --- a/tests/cornac/models/test_recommender.py +++ b/tests/cornac/models/test_recommender.py @@ -93,6 +93,14 @@ def test_fit(self): npt.assert_array_equal(result[0][5:6], [5]) npt.assert_array_equal(np.sort(result[0][6:9]), [6, 7, 8]) # identical scores, sorting may affect the ordering + def test_mode_aliases(self): + for alias in ("sbr", "SBR", "Sbr", "session-based"): + self.assertEqual(SequentialRecommender("t", mode=alias).mode, "session-based") + for alias in ("sar", "SAR", "Sar", "session-aware"): + self.assertEqual(SequentialRecommender("t", mode=alias).mode, "session-aware") + with self.assertRaises(ValueError): + 
SequentialRecommender("t", mode="bogus") + + if __name__ == "__main__": unittest.main() From 28006afbaed939406e43ef007b14b70d370d1e7a Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 22:40:13 +0800 Subject: [PATCH 05/10] add example for sequential models --- examples/sequential_diginetica.py | 98 +++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 examples/sequential_diginetica.py diff --git a/examples/sequential_diginetica.py b/examples/sequential_diginetica.py new file mode 100644 index 000000000..5c7dc77f9 --- /dev/null +++ b/examples/sequential_diginetica.py @@ -0,0 +1,98 @@ +# Copyright 2026 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Sequential recommendation on Diginetica with GRU4Rec / SASRec / BERT4Rec / GPT2Rec. + +Each model is trained twice: + +- ``mode="sbr"`` (session-based: only the current session is context) +- ``mode="sar"`` (session-aware: the user's cross-session history is context) + +``"sbr"`` and ``"sar"`` are short-form aliases for ``"session-based"`` and +``"session-aware"`` and are matched case-insensitively. + +Runs a short 10-epoch schedule so the example fits in a reasonable time; +bump ``N_EPOCHS`` for real comparisons. + +Note: GRU4Rec converges more slowly than Transformer-based models.
+""" + +import cornac +from cornac.data import Reader +from cornac.datasets import diginetica +from cornac.eval_methods import SequentialEvaluation +from cornac.metrics import AUC, MRR, NDCG, Recall +from cornac.models import BERT4Rec, GPT2Rec, GRU4Rec, SASRec + +N_EPOCHS = 10 +DEVICE = "cuda" # set to "cpu" if no GPU is available +SEED = 123 + +reader = Reader(min_sequence_size=2) +train_data = diginetica.load_train(reader=reader) +val_data = diginetica.load_val(reader=reader) +test_data = diginetica.load_test(reader=reader) + +eval_method = SequentialEvaluation.from_splits( + train_data=train_data, + val_data=val_data, + test_data=test_data, + fmt="USIT", + exclude_unknowns=True, + verbose=True, + mode="last", +) + +shared = dict( + embedding_dim=64, + batch_size=256, + n_sample=512, + n_epochs=N_EPOCHS, + max_len=20, + num_blocks=2, + num_heads=2, + device=DEVICE, + verbose=True, + seed=SEED, +) + +models = [] +for setting in ("sbr", "sar"): + models += [ + GRU4Rec( + name=f"GRU4Rec-{setting}", + layers=[64], + loss="cross-entropy", + mode=setting, + batch_size=shared["batch_size"], + n_sample=shared["n_sample"], + sample_alpha=0.75, + dropout_p_hidden=0.3, + n_epochs=N_EPOCHS, + device=DEVICE, + verbose=True, + seed=SEED, + ), + SASRec(name=f"SASRec-{setting}", mode=setting, **shared), + BERT4Rec(name=f"BERT4Rec-{setting}", mode=setting, **shared), + GPT2Rec(name=f"GPT2Rec-{setting}", mode=setting, **shared), + ] + +metrics = [AUC(), MRR(), NDCG(k=10), NDCG(k=50), Recall(k=10), Recall(k=50)] + +cornac.Experiment( + eval_method=eval_method, + models=models, + metrics=metrics, +).run() From 05831f8a2c17b137e05cec660c9b141fdd709d26 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 23:04:03 +0800 Subject: [PATCH 06/10] model_selection for FPMC --- cornac/models/fpmc/recom_fpmc.py | 50 +++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/cornac/models/fpmc/recom_fpmc.py b/cornac/models/fpmc/recom_fpmc.py index 
a40a3682c..6f3e0830f 100644 --- a/cornac/models/fpmc/recom_fpmc.py +++ b/cornac/models/fpmc/recom_fpmc.py @@ -62,6 +62,17 @@ class FPMC(SequentialRecommender): momentum: float, optional, default: 0.0 Momentum for the IndexedAdagradM optimizer. + model_selection: str, optional, default: 'last' + One of 'last' or 'best'. When 'best', the model with the highest + validation score (evaluated every ``val_eval_every`` epochs) is + restored at the end of ``fit``. + + val_eval_every: int, optional, default: 5 + val_k: int, optional, default: 20 + val_metric: str, optional, default: 'recall' + Cutoff and metric used for best-on-val selection. See + :meth:`SequentialRecommender._val_score`. + References ---------- Rendle, S., Freudenthaler, C., & Schmidt-Thieme, L. (2010). @@ -86,6 +97,10 @@ def __init__( trainable=True, verbose=False, seed=None, + model_selection="last", + val_eval_every=5, + val_k=20, + val_metric="recall", ): super().__init__(name, mode=mode, trainable=trainable, verbose=verbose) if loss not in SUPPORTED_LOSSES: @@ -105,6 +120,14 @@ def __init__( self.device = device self.seed = seed self.rng = get_rng(seed) + if model_selection not in ("last", "best"): + raise ValueError( + f"model_selection='{model_selection}' not supported; choose 'last' or 'best'" + ) + self.model_selection = model_selection + self.val_eval_every = val_eval_every + self.val_k = val_k + self.val_metric = val_metric def _build_data_iter(self, pad_index): # FPMC only uses the immediately previous item, so set max_len = 1. 
@@ -153,8 +176,10 @@ def fit(self, train_set, val_set=None): self.model.parameters(), lr=self.learning_rate, momentum=self.momentum ) + best_val = -float("inf") + best_state = None progress_bar = trange(1, self.n_epochs + 1, disable=not self.verbose) - for _ in progress_bar: + for epoch_id in progress_bar: self.model.train() total_loss = 0.0 cnt = 0 @@ -193,6 +218,29 @@ def fit(self, train_set, val_set=None): cnt += len(in_uids) if inc % 10 == 0 and cnt > 0: progress_bar.set_postfix(loss=(total_loss / cnt)) + + if ( + self.model_selection == "best" + and val_set is not None + and epoch_id % self.val_eval_every == 0 + ): + val_score = self._val_score( + val_set, metric=self.val_metric, k=self.val_k + ) + if val_score is not None and val_score > best_val: + best_val = val_score + best_state = { + n: p.detach().clone() for n, p in self.model.state_dict().items() + } + if self.verbose: + progress_bar.set_postfix( + loss=(total_loss / max(cnt, 1)), + val_recall=val_score, + best=best_val, + ) + + if self.model_selection == "best" and best_state is not None: + self.model.load_state_dict(best_state) return self def score(self, user_idx, history_items, **kwargs): From 24b30df0721da1a099cbc6856a5dc5a1d2578aac Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 23:04:21 +0800 Subject: [PATCH 07/10] add requirements.txt for sequential models --- cornac/models/bert4rec/requirements.txt | 2 ++ cornac/models/fpmc/requirements.txt | 1 + cornac/models/gpt2rec/requirements.txt | 2 ++ cornac/models/sasrec/requirements.txt | 1 + 4 files changed, 6 insertions(+) create mode 100644 cornac/models/bert4rec/requirements.txt create mode 100644 cornac/models/fpmc/requirements.txt create mode 100644 cornac/models/gpt2rec/requirements.txt create mode 100644 cornac/models/sasrec/requirements.txt diff --git a/cornac/models/bert4rec/requirements.txt b/cornac/models/bert4rec/requirements.txt new file mode 100644 index 000000000..a808c3f6f --- /dev/null +++ 
b/cornac/models/bert4rec/requirements.txt @@ -0,0 +1,2 @@ +torch>=1.12.0 +transformers>=4.30.0 diff --git a/cornac/models/fpmc/requirements.txt b/cornac/models/fpmc/requirements.txt new file mode 100644 index 000000000..be222b022 --- /dev/null +++ b/cornac/models/fpmc/requirements.txt @@ -0,0 +1 @@ +torch>=1.12.0 diff --git a/cornac/models/gpt2rec/requirements.txt b/cornac/models/gpt2rec/requirements.txt new file mode 100644 index 000000000..a808c3f6f --- /dev/null +++ b/cornac/models/gpt2rec/requirements.txt @@ -0,0 +1,2 @@ +torch>=1.12.0 +transformers>=4.30.0 diff --git a/cornac/models/sasrec/requirements.txt b/cornac/models/sasrec/requirements.txt new file mode 100644 index 000000000..be222b022 --- /dev/null +++ b/cornac/models/sasrec/requirements.txt @@ -0,0 +1 @@ +torch>=1.12.0 From d808b5ba2223e602e1acc3bb288964d5309537f8 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 23:07:30 +0800 Subject: [PATCH 08/10] add FPMC to example --- examples/sequential_diginetica.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/examples/sequential_diginetica.py b/examples/sequential_diginetica.py index 5c7dc77f9..6a63d4706 100644 --- a/examples/sequential_diginetica.py +++ b/examples/sequential_diginetica.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -"""Sequential recommendation on Diginetica with GRU4Rec / SASRec / BERT4Rec / GPT2Rec. +"""Sequential recommendation on Diginetica with FPMC / GRU4Rec / SASRec / BERT4Rec / GPT2Rec. 
Each model is trained twice: @@ -33,7 +33,7 @@ from cornac.datasets import diginetica from cornac.eval_methods import SequentialEvaluation from cornac.metrics import AUC, MRR, NDCG, Recall -from cornac.models import BERT4Rec, GPT2Rec, GRU4Rec, SASRec +from cornac.models import FPMC, BERT4Rec, GPT2Rec, GRU4Rec, SASRec N_EPOCHS = 10 DEVICE = "cuda" # set to "cpu" if no GPU is available @@ -70,6 +70,18 @@ models = [] for setting in ("sbr", "sar"): models += [ + FPMC( + name=f"FPMC-{setting}", + embedding_dim=shared["embedding_dim"], + loss="bpr", + mode=setting, + batch_size=shared["batch_size"], + n_sample=shared["n_sample"], + n_epochs=N_EPOCHS, + device=DEVICE, + verbose=True, + seed=SEED, + ), GRU4Rec( name=f"GRU4Rec-{setting}", layers=[64], From 3db6f09f82a940417b069c45c3fa6282ea258487 Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 23:29:02 +0800 Subject: [PATCH 09/10] update models in README --- README.md | 8 ++++++-- examples/README.md | 8 +++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 103a03014..6e79d78e1 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. 
E | 2021 | [Bilateral Variational Autoencoder for Collaborative Filtering (BiVAECF)](cornac/models/bivaecf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.bivaecf.recom_bivaecf), [paper](https://dl.acm.org/doi/pdf/10.1145/3437963.3441759) | Collaborative Filtering / Content-Based | [requirements](cornac/models/bivaecf/requirements.txt), CPU / GPU | [quick-start](https://github.com/PreferredAI/bi-vae), [deep-dive](https://github.com/recommenders-team/recommenders/blob/main/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb) | | [Causal Inference for Visual Debiasing in Visually-Aware Recommendation (CausalRec)](cornac/models/causalrec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.causalrec.recom_causalrec), [paper](https://arxiv.org/abs/2107.02390) | Content-Based / Image | [requirements](cornac/models/causalrec/requirements.txt), CPU / GPU | [quick-start](examples/causalrec_clothing.py) | | [Explainable Recommendation with Comparative Constraints on Product Aspects (ComparER)](cornac/models/comparer), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.comparer.recom_comparer_sub), [paper](https://dl.acm.org/doi/pdf/10.1145/3437963.3441754) | Explainable | CPU | [quick-start](https://github.com/PreferredAI/ComparER) +| | [GPT2Rec (Inspired by Transformers4Rec)](cornac/models/gpt2rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.gpt2rec.recom_gpt2rec), [paper](https://dl.acm.org/doi/10.1145/3460231.3474255) | Sequential | [requirements](cornac/models/gpt2rec/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py) | 2020 | [Adversarial Multimedia Recommendation (AMR)](cornac/models/amr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.amr.recom_amr), [paper](https://ieeexplore.ieee.org/document/8618394) | Content-Based / 
Image | [requirements](cornac/models/amr/requirements.txt), CPU / GPU | [quick-start](examples/amr_clothing.py) | | [Hybrid Deep Representation Learning of Ratings and Reviews (HRDR)](cornac/models/hrdr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.hrdr.recom_hrdr), [paper](https://www.sciencedirect.com/science/article/abs/pii/S0925231219313207) | Content-Based / Text | [requirements](cornac/models/hrdr/requirements.txt), CPU / GPU | [quick-start](examples/hrdr_example.py) | | [LightGCN: Simplifying and Powering Graph Convolution Network](cornac/models/lightgcn), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.lightgcn.recom_lightgcn), [paper](https://arxiv.org/pdf/2002.02126.pdf) | Collaborative Filtering | [requirements](cornac/models/lightgcn/requirements.txt), CPU / GPU | [quick-start](examples/lightgcn_example.py) @@ -166,6 +167,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. 
E | | [Temporal-Item-Frequency-based User-KNN (TIFUKNN)](cornac/models/tifuknn), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.tifuknn.recom_tifuknn), [paper](https://arxiv.org/pdf/2006.00556.pdf) | Next-Basket | CPU | [quick-start](examples/tifuknn_tafeng.py) | | [Variational Autoencoder for Top-N Recommendations (RecVAE)](cornac/models/recvae), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.recvae.recom_recvae), [paper](https://doi.org/10.1145/3336191.3371831) | Collaborative Filtering | [requirements](cornac/models/recvae/requirements.txt), CPU / GPU | [quick-start](examples/recvae_example.py) | 2019 | [Correlation-Sensitive Next-Basket Recommendation (Beacon)](cornac/models/beacon), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#correlation-sensitive-next-basket-recommendation-beacon), [paper](https://www.ijcai.org/proceedings/2019/0389.pdf) | Next-Basket | [requirements](cornac/models/beacon/requirements.txt), CPU / GPU | [quick-start](examples/beacon_tafeng.py) +| | [BERT4Rec: Sequential Recommendation with Bidirectional Encoder Representations from Transformer (BERT4Rec)](cornac/models/bert4rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.bert4rec.recom_bert4rec), [paper](https://arxiv.org/pdf/1904.06690.pdf) | Sequential | [requirements](cornac/models/bert4rec/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py) | | [Embarrassingly Shallow Autoencoders for Sparse Data (EASEᴿ)](cornac/models/ease), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.ease.recom_ease), [paper](https://arxiv.org/pdf/1905.03375.pdf) | Collaborative Filtering | CPU | [quick-start](examples/ease_movielens.py) | | [Neural Graph Collaborative Filtering (NGCF)](cornac/models/ngcf), 
[docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.ngcf.recom_ngcf), [paper](https://arxiv.org/pdf/1905.08108.pdf) | Collaborative Filtering | [requirements](cornac/models/ngcf/requirements.txt), CPU / GPU | [quick-start](examples/ngcf_example.py) | | [Sampler Design for Bayesian Personalized Ranking by Leveraging View Data (VEBPR)](cornac/models/bpr), [paper](https://arxiv.org/pdf/1809.08162) | Collaborative Filtering | CPU | [quick-start](examples/vebpr_example.py) @@ -174,6 +176,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E | | [Multi-Task Explainable Recommendation (MTER)](cornac/models/mter), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.mter.recom_mter), [paper](https://arxiv.org/pdf/1806.03568.pdf) | Explainable | CPU | [quick-start](examples/mter_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/07_explanations.ipynb) | | [Neural Attention Rating Regression with Review-level Explanations (NARRE)](cornac/models/narre), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.narre.recom_narre), [paper](http://www.thuir.cn/group/~YQLiu/publications/WWW2018_CC.pdf) | Explainable / Content-Based | [requirements](cornac/models/narre/requirements.txt), CPU / GPU | [quick-start](examples/narre_example.py) | | [Probabilistic Collaborative Representation Learning (PCRL)](cornac/models/pcrl), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.pcrl.recom_pcrl), [paper](http://www.hadylauw.com/publications/uai18.pdf) | Content-Based / Graph | [requirements](cornac/models/pcrl/requirements.txt), CPU / GPU | [quick-start](examples/pcrl_example.py) +| | [Self-Attentive Sequential Recommendation (SASRec)](cornac/models/sasrec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.sasrec.recom_sasrec), 
[paper](https://arxiv.org/pdf/1808.09781.pdf) | Sequential | [requirements](cornac/models/sasrec/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py) | | [Variational Autoencoder for Collaborative Filtering (VAECF)](cornac/models/vaecf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.vaecf.recom_vaecf), [paper](https://arxiv.org/pdf/1802.05814.pdf) | Collaborative Filtering | [requirements](cornac/models/vaecf/requirements.txt), CPU / GPU | [quick-start](examples/vaecf_citeulike.py), [param-search](tutorials/param_search_vaecf.ipynb), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/09_deep_learning.ipynb) | 2017 | [Collaborative Variational Autoencoder (CVAE)](cornac/models/cvae), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.cvae.recom_cvae), [paper](http://eelxpeng.github.io/assets/paper/Collaborative_Variational_Autoencoder.pdf) | Content-Based / Text | [requirements](cornac/models/cvae/requirements.txt), CPU / GPU | [quick-start](examples/cvae_example.py) | | [Conditional Variational Autoencoder for Collaborative Filtering (CVAECF)](cornac/models/cvaecf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.cvaecf.recom_cvaecf), [paper](https://dl.acm.org/doi/10.1145/3132847.3132972) | Content-Based / Text | [requirements](cornac/models/cvaecf/requirements.txt), CPU / GPU | [quick-start](examples/cvaecf_filmtrust.py) @@ -188,7 +191,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. 
E | | [Collaborative Ordinal Embedding (COE)](cornac/models/coe), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.coe.recom_coe), [paper](http://www.hadylauw.com/publications/sdm16.pdf) | Collaborative Filtering | [requirements](cornac/models/coe/requirements.txt), CPU / GPU | | | [Convolutional Matrix Factorization (ConvMF)](cornac/models/conv_mf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.conv_mf.recom_convmf), [paper](http://uclab.khu.ac.kr/resources/publication/C_351.pdf) | Content-Based / Text | [requirements](cornac/models/conv_mf/requirements.txt), CPU / GPU | [quick-start](examples/conv_mf_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/09_deep_learning.ipynb) | | [Learning to Rank Features for Recommendation over Multiple Categories (LRPPM)](cornac/models/lrppm), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#learn-to-rank-user-preferences-based-on-phrase-level-sentiment-analysis-across-multiple-categories-lrppm), [paper](https://www.yongfeng.me/attach/sigir16-chen.pdf) | Explainable | CPU | [quick-start](examples/lrppm_example.py) -| | [Session-based Recommendations With Recurrent Neural Networks (GRU4Rec)](cornac/models/gru4rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.gru4rec.recom_gru4rec), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Next-Item | [requirements](cornac/models/gru4rec/requirements.txt), CPU / GPU | [quick-start](examples/gru4rec_yoochoose.py) +| | [Session-based Recommendations With Recurrent Neural Networks (GRU4Rec)](cornac/models/gru4rec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.gru4rec.recom_gru4rec), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Sequential | [requirements](cornac/models/gru4rec/requirements.txt), CPU / GPU | [quick-start](examples/gru4rec_yoochoose.py) | | 
[Spherical K-means (SKM)](cornac/models/skm), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.skm.recom_skmeans), [paper](https://www.sciencedirect.com/science/article/pii/S092523121501509X) | Collaborative Filtering | CPU | [quick-start](examples/skm_movielens.py) | | [Visual Bayesian Personalized Ranking (VBPR)](cornac/models/vbpr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.vbpr.recom_vbpr), [paper](https://arxiv.org/pdf/1510.01784.pdf) | Content-Based / Image | [requirements](cornac/models/vbpr/requirements.txt), CPU / GPU | [quick-start](examples/vbpr_tradesy.py), [cross-modality](tutorials/vbpr_text.ipynb), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/05_multimodality.ipynb) | 2015 | [Collaborative Deep Learning (CDL)](cornac/models/cdl), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.cdl.recom_cdl), [paper](https://arxiv.org/pdf/1409.2944.pdf) | Content-Based / Text | [requirements](cornac/models/cdl/requirements.txt), CPU / GPU | [quick-start](examples/cdl_example.py), [deep-dive](https://github.com/lgabs/cornac/blob/luan/describe-gpu-supported-models-readme/tutorials/working_with_auxiliary_data.md) @@ -199,6 +202,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. 
E | 2013 | [Hidden Factors and Hidden Topics (HFT)](cornac/models/hft), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.hft.recom_hft), [paper](https://cs.stanford.edu/people/jure/pubs/reviews-recsys13.pdf) | Content-Based / Text | CPU | [quick-start](examples/hft_example.py) | 2012 | [Weighted Bayesian Personalized Ranking (WBPR)](cornac/models/bpr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#weighted-bayesian-personalized-ranking-wbpr), [paper](http://proceedings.mlr.press/v18/gantner12a/gantner12a.pdf) | Collaborative Filtering | CPU | [quick-start](examples/bpr_netflix.py) | 2011 | [Collaborative Topic Regression (CTR)](cornac/models/ctr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.ctr.recom_ctr), [paper](http://www.cs.columbia.edu/~blei/papers/WangBlei2011.pdf) | Content-Based / Text | CPU | [quick-start](examples/ctr_example_citeulike.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/05_multimodality.ipynb) +| 2010 | [Factorizing Personalized Markov Chains for Next-Basket Recommendation (FPMC)](cornac/models/fpmc), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.fpmc.recom_fpmc), [paper](https://dl.acm.org/doi/10.1145/1772690.1772773) | Sequential | [requirements](cornac/models/fpmc/requirements.txt), CPU / GPU | [quick-start](examples/sequential_diginetica.py) | Earlier | [Baseline Only](cornac/models/baseline_only), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#baseline-only), [paper](http://courses.ischool.berkeley.edu/i290-dm/s11/SECURE/a1-koren.pdf) | Baseline | CPU | [quick-start](examples/svd_example.py) | | [Bayesian Personalized Ranking (BPR)](cornac/models/bpr), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#bayesian-personalized-ranking-bpr) [paper](https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf) | Collaborative 
Filtering | CPU | [quick-start](examples/bpr_netflix.py), [deep-dive](https://github.com/recommenders-team/recommenders/blob/main/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb) | | [Factorization Machines (FM)](cornac/models/fm), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#factorization-machines-fm), [paper](https://www.csie.ntu.edu.tw/~b97053/paper/Factorization%20Machines%20with%20libFM.pdf) | Collaborative Filtering / Content-Based | Linux, CPU | [quick-start](examples/fm_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/06_contextual_awareness.ipynb) @@ -210,7 +214,7 @@ The table below lists the recommendation models/algorithms featured in Cornac. E | | [Most Popular (MostPop)](cornac/models/most_pop), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.most_pop.recom_most_pop), [paper](https://arxiv.org/ftp/arxiv/papers/1205/1205.2618.pdf) | Baseline | CPU | [quick-start](examples/bpr_netflix.py) | | [Non-negative Matrix Factorization (NMF)](cornac/models/nmf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.nmf.recom_nmf), [paper](http://papers.nips.cc/paper/1861-algorithms-for-non-negative-matrix-factorization.pdf) | Collaborative Filtering | CPU | [quick-start](examples/nmf_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/03_matrix_factorization.ipynb) | | [Probabilistic Matrix Factorization (PMF)](cornac/models/pmf), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.pmf.recom_pmf), [paper](https://papers.nips.cc/paper/3208-probabilistic-matrix-factorization.pdf) | Collaborative Filtering | CPU | [quick-start](examples/pmf_ratio.py) -| | [Session Popular (SPop)](cornac/models/spop), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.spop.recom_spop), 
[paper](https://arxiv.org/pdf/1511.06939.pdf) | Next-Item / Baseline | CPU | [quick-start](examples/spop_yoochoose.py) +| | [Session Popular (SPop)](cornac/models/spop), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.spop.recom_spop), [paper](https://arxiv.org/pdf/1511.06939.pdf) | Sequential / Baseline | CPU | [quick-start](examples/spop_yoochoose.py) | | [Singular Value Decomposition (SVD)](cornac/models/svd), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.svd.recom_svd), [paper](https://people.engr.tamu.edu/huangrh/Spring16/papers_course/matrix_factorization.pdf) | Collaborative Filtering | CPU | [quick-start](examples/svd_example.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/03_matrix_factorization.ipynb) | | [Social Recommendation using PMF (SoRec)](cornac/models/sorec), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#module-cornac.models.sorec.recom_sorec), [paper](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.304.2464&rep=rep1&type=pdf) | Content-Based / Social | CPU | [quick-start](examples/sorec_filmtrust.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/05_multimodality.ipynb) | | [User K-Nearest-Neighbors (UserKNN)](cornac/models/knn), [docs](https://cornac.readthedocs.io/en/stable/api_ref/models.html#user-k-nearest-neighbors-userknn), [paper](https://arxiv.org/pdf/1301.7363.pdf) | Neighborhood-Based | CPU | [quick-start](examples/knn_movielens.py), [deep-dive](https://github.com/PreferredAI/tutorials/blob/master/recommender-systems/02_neighborhood.ipynb) diff --git a/examples/README.md b/examples/README.md index f667087fd..ad3d2ace9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -120,12 +120,18 @@ ---- -## Next-Item Algorithms +## Sequential Recommendations + +### Session-Based With Yoochoose Dataset [spop_yoochoose.py](spop_yoochoose.py) - 
Next-item recommendation based on item popularity. [gru4rec_yoochoose.py](gru4rec_yoochoose.py) - Example of Session-based Recommendations with Recurrent Neural Networks (GRU4Rec). +### Session-Based & Session-Aware With Diginetica Dataset + +[sequential_diginetica.py](sequential_diginetica.py) - Session-based and session-aware recommendations with the FPMC, BERT4Rec, GPT2Rec, GRU4Rec, and SASRec models. + ---- ## Next-Basket Algorithms From 71ee633835c3c231e8f6537e0345d9d0a1ec023e Mon Sep 17 00:00:00 2001 From: hieuddo Date: Fri, 29 May 2026 23:42:13 +0800 Subject: [PATCH 10/10] remove unused nonlocal --- cornac/models/seq_utils/iterators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cornac/models/seq_utils/iterators.py b/cornac/models/seq_utils/iterators.py index 918603e3d..4f09320e8 100644 --- a/cornac/models/seq_utils/iterators.py +++ b/cornac/models/seq_utils/iterators.py @@ -208,7 +208,6 @@ def user_io_iter(train_set, n_sample=0, sample_alpha=0, rng=None, batch_size=1, pool_iter = iter(sequences) def _refill(): - nonlocal end_mask while end_mask.sum() > 0: try: uid, seq, _session_starts = next(pool_iter)