How do I optimize the hyperparameters of LightFM?
I am using the LightFM recommendation library on my dataset, and it gives me the results shown in the plot below.
from lightfm import LightFM

NUM_THREADS = 4
NUM_COMPONENTS = 30
NUM_EPOCHS = 5
ITEM_ALPHA = 1e-6
LEARNING_RATE = 0.005
LEARNING_SCHEDULE = 'adagrad'
RANDOM_SEED = 29031994

warp_model = LightFM(loss='warp',
                     learning_rate=LEARNING_RATE,
                     learning_schedule=LEARNING_SCHEDULE,
                     item_alpha=ITEM_ALPHA,
                     no_components=NUM_COMPONENTS,
                     random_state=RANDOM_SEED)

bpr_model = LightFM(loss='bpr',
                    learning_rate=LEARNING_RATE,
                    learning_schedule=LEARNING_SCHEDULE,
                    item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS,
                    random_state=RANDOM_SEED)
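Since the question is about the AUC score, here is a hedged sketch of how the test AUC of the two models above could be tracked epoch by epoch; the train and test interaction matrices and the per-epoch loop are assumptions and not part of the original question:

from lightfm.evaluation import auc_score

# Sketch only: `train` and `test` are assumed interaction matrices.
warp_auc = []
bpr_auc = []
for epoch in range(NUM_EPOCHS):
    # fit_partial resumes training from the current state, one epoch at a time.
    warp_model.fit_partial(train, epochs=1, num_threads=NUM_THREADS)
    bpr_model.fit_partial(train, epochs=1, num_threads=NUM_THREADS)
    warp_auc.append(auc_score(warp_model, test, train_interactions=train).mean())
    bpr_auc.append(auc_score(bpr_model, test, train_interactions=train).mean())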
The shapes of my features are as follows:
How can I optimize my hyperparameters to improve the area under the curve (AUC) score?
You can find a good general guide to hyperparameter optimization in the sklearn docs.

A simple but effective technique you can use to optimize a LightFM model is random search. Roughly, it consists of the following steps:
- Split your data into a training set, a validation set, and a test set.
- Define a distribution over every hyperparameter you want to optimize. For example, if you are optimizing the learning rate, you could use an exponential distribution with a mean of 0.05; if you are optimizing the loss function, you could sample uniformly from ['warp', 'bpr', 'warp-kos'].
- In every iteration of the optimization, sample all of the hyperparameters and use them to fit a model on the training data. Evaluate the model's performance on the validation set.
- After a number of optimization steps, select the one with the best validation performance.

To gauge the performance of the final model, you should use the test set: simply evaluate the best validation model on the test set.
The following script illustrates this:
import itertools

import numpy as np

from lightfm import LightFM
from lightfm.evaluation import auc_score


def sample_hyperparameters():
    """
    Yield possible hyperparameter choices.
    """

    while True:
        yield {
            "no_components": np.random.randint(16, 64),
            "learning_schedule": np.random.choice(["adagrad", "adadelta"]),
            "loss": np.random.choice(["bpr", "warp", "warp-kos"]),
            "learning_rate": np.random.exponential(0.05),
            "item_alpha": np.random.exponential(1e-8),
            "user_alpha": np.random.exponential(1e-8),
            "max_sampled": np.random.randint(5, 15),
            "num_epochs": np.random.randint(5, 50),
        }


def random_search(train, test, num_samples=10, num_threads=1):
    """
    Sample random hyperparameters, fit a LightFM model, and evaluate it
    on the test set.

    Parameters
    ----------

    train: np.float32 coo_matrix of shape [n_users, n_items]
        Training data.
    test: np.float32 coo_matrix of shape [n_users, n_items]
        Test data.
    num_samples: int, optional
        Number of hyperparameter choices to evaluate.

    Returns
    -------

    generator of (auc_score, hyperparameter dict, fitted model)
    """

    for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):
        num_epochs = hyperparams.pop("num_epochs")

        model = LightFM(**hyperparams)
        model.fit(train, epochs=num_epochs, num_threads=num_threads)

        score = auc_score(model, test, train_interactions=train,
                          num_threads=num_threads).mean()

        hyperparams["num_epochs"] = num_epochs

        yield (score, hyperparams, model)


if __name__ == "__main__":
    from lightfm.datasets import fetch_movielens

    data = fetch_movielens()
    train = data["train"]
    test = data["test"]

    (score, hyperparams, model) = max(random_search(train, test, num_threads=2),
                                      key=lambda x: x[0])

    print("Best score {} at {}".format(score, hyperparams))