LensKit Recommender 只对部分用户返回结果,对其他用户则返回空的 DataFrame。为什么会这样?
LensKit Recommender only returns results for some users, otherwise returns empty DataFrame. Why is this happening?
我正在尝试使用 Django 框架实现一个群组推荐系统,使用的是 Python 的 LensKit 工具(具体来说,是由 UserUser 算法适配而来的 Recommender 对象)。但是,对于个人推荐,它只在某些情况下(对某些特定用户)才返回结果;而对于用户组,它却总是能返回推荐(我创建了一个混合用户,其评分是组成员评分的平均值,然后为该混合用户请求推荐)。下面是我为单个用户和用户组请求推荐的实现:
from rest_framework import viewsets, status
from .models import Movie, Rating, Customer, Recommendation
from .serializers import MovieSerializer, RatingSerializer, UserSerializer, GroupSerializer, CustomerSerializer, RecommendationSerializer
from rest_framework.response import Response
from rest_framework.decorators import action
from django.contrib.auth.models import User, Group
from rest_framework.authentication import TokenAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from pandas import Series
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
import lenskit.datasets as ds
class CustomerViewSet(viewsets.ModelViewSet):
    """Customer endpoints plus a per-user movie recommendation action."""

    queryset = Customer.objects.all()
    serializer_class = CustomerSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated,)

    @action(methods=['GET'], detail=False)
    def recommendations(self, request):
        """Return up to 20 recommended movies for the authenticated user.

        The user's stored ratings are passed to a UserUser recommender that is
        fitted on the MovieLens data under ``datasets/``. Recommendations are
        saved as ``Recommendation`` rows keyed by the username (rows from
        earlier requests are deleted first) and the matching movies are
        returned ordered by predicted score, highest first.

        An optional ``genre`` entry in ``request.data`` restricts the result
        to movies whose ``genres`` contain that value; the default ``'All'``
        disables the filter.

        The recommender can return an empty frame, in which case the response
        body is an empty list.
        """
        max_recommendations = 20

        genre = request.data.get('genre', 'All')
        user = request.user
        name = user.username

        # Drop the recommendations produced by previous requests.
        Recommendation.objects.filter(name=name).delete()

        # movieId -> stars for everything this user has rated.
        user_ratings = {
            int(rating.movie.movieId): rating.stars
            for rating in Rating.objects.filter(user=user.id)
        }

        data = ds.MovieLens('datasets/')
        algo = Recommender.adapt(UserUser(15, min_nbrs=3))
        algo.fit(data.ratings)

        # The requesting user is not part of the training set, so a placeholder
        # id (-1) is used and the ratings are supplied explicitly.
        recs = algo.recommend(user=-1, ratings=Series(user_ratings))

        saved = 0
        for _, row in recs.iterrows():
            # Resolve the Movie first: the genre filter needs it, and its
            # movieId is what the lookup further down matches against.
            # (Previously `movie` was never assigned here, so any genre other
            # than 'All' raised NameError.)
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            if genre != 'All' and genre not in movie.genres:
                continue
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            # Old rows were deleted above, so a local counter matches the
            # number of stored rows without a COUNT query per iteration.
            saved += 1
            if saved >= max_recommendations:
                break

        # Return the movies ordered by their predicted rating.
        ordered = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in ordered]
        serializer = MovieSerializer(rec_movies, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)
class GroupViewSet(viewsets.ModelViewSet):
    """Group endpoints plus a group-level movie recommendation action."""

    queryset = Group.objects.all()
    serializer_class = GroupSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated, )

    @action(methods=['GET'], detail=True)
    def recommendations(self, request, pk=None):
        """Return up to 20 recommended movies for the group with id ``pk``.

        The requesting user must be a member of the group; otherwise a 400
        response with an explanatory message is returned.

        The members' ratings are merged into one "hybrid user" whose rating
        for a movie is the mean of the members' ratings for it. That profile
        is passed to a UserUser recommender fitted on the MovieLens data under
        ``datasets/``. Recommendations are saved as ``Recommendation`` rows
        keyed by the group name (rows from earlier requests are deleted first)
        and the matching movies are returned ordered by predicted score,
        highest first.

        An optional ``genre`` entry in ``request.data`` restricts the result
        to movies whose ``genres`` contain that value; the default ``'All'``
        disables the filter.
        """
        max_recommendations = 20

        # Previously a second, unconditional `request.data['genre']` lookup
        # overrode this default and raised KeyError when no genre was sent.
        genre = request.data.get('genre', 'All')

        group = Group.objects.get(id=pk)
        members = group.user_set.all()

        # Only members of the group may request its recommendations.
        if request.user not in members:
            response = {'message': 'You are not a member of this group'}
            return Response(response, status=status.HTTP_400_BAD_REQUEST)

        name = group.name

        # Drop the recommendations produced by previous requests.
        Recommendation.objects.filter(name=name).delete()

        # movieId -> [sum of stars, number of ratings] across all members.
        totals = {}
        for member in members:
            for rating in Rating.objects.filter(user=member.id):
                entry = totals.setdefault(int(rating.movie.movieId), [0, 0])
                entry[0] += rating.stars
                entry[1] += 1

        # Average rating per movie: the profile of the hybrid user.
        rating_dict = {
            movie_id: total / count
            for movie_id, (total, count) in totals.items()
        }

        # Train the recommender.
        data = ds.MovieLens('datasets/')
        algo = Recommender.adapt(UserUser(15, min_nbrs=3))
        algo.fit(data.ratings)

        # The hybrid user is not part of the training set, so a placeholder
        # id (-1) is used and the ratings are supplied explicitly.
        recs = algo.recommend(user=-1, ratings=Series(rating_dict))

        saved = 0
        for _, row in recs.iterrows():
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            if genre != 'All' and genre not in movie.genres:
                continue
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            # Old rows were deleted above, so a local counter matches the
            # number of stored rows without a COUNT query per iteration.
            saved += 1
            if saved >= max_recommendations:
                break

        # Return the movies ordered by the predicted score for the group.
        ordered = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in ordered]
        serializer = MovieSerializer(rec_movies, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)
下面是正常响应的示例:
[
{
"id": 17521,
"movieId": "318",
"title": "Shawshank Redemption, The (1994)",
"genres": "Crime|Drama",
"link": "https://www.imdb.com/title/tt0111161/",
"average_rating": 4.487138263665595,
"no_ratings": 311,
"poster": "/default-movie.jpg"
},
{
"id": 17503,
"movieId": "296",
"title": "Pulp Fiction (1994)",
"genres": "Comedy|Crime|Drama|Thriller",
"link": "https://www.imdb.com/title/tt0110912/",
"average_rating": 4.256172839506172,
"no_ratings": 324,
"poster": "/default-movie.jpg"
},
...
]
无效响应:
[]
在后一种情况下,打印 Recommender 返回的 DataFrame 显示:
Empty DataFrame
Columns: [item, score]
Index: []
我不确定我做错了什么。有人可以帮忙吗?
导致此问题的最可能原因是:用户-用户(user-user)推荐器无法为该用户构建出足够多可用的邻域(neighborhood)来生成推荐。这是基于邻域的推荐方法的固有缺点。
解决方案是:改用一种只要用户有一些评分就总能给出推荐的算法(例如某种矩阵分解算法),和/或配置一个后备算法(例如 Popular),在个性化协同过滤器无法给出推荐时由它来推荐。
(另一种解决方案是为 LensKit 实现各种冷启动推荐器或基于内容的推荐器,但该项目目前尚未提供这些功能。)
我正在尝试使用 Django 框架实现一个群组推荐系统,使用的是 Python 的 LensKit 工具(具体来说,是由 UserUser 算法适配而来的 Recommender 对象)。但是,对于个人推荐,它只在某些情况下(对某些特定用户)才返回结果;而对于用户组,它却总是能返回推荐(我创建了一个混合用户,其评分是组成员评分的平均值,然后为该混合用户请求推荐)。下面是我为单个用户和用户组请求推荐的实现:
from rest_framework import viewsets, status
from .models import Movie, Rating, Customer, Recommendation
from .serializers import MovieSerializer, RatingSerializer, UserSerializer, GroupSerializer, CustomerSerializer, RecommendationSerializer
from rest_framework.response import Response
from rest_framework.decorators import action
from django.contrib.auth.models import User, Group
from rest_framework.authentication import TokenAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from pandas import Series
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
import lenskit.datasets as ds
class CustomerViewSet(viewsets.ModelViewSet):
    """Customer endpoints plus a per-user movie recommendation action."""

    queryset = Customer.objects.all()
    serializer_class = CustomerSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated,)

    @action(methods=['GET'], detail=False)
    def recommendations(self, request):
        """Return up to 20 recommended movies for the authenticated user.

        The user's stored ratings are passed to a UserUser recommender that is
        fitted on the MovieLens data under ``datasets/``. Recommendations are
        saved as ``Recommendation`` rows keyed by the username (rows from
        earlier requests are deleted first) and the matching movies are
        returned ordered by predicted score, highest first.

        An optional ``genre`` entry in ``request.data`` restricts the result
        to movies whose ``genres`` contain that value; the default ``'All'``
        disables the filter.

        The recommender can return an empty frame, in which case the response
        body is an empty list.
        """
        max_recommendations = 20

        genre = request.data.get('genre', 'All')
        user = request.user
        name = user.username

        # Drop the recommendations produced by previous requests.
        Recommendation.objects.filter(name=name).delete()

        # movieId -> stars for everything this user has rated.
        user_ratings = {
            int(rating.movie.movieId): rating.stars
            for rating in Rating.objects.filter(user=user.id)
        }

        data = ds.MovieLens('datasets/')
        algo = Recommender.adapt(UserUser(15, min_nbrs=3))
        algo.fit(data.ratings)

        # The requesting user is not part of the training set, so a placeholder
        # id (-1) is used and the ratings are supplied explicitly.
        recs = algo.recommend(user=-1, ratings=Series(user_ratings))

        saved = 0
        for _, row in recs.iterrows():
            # Resolve the Movie first: the genre filter needs it, and its
            # movieId is what the lookup further down matches against.
            # (Previously `movie` was never assigned here, so any genre other
            # than 'All' raised NameError.)
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            if genre != 'All' and genre not in movie.genres:
                continue
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            # Old rows were deleted above, so a local counter matches the
            # number of stored rows without a COUNT query per iteration.
            saved += 1
            if saved >= max_recommendations:
                break

        # Return the movies ordered by their predicted rating.
        ordered = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in ordered]
        serializer = MovieSerializer(rec_movies, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)
class GroupViewSet(viewsets.ModelViewSet):
    """Group endpoints plus a group-level movie recommendation action."""

    queryset = Group.objects.all()
    serializer_class = GroupSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated, )

    @action(methods=['GET'], detail=True)
    def recommendations(self, request, pk=None):
        """Return up to 20 recommended movies for the group with id ``pk``.

        The requesting user must be a member of the group; otherwise a 400
        response with an explanatory message is returned.

        The members' ratings are merged into one "hybrid user" whose rating
        for a movie is the mean of the members' ratings for it. That profile
        is passed to a UserUser recommender fitted on the MovieLens data under
        ``datasets/``. Recommendations are saved as ``Recommendation`` rows
        keyed by the group name (rows from earlier requests are deleted first)
        and the matching movies are returned ordered by predicted score,
        highest first.

        An optional ``genre`` entry in ``request.data`` restricts the result
        to movies whose ``genres`` contain that value; the default ``'All'``
        disables the filter.
        """
        max_recommendations = 20

        # Previously a second, unconditional `request.data['genre']` lookup
        # overrode this default and raised KeyError when no genre was sent.
        genre = request.data.get('genre', 'All')

        group = Group.objects.get(id=pk)
        members = group.user_set.all()

        # Only members of the group may request its recommendations.
        if request.user not in members:
            response = {'message': 'You are not a member of this group'}
            return Response(response, status=status.HTTP_400_BAD_REQUEST)

        name = group.name

        # Drop the recommendations produced by previous requests.
        Recommendation.objects.filter(name=name).delete()

        # movieId -> [sum of stars, number of ratings] across all members.
        totals = {}
        for member in members:
            for rating in Rating.objects.filter(user=member.id):
                entry = totals.setdefault(int(rating.movie.movieId), [0, 0])
                entry[0] += rating.stars
                entry[1] += 1

        # Average rating per movie: the profile of the hybrid user.
        rating_dict = {
            movie_id: total / count
            for movie_id, (total, count) in totals.items()
        }

        # Train the recommender.
        data = ds.MovieLens('datasets/')
        algo = Recommender.adapt(UserUser(15, min_nbrs=3))
        algo.fit(data.ratings)

        # The hybrid user is not part of the training set, so a placeholder
        # id (-1) is used and the ratings are supplied explicitly.
        recs = algo.recommend(user=-1, ratings=Series(rating_dict))

        saved = 0
        for _, row in recs.iterrows():
            movie = Movie.objects.get(movieId=str(int(row['item'])))
            if genre != 'All' and genre not in movie.genres:
                continue
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            # Old rows were deleted above, so a local counter matches the
            # number of stored rows without a COUNT query per iteration.
            saved += 1
            if saved >= max_recommendations:
                break

        # Return the movies ordered by the predicted score for the group.
        ordered = Recommendation.objects.filter(name=name).order_by('-pred_stars')
        rec_movies = [Movie.objects.get(movieId=rec.movieId) for rec in ordered]
        serializer = MovieSerializer(rec_movies, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)
下面是正常响应的示例:
[
{
"id": 17521,
"movieId": "318",
"title": "Shawshank Redemption, The (1994)",
"genres": "Crime|Drama",
"link": "https://www.imdb.com/title/tt0111161/",
"average_rating": 4.487138263665595,
"no_ratings": 311,
"poster": "/default-movie.jpg"
},
{
"id": 17503,
"movieId": "296",
"title": "Pulp Fiction (1994)",
"genres": "Comedy|Crime|Drama|Thriller",
"link": "https://www.imdb.com/title/tt0110912/",
"average_rating": 4.256172839506172,
"no_ratings": 324,
"poster": "/default-movie.jpg"
},
...
]
无效响应:
[]
在后一种情况下,打印 Recommender 返回的 DataFrame 显示:
Empty DataFrame
Columns: [item, score]
Index: []
我不确定我做错了什么。有人可以帮忙吗?
导致此问题的最可能原因是:用户-用户(user-user)推荐器无法为该用户构建出足够多可用的邻域(neighborhood)来生成推荐。这是基于邻域的推荐方法的固有缺点。
解决方案是:改用一种只要用户有一些评分就总能给出推荐的算法(例如某种矩阵分解算法),和/或配置一个后备算法(例如 Popular),在个性化协同过滤器无法给出推荐时由它来推荐。
(另一种解决方案是为 LensKit 实现各种冷启动推荐器或基于内容的推荐器,但该项目目前尚未提供这些功能。)