KeyError: "likeCount" and IndexError: list index out of range in Python
KeyError: "likeCount" and IndexError: list index out of range in Python
我正在使用 Python Jupyter Notebook 获取统计数据,例如某个特定频道所有视频(约 14k 个视频)的总喜欢数、不喜欢数和总观看次数。我使用了在 GitHub 上找到的这段代码。
我能够运行代码,直到最后一部分。
当我尝试运行下面的代码行时,我收到错误消息“KeyError: 'commentCount'”。“likeCount”、“dislikeCount”等也是同样的情况。(错误详情请打开 URL)
# Question's original loop, kept exactly as posted because it reproduces both
# reported errors.
for i in range(len(allVideos)):
# NOTE: range() already yields 0..len(allVideos)-1; incrementing i here skips
# index 0 and makes the last iteration read one past the end (IndexError).
i += 1
title.append((allVideos[i])['snippet']['title'])
publishedDate.append((allVideos[i])['snippet']['publishedAt'])
video_description.append((allVideos[i])['snippet']['description'])
# NOTE: plain ['...Count'] subscripts raise KeyError whenever 'statistics'
# does not contain that key.
liked.append(int((stats[i])['statistics']['likeCount']))
disliked.append(int((stats[i])['statistics']['dislikeCount']))
views.append(int((stats[i])['statistics']['viewCount']))
comment.append(int((stats[i])['statistics']['commentCount']))
videoid.append(allVideos[i]['snippet']['resourceId']['videoId'])
KeyError: 'commentCount'
我知道这个问题可能是由于某些视频的评论、喜欢和不喜欢功能被禁用造成的。我该如何解决这个问题?
我注释掉了上面提到的部分指标行并重新运行了代码,最终收到以下错误消息:“IndexError: list index out of range”(列表索引超出范围)。
# Second attempt from the question: the statistics lookups are commented out,
# so the KeyError is gone, but the index shift below is still present.
for i in range(len(allVideos)):
# NOTE: i runs 1..len(allVideos) after this increment, so the final
# allVideos[i] access is out of range (IndexError).
i += 1
title.append((allVideos[i])['snippet']['title'])
#publishedDate.append((allVideos[i])['snippet']['publishedAt'])
#video_description.append((allVideos[i])['snippet']['description'])
#liked.append(int((stats[i])['statistics']['likeCount']))
#disliked.append(int((stats[i])['statistics']['dislikeCount']))
#views.append(int((stats[i])['statistics']['viewCount']))
#comment.append(int((stats[i])['statistics']['commentCount']))
#videoid.append(allVideos[i]['snippet']['resourceId']['videoId'])
IndexError: list index out of range
有没有高手可以帮我解决这个问题?我尝试了不同的方法来修复它,但都没有成功。
你得到 list index out of range 是因为你使用了 i += 1。range(len(allVideos)) 已经生成了从 0 到 len(allVideos)-1 的全部索引,再加 1 会跳过第一个元素,并在最后一次迭代时越界。你不需要它。
你也可以学习使用不带 range(len(...)) 的 for 循环,例如:
# Iterate over the items directly; no index variable is needed.
for video in allVideos:
    title.append(video['snippet']['title'])
    publishedDate.append(video['snippet']['publishedAt'])
如果你需要在 for 循环中使用序号,那么你可以使用 enumerate:
# enumerate() yields (index, item) pairs, so `number` can index the parallel
# `stats` list without a manual counter.
for number, video in enumerate(allVideos):
    title.append(video['snippet']['title'])
    publishedDate.append(video['snippet']['publishedAt'])
    comment.append(int(stats[number]['statistics']['commentCount']))
    liked.append(int(stats[number]['statistics']['likeCount']))
但在你的代码中,使用 zip() 会更简单:
# zip() pairs the two lists element by element and stops at the shorter one.
for video, stat in zip(allVideos, stats):
    title.append(video['snippet']['title'])
    publishedDate.append(video['snippet']['publishedAt'])
    comment.append(int(stat['statistics']['commentCount']))
    liked.append(int(stat['statistics']['likeCount']))
如果你先取出 ['snippet'] 和 ['statistics'] 并赋值给变量,可以让代码更具可读性:
for video, stat in zip(allVideos, stats):
    # Bind the nested dicts once so each lookup below stays short.
    v = video['snippet']
    title.append(v['title'])
    publishedDate.append(v['publishedAt'])

    s = stat['statistics']
    comment.append(int(s['commentCount']))
    liked.append(int(s['likeCount']))
如果你得到 KeyError,那么你应该使用 if/else:
s = stat['statistics']

# Append 0 when the key is missing instead of letting the lookup raise
# KeyError.
if 'commentCount' in s:
    comment.append(int(s['commentCount']))
else:
    comment.append(0)

if 'likeCount' in s:
    liked.append(int(s['likeCount']))
else:
    liked.append(0)
或者更简短地使用 .get(key, default value):
# dict.get() returns the supplied default (0) when the key is missing, so no
# KeyError is raised and no if/else is needed.
s = stat['statistics']
comment.append( int(s.get('commentCount', 0)) )
liked.append( int(s.get('likeCount', 0)) )
完整版:
for video, stat in zip(allVideos, stats):
    v = video['snippet']
    title.append(v['title'])
    publishedDate.append(v['publishedAt'])
    video_description.append(v['description'])
    videoid.append(v['resourceId']['videoId'])

    # .get(key, 0) substitutes 0 for any counter missing from 'statistics'.
    s = stat['statistics']
    liked.append(int(s.get('likeCount', 0)))
    disliked.append(int(s.get('dislikeCount', 0)))
    views.append(int(s.get('viewCount', 0)))
    comment.append(int(s.get('commentCount', 0)))
但是如果你想把数据放入 DataFrame,那么可以不用 title 等这些单独的列表,使代码更简单:
# Collect one list per video, then build the DataFrame in a single call
# instead of maintaining a separate list per column.
all_rows = []

for video, stat in zip(allVideos, stats):
    v = video['snippet']
    s = stat['statistics']

    # Order must match the `columns` list passed to pd.DataFrame below.
    row = [
        v['title'],
        v['resourceId']['videoId'],
        v['description'],
        v['publishedAt'],
        int(s.get('likeCount', 0)),
        int(s.get('dislikeCount', 0)),
        int(s.get('viewCount', 0)),
        int(s.get('commentCount', 0)),
    ]

    all_rows.append(row)

# - after `for`-loop -

df = pd.DataFrame(
    all_rows,
    columns=['title', 'videoIDS', 'video_description', 'publishedDate', 'likes', 'dislikes', 'views', 'comment']
)
编辑:
完整的工作代码:
import os

from googleapiclient.discovery import build
import pandas as pd

# Read the API key from the environment instead of embedding it in the
# script, so the credential is never published together with the code.
# Raises KeyError if YOUTUBE_API_KEY is not set.
youTubeApiKey = os.environ["YOUTUBE_API_KEY"]
youtube = build('youtube', 'v3', developerKey=youTubeApiKey)

# Search for the channel by name and use the first hit.
snippets = youtube.search().list(part="snippet", type="channel", q="nptelhrd").execute()
print('len(snippets):', len(snippets))
channel_id = snippets['items'][0]['snippet']['channelId']
print('channel_id:', channel_id)

# Kept under its own name so it is not confused with the per-video
# statistics collected further down.
channel_stats = youtube.channels().list(part="statistics", id=channel_id).execute()
print('len(stats):', len(channel_stats))

#status = youtube.channels().list(id=channel_id, part='status').execute()
#print('len(status):', len(status))

# Look up the id of the channel's "uploads" playlist.
content = youtube.channels().list(id=channel_id, part='contentDetails').execute()
print('len(content):', len(content))
upload_id = content['items'][0]['contentDetails']['relatedPlaylists']['uploads']
print('upload_id:', upload_id)

# Page through the playlist until the response carries no 'nextPageToken'.
all_videos = []
next_page_token = None
number = 0
while True:
    number += 1
    print('page', number)
    res = youtube.playlistItems().list(
        playlistId=upload_id,
        maxResults=50,
        part='snippet',
        pageToken=next_page_token,
    ).execute()
    all_videos += res['items']
    next_page_token = res.get('nextPageToken')
    if next_page_token is None:
        break
print('len(all_videos):', len(all_videos))

video_ids = [item['snippet']['resourceId']['videoId'] for item in all_videos]
print('len(video_ids):', len(video_ids))

# Fetch statistics in batches of 40 ids and key them by video id.
# NOTE(review): videos.list is not guaranteed to return one item per requested
# id (e.g. private or deleted videos) - confirm against the API reference.
# Pairing the two lists by position would then attach counters to the wrong
# title, so rows are matched by id instead.
stats = {}
for i in range(0, len(video_ids), 40):
    res = youtube.videos().list(id=','.join(video_ids[i:i+40]), part='statistics').execute()
    for item in res['items']:
        stats[item['id']] = item.get('statistics', {})
print('len(stats):', len(stats))

all_rows = []
for number, video in enumerate(all_videos, start=1):
    print('row', number)
    v = video['snippet']
    video_id = v['resourceId']['videoId']
    # Videos without a statistics entry fall back to an empty dict, so every
    # counter below defaults to 0.
    s = stats.get(video_id, {})
    row = [
        v['title'],
        video_id,
        v['description'],
        v['publishedAt'],
        int(s.get('likeCount', 0)),
        int(s.get('dislikeCount', 0)),
        int(s.get('viewCount', 0)),
        int(s.get('commentCount', 0)),
    ]
    all_rows.append(row)

# - after `for`-loop -

df = pd.DataFrame(all_rows, columns=['title', 'videoIDS', 'video_description', 'publishedDate', 'likes', 'dislikes', 'views', 'comment'])
print(df.head())
结果:
title videoIDS ... views comment
0 NPTEL Awareness workshop in association with P... TCMQ2NEEiRo ... 7282 0
1 Cayley-Hamilton theorem WROFJ15hk00 ... 3308 4
2 Recap of matrix norms and Levy-Desplanques the... WsO_s8dNfVI ... 675 1
3 Convergent matrices, Banach lemma PVGeabmeLDQ ... 676 2
4 Schur's triangularization theorem UbDwzSnS0Y0 ... 436 0
[5 rows x 8 columns]
我正在使用 Python Jupyter Notebook 获取统计数据,例如某个特定频道所有视频(约 14k 个视频)的总喜欢数、不喜欢数和总观看次数。我使用了在 GitHub 上找到的这段代码。我能够运行代码,直到最后一部分。当我尝试运行下面的代码行时,我收到错误消息“KeyError: 'commentCount'”。“likeCount”、“dislikeCount”等也是同样的情况。(错误详情请打开 URL)
# Question's original loop, kept exactly as posted because it reproduces both
# reported errors.
for i in range(len(allVideos)):
# NOTE: range() already yields 0..len(allVideos)-1; incrementing i here skips
# index 0 and makes the last iteration read one past the end (IndexError).
i += 1
title.append((allVideos[i])['snippet']['title'])
publishedDate.append((allVideos[i])['snippet']['publishedAt'])
video_description.append((allVideos[i])['snippet']['description'])
# NOTE: plain ['...Count'] subscripts raise KeyError whenever 'statistics'
# does not contain that key.
liked.append(int((stats[i])['statistics']['likeCount']))
disliked.append(int((stats[i])['statistics']['dislikeCount']))
views.append(int((stats[i])['statistics']['viewCount']))
comment.append(int((stats[i])['statistics']['commentCount']))
videoid.append(allVideos[i]['snippet']['resourceId']['videoId'])
KeyError: 'commentCount'
我知道这个问题可能是由于某些视频的评论、喜欢和不喜欢功能被禁用造成的。我该如何解决这个问题?
我注释掉了上面提到的部分指标行并重新运行了代码,最终收到以下错误消息:“IndexError: list index out of range”(列表索引超出范围)。
# Second attempt from the question: the statistics lookups are commented out,
# so the KeyError is gone, but the index shift below is still present.
for i in range(len(allVideos)):
# NOTE: i runs 1..len(allVideos) after this increment, so the final
# allVideos[i] access is out of range (IndexError).
i += 1
title.append((allVideos[i])['snippet']['title'])
#publishedDate.append((allVideos[i])['snippet']['publishedAt'])
#video_description.append((allVideos[i])['snippet']['description'])
#liked.append(int((stats[i])['statistics']['likeCount']))
#disliked.append(int((stats[i])['statistics']['dislikeCount']))
#views.append(int((stats[i])['statistics']['viewCount']))
#comment.append(int((stats[i])['statistics']['commentCount']))
#videoid.append(allVideos[i]['snippet']['resourceId']['videoId'])
IndexError: list index out of range
有没有高手可以帮我解决这个问题?我尝试了不同的方法来修复它,但都没有成功。
你得到 list index out of range 是因为你使用了 i += 1。range(len(allVideos)) 已经生成了从 0 到 len(allVideos)-1 的全部索引,再加 1 会跳过第一个元素,并在最后一次迭代时越界。你不需要它。
你也可以学习使用不带 range(len(...)) 的 for 循环,例如:
# Iterate over the items directly; no index variable is needed.
for video in allVideos:
    title.append(video['snippet']['title'])
    publishedDate.append(video['snippet']['publishedAt'])
如果你需要在 for 循环中使用序号,那么你可以使用 enumerate:
# enumerate() yields (index, item) pairs, so `number` can index the parallel
# `stats` list without a manual counter.
for number, video in enumerate(allVideos):
    title.append(video['snippet']['title'])
    publishedDate.append(video['snippet']['publishedAt'])
    comment.append(int(stats[number]['statistics']['commentCount']))
    liked.append(int(stats[number]['statistics']['likeCount']))
但在你的代码中,使用 zip() 会更简单:
# zip() pairs the two lists element by element and stops at the shorter one.
for video, stat in zip(allVideos, stats):
    title.append(video['snippet']['title'])
    publishedDate.append(video['snippet']['publishedAt'])
    comment.append(int(stat['statistics']['commentCount']))
    liked.append(int(stat['statistics']['likeCount']))
如果你先取出 ['snippet'] 和 ['statistics'] 并赋值给变量,可以让代码更具可读性:
for video, stat in zip(allVideos, stats):
    # Bind the nested dicts once so each lookup below stays short.
    v = video['snippet']
    title.append(v['title'])
    publishedDate.append(v['publishedAt'])

    s = stat['statistics']
    comment.append(int(s['commentCount']))
    liked.append(int(s['likeCount']))
如果你得到 KeyError,那么你应该使用 if/else:
s = stat['statistics']

# Append 0 when the key is missing instead of letting the lookup raise
# KeyError.
if 'commentCount' in s:
    comment.append(int(s['commentCount']))
else:
    comment.append(0)

if 'likeCount' in s:
    liked.append(int(s['likeCount']))
else:
    liked.append(0)
或者更简短地使用 .get(key, default value):
# dict.get() returns the supplied default (0) when the key is missing, so no
# KeyError is raised and no if/else is needed.
s = stat['statistics']
comment.append( int(s.get('commentCount', 0)) )
liked.append( int(s.get('likeCount', 0)) )
完整版:
for video, stat in zip(allVideos, stats):
    v = video['snippet']
    title.append(v['title'])
    publishedDate.append(v['publishedAt'])
    video_description.append(v['description'])
    videoid.append(v['resourceId']['videoId'])

    # .get(key, 0) substitutes 0 for any counter missing from 'statistics'.
    s = stat['statistics']
    liked.append(int(s.get('likeCount', 0)))
    disliked.append(int(s.get('dislikeCount', 0)))
    views.append(int(s.get('viewCount', 0)))
    comment.append(int(s.get('commentCount', 0)))
但是如果你想把数据放入 DataFrame,那么可以不用 title 等这些单独的列表,使代码更简单:
# Collect one list per video, then build the DataFrame in a single call
# instead of maintaining a separate list per column.
all_rows = []

for video, stat in zip(allVideos, stats):
    v = video['snippet']
    s = stat['statistics']

    # Order must match the `columns` list passed to pd.DataFrame below.
    row = [
        v['title'],
        v['resourceId']['videoId'],
        v['description'],
        v['publishedAt'],
        int(s.get('likeCount', 0)),
        int(s.get('dislikeCount', 0)),
        int(s.get('viewCount', 0)),
        int(s.get('commentCount', 0)),
    ]

    all_rows.append(row)

# - after `for`-loop -

df = pd.DataFrame(
    all_rows,
    columns=['title', 'videoIDS', 'video_description', 'publishedDate', 'likes', 'dislikes', 'views', 'comment']
)
编辑:
完整的工作代码:
import os

from googleapiclient.discovery import build
import pandas as pd

# Read the API key from the environment instead of embedding it in the
# script, so the credential is never published together with the code.
# Raises KeyError if YOUTUBE_API_KEY is not set.
youTubeApiKey = os.environ["YOUTUBE_API_KEY"]
youtube = build('youtube', 'v3', developerKey=youTubeApiKey)

# Search for the channel by name and use the first hit.
snippets = youtube.search().list(part="snippet", type="channel", q="nptelhrd").execute()
print('len(snippets):', len(snippets))
channel_id = snippets['items'][0]['snippet']['channelId']
print('channel_id:', channel_id)

# Kept under its own name so it is not confused with the per-video
# statistics collected further down.
channel_stats = youtube.channels().list(part="statistics", id=channel_id).execute()
print('len(stats):', len(channel_stats))

#status = youtube.channels().list(id=channel_id, part='status').execute()
#print('len(status):', len(status))

# Look up the id of the channel's "uploads" playlist.
content = youtube.channels().list(id=channel_id, part='contentDetails').execute()
print('len(content):', len(content))
upload_id = content['items'][0]['contentDetails']['relatedPlaylists']['uploads']
print('upload_id:', upload_id)

# Page through the playlist until the response carries no 'nextPageToken'.
all_videos = []
next_page_token = None
number = 0
while True:
    number += 1
    print('page', number)
    res = youtube.playlistItems().list(
        playlistId=upload_id,
        maxResults=50,
        part='snippet',
        pageToken=next_page_token,
    ).execute()
    all_videos += res['items']
    next_page_token = res.get('nextPageToken')
    if next_page_token is None:
        break
print('len(all_videos):', len(all_videos))

video_ids = [item['snippet']['resourceId']['videoId'] for item in all_videos]
print('len(video_ids):', len(video_ids))

# Fetch statistics in batches of 40 ids and key them by video id.
# NOTE(review): videos.list is not guaranteed to return one item per requested
# id (e.g. private or deleted videos) - confirm against the API reference.
# Pairing the two lists by position would then attach counters to the wrong
# title, so rows are matched by id instead.
stats = {}
for i in range(0, len(video_ids), 40):
    res = youtube.videos().list(id=','.join(video_ids[i:i+40]), part='statistics').execute()
    for item in res['items']:
        stats[item['id']] = item.get('statistics', {})
print('len(stats):', len(stats))

all_rows = []
for number, video in enumerate(all_videos, start=1):
    print('row', number)
    v = video['snippet']
    video_id = v['resourceId']['videoId']
    # Videos without a statistics entry fall back to an empty dict, so every
    # counter below defaults to 0.
    s = stats.get(video_id, {})
    row = [
        v['title'],
        video_id,
        v['description'],
        v['publishedAt'],
        int(s.get('likeCount', 0)),
        int(s.get('dislikeCount', 0)),
        int(s.get('viewCount', 0)),
        int(s.get('commentCount', 0)),
    ]
    all_rows.append(row)

# - after `for`-loop -

df = pd.DataFrame(all_rows, columns=['title', 'videoIDS', 'video_description', 'publishedDate', 'likes', 'dislikes', 'views', 'comment'])
print(df.head())
结果:
title videoIDS ... views comment
0 NPTEL Awareness workshop in association with P... TCMQ2NEEiRo ... 7282 0
1 Cayley-Hamilton theorem WROFJ15hk00 ... 3308 4
2 Recap of matrix norms and Levy-Desplanques the... WsO_s8dNfVI ... 675 1
3 Convergent matrices, Banach lemma PVGeabmeLDQ ... 676 2
4 Schur's triangularization theorem UbDwzSnS0Y0 ... 436 0
[5 rows x 8 columns]