TypeError: 'NoneType' object is not iterable - text summarisation with keras
I am new to machine learning and I am trying to work through a tutorial for text summarization using Keras.
I have got to the point of vectorising the data, but I am running into an error and I have tried everything I can think of on my own. I would really like to get this program running, and I hope someone can shed some light on why it throws this error and how I can fix it. I have looked at previous posts, but so far none of them have helped. Thank you. Here is my code:
#vectorise data
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
for story in stories:
    input_text = story['story']
    for highlight in story['highlights']:
        target_text = highlight
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    for char in input_text:
        if char not in input_characters:
            input_characters.add(char)
    for char in target_text:
        if char not in target_characters:
            target_characters.add(char)
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max([len(txt) for txt in input_texts])
max_decoder_seq_length = max([len(txt) for txt in target_texts])
print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)
This is the line of code that throws the error:
for highlight in story['highlights']:
This is the code I used to clean and pickle the data:
import re
from pickle import dump
from nltk.corpus import stopwords

#remove all unneeded features and null values
reviews = reviews.dropna()
reviews = reviews.drop(['Id','ProductId','UserId','ProfileName','HelpfulnessNumerator','HelpfulnessDenominator', 'Score','Time'], 1)
reviews = reviews.reset_index(drop=True)
print(reviews.head())
for i in range(5):
    print("Review #",i+1)
    print(reviews.Summary[i])
    print(reviews.Text[i])
    print()
#define contractions eg slang words and their correct spellings
contractions = {
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have"}
#clean the text of contractions and stop words
def clean_text(text, remove_stopwords=True):
    text = text.lower()
    if True:
        text = text.split()
        new_text = []
        for word in text:
            if word in contractions:
                new_text.append(contractions[word])
            else:
                new_text.append(word)
        text = " ".join(new_text)
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&', '', text)
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br />', ' ', text)
    text = re.sub(r'\'', ' ', text)
    if remove_stopwords:
        text = text.split()
        stops = set(stopwords.words("english"))
        text = [w for w in text if not w in stops]
        text = " ".join(text)
    return text
#clean summaries and texts
clean_summaries = []
for summary in reviews.Summary:
    clean_summaries.append(clean_text(summary, remove_stopwords=False))
print("Summaries are complete.")
clean_texts = []
for text in reviews.Text:
    clean_texts.append(clean_text(text))
print("Texts are complete.")
stories = list()
for i, text in enumerate(clean_texts):
    stories.append({'story': text, 'highlights': clean_summaries[i]})
# save to file
dump(stories, open('data/review_dataset.pkl', 'wb'))
At least one of the dictionaries in your stories list has no value for the key 'highlights'. If that is only true for some stories, you can simply check for None before iterating. If it is true for all of them, there is probably a mismatch between your code and the data you are using.
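To see which of the two cases applies, a quick check over your stories list could look like this (just a sketch, assuming stories is the list built by your pickling code):

# indices of stories whose 'highlights' value is missing or None
missing = [i for i, story in enumerate(stories) if story.get('highlights') is None]
print(len(missing), 'of', len(stories), 'stories have no highlights')
print('First affected indices:', missing[:5])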
Also, I think there is an indentation error (it may just be the SO formatting), but I believe the code after target_text = highlight should be indented one level further to the right:
for story in stories:
    input_text = story['story']
    # check for None to make sure you are not iterating over NoneType
    if story['highlights'] is not None:
        for highlight in story['highlights']:
            target_text = highlight
            # I believe the following code should be indented as well
            target_text = '\t' + target_text + '\n'
            input_texts.append(input_text)
            target_texts.append(target_text)
            ...