TypeError: 'NoneType' object is not iterable - text summarisation with keras

TypeError: 'NoneType' object is not iterable - text summarisation with keras

我是机器学习的新手,我正在尝试通过 tutorial for text summarization using Keras.

我已经到了矢量化数据的地步,但是我遇到了一个错误,我自己已经尝试了一切。我真的很想让这个程序运行,并希望有人能阐明为什么它会给我这个错误以及我该如何修复它。我确实看过以前的帖子,但到目前为止 none 对我有所帮助,谢谢。这是我的代码:

#vectorise data
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()

for story in stories:
    input_text = story['story']
    for highlight in story['highlights']:
        target_text = highlight
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    for char in input_text:
        if char not in input_characters:
            input_characters.add(char)
    for char in target_text:
        if char not in target_characters:
            target_characters.add(char)

input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max([len(txt) for txt in input_texts])
max_decoder_seq_length = max([len(txt) for txt in target_texts])
print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

这是引发错误的代码行

for highlight in story['highlights']:

这是我用来清理和腌制数据的代码

#remove all unneeded features and null values
reviews = reviews.dropna()
reviews = reviews.drop(['Id','ProductId','UserId','ProfileName','HelpfulnessNumerator','HelpfulnessDenominator', 'Score','Time'], 1)
reviews = reviews.reset_index(drop=True) 
print(reviews.head())

for i in range(5):
    print("Review #",i+1) 

print(reviews.Summary[i]) 
print(reviews.Text[i]) 
print()

#define contractions eg slang words and their correct spellings
contractions = {
        "ain't": "am not",
        "aren't": "are not",
        "can't": "cannot",
        "can't've": "cannot have",
        "'cause": "because",
        "could've": "could have",
        "couldn't": "could not",
        "couldn't've": "could not have",
        "didn't": "did not",
        "doesn't": "does not",
        "don't": "do not",
        "hadn't": "had not",
        "hadn't've": "had not have",
        "hasn't": "has not",
        "haven't": "have not",
        "he'd": "he would",
        "he'd've": "he would have"}

#clean the text of contractions and stop words 
def clean_text(text, remove_stopwords=True): 
    text = text.lower() 
    if True: 
        text = text.split() 
        new_text = []
        for word in text:
            if word in contractions:new_text.append(contractions[word])
            else:
                new_text.append(word)
            text = " ".join(new_text)
            text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
            text = re.sub(r'\<a href', ' ', text)
            text = re.sub(r'&amp;', '', text)
            text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
            text = re.sub(r'<br />', ' ', text)
            text = re.sub(r'\'', ' ', text)
        if remove_stopwords:
            text = text.split()
            stops = set(stopwords.words("english"))
            text = [w for w in text if not w in stops]
            text = " ".join(text) 
            return text

#clean summaries and texts
clean_summaries = []
for summary in reviews.Summary:
    clean_summaries.append(clean_text(summary, remove_stopwords=False)) 
print("Summaries are complete.")

clean_texts = []
for text in reviews.Text:
    clean_texts.append(clean_text(text))
print("Texts are complete.")

stories = list()
for i, text in enumerate(clean_texts):
    stories.append({'story': text, 'highlights': clean_summaries[i]}) # save to file
dump(stories, open('data/review_dataset.pkl', 'wb'))

您的故事词典中至少有一个没有键值对 'highlights'。如果这仅适用于某些故事,您可以在迭代之前简单地检查是否存在 NoneType。如果所有故事都是如此,则您的代码和您正在使用的数据之间可能存在差异。

此外,我认为存在缩进错误(可能只是错误的 SO 格式),但我认为 target_text = highlight 之后的代码应该再次向右缩进。

for story in stories:
    input_text = story['story']
    # check for None to make sure you are not iterating over NoneType
    if story['highlights'] is not None:
        for highlight in story['highlights']:
            target_text = highlight
            # I believe the following code should be indented as well
            target_text = '\t' + target_text + '\n'
            input_texts.append(input_text)
            target_texts.append(target_text)
            ...