AttributeError: 'spacy.tokens.span.Span' object has no attribute 'string'

Question

import re
import spacy
from nltk.corpus import stopwords
import pdfplumber

def extract_All_data(path):
    text = ""
    try:
        with pdfplumber.open(path) as pdf:
            for i in pdf.pages:
                text += i.extract_text()
            return text
    except:
        return None

resume_text = extract_All_data(r"E:\AllResumesPdfs202883_Mumbai_6.pdf")
#resume_text =text.lower()

# load pre-trained model
nlp = spacy.load('en_core_web_lg')

# Grad all general stop words
STOPWORDS = set(stopwords.words('english'))

# Education Degrees
EDUCATION = [
            'BE','B.E.', 'B.E', 'BS', 'B.S',
            'ME', 'M.E', 'M.E.', 'MS', 'M.S', 'M.C.A.',
            'BTECH', 'B.TECH', 'M.TECH', 'MTECH',
            'SSC', 'HSC', 'CBSE', 'ICSE', 'X', 'XII'
        ]

def extract_education(resume_text):
    nlp_text = nlp(resume_text)

    # Sentence Tokenizer
    nlp_text = [sent.string.strip() for sent in nlp_text.sents]

    edu = {}
    # Extract education degree
    for index, text in enumerate(nlp_text):
        for tex in text.split():
            # Replace all special symbols
            tex = re.sub(r'[?|$|.|!|,]', r'', tex)
            if tex.upper() in EDUCATION and tex not in STOPWORDS:
                edu[tex] = text + nlp_text[index + 1]

    # Extract year
    education = []
    for key in edu.keys():
        year = re.search(re.compile(r'((20|19)(\d{2}))'), edu[key])
        if year:
            education.append((key, ''.join(year[0])))
        else:
            education.append(key)
    return education

Education= extract_education(resume_text)
print(Education)

我已经下载了大模型，但它仍然显示字符串错误。请帮我解决这个问题。提前致谢。

C:\Python37\python.exe E:/JobScan/Sample1.py
2021-05-22 09:33:10.781450: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2021-05-22 09:33:10.781933: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
  File "E:/JobScan/Sample1.py", line 58, in <module>
    Education= extract_education(resume_text)
  File "E:/JobScan/Sample1.py", line 37, in extract_education
    nlp_text = [sent.string.strip() for sent in nlp_text.sents]
  File "E:/JobScan/Sample1.py", line 37, in <listcomp>
    nlp_text = [sent.string.strip() for sent in nlp_text.sents]
AttributeError: 'spacy.tokens.span.Span' object has no attribute 'string'

Process finished with exit code 1

这是控制台错误。

Answer 1

正如 Tim Roberts 所说，您想要 text 属性。

# change "string" to "text"
nlp_text = [sent.text.strip() for sent in nlp_text.sents]