AttributeError: 'spacy.tokens.span.Span' object has no attribute 'string'
AttributeError: 'spacy.tokens.span.Span' object has no attribute 'string'
import re
import spacy
from nltk.corpus import stopwords
import pdfplumber
def extract_All_data(path):
text = ""
try:
with pdfplumber.open(path) as pdf:
for i in pdf.pages:
text += i.extract_text()
return text
except:
return None
resume_text = extract_All_data(r"E:\AllResumesPdfs202883_Mumbai_6.pdf")
#resume_text =text.lower()
# load pre-trained model
nlp = spacy.load('en_core_web_lg')
# Grad all general stop words
STOPWORDS = set(stopwords.words('english'))
# Education Degrees
EDUCATION = [
'BE','B.E.', 'B.E', 'BS', 'B.S',
'ME', 'M.E', 'M.E.', 'MS', 'M.S', 'M.C.A.',
'BTECH', 'B.TECH', 'M.TECH', 'MTECH',
'SSC', 'HSC', 'CBSE', 'ICSE', 'X', 'XII'
]
def extract_education(resume_text):
nlp_text = nlp(resume_text)
# Sentence Tokenizer
nlp_text = [sent.string.strip() for sent in nlp_text.sents]
edu = {}
# Extract education degree
for index, text in enumerate(nlp_text):
for tex in text.split():
# Replace all special symbols
tex = re.sub(r'[?|$|.|!|,]', r'', tex)
if tex.upper() in EDUCATION and tex not in STOPWORDS:
edu[tex] = text + nlp_text[index + 1]
# Extract year
education = []
for key in edu.keys():
year = re.search(re.compile(r'((20|19)(\d{2}))'), edu[key])
if year:
education.append((key, ''.join(year[0])))
else:
education.append(key)
return education
Education= extract_education(resume_text)
print(Education)
我已经下载了大模型,但它仍然显示字符串错误。
请帮我解决这个问题。
提前致谢。
C:\Python37\python.exe E:/JobScan/Sample1.py
2021-05-22 09:33:10.781450: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2021-05-22 09:33:10.781933: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
File "E:/JobScan/Sample1.py", line 58, in <module>
Education= extract_education(resume_text)
File "E:/JobScan/Sample1.py", line 37, in extract_education
nlp_text = [sent.string.strip() for sent in nlp_text.sents]
File "E:/JobScan/Sample1.py", line 37, in <listcomp>
nlp_text = [sent.string.strip() for sent in nlp_text.sents]
AttributeError: 'spacy.tokens.span.Span' object has no attribute 'string'
Process finished with exit code 1
这是控制台错误。
正如 Tim Roberts 所说,您想要 text
属性。
# change "string" to "text"
nlp_text = [sent.text.strip() for sent in nlp_text.sents]
import re
import spacy
from nltk.corpus import stopwords
import pdfplumber
def extract_All_data(path):
text = ""
try:
with pdfplumber.open(path) as pdf:
for i in pdf.pages:
text += i.extract_text()
return text
except:
return None
resume_text = extract_All_data(r"E:\AllResumesPdfs202883_Mumbai_6.pdf")
#resume_text =text.lower()
# load pre-trained model
nlp = spacy.load('en_core_web_lg')
# Grad all general stop words
STOPWORDS = set(stopwords.words('english'))
# Education Degrees
EDUCATION = [
'BE','B.E.', 'B.E', 'BS', 'B.S',
'ME', 'M.E', 'M.E.', 'MS', 'M.S', 'M.C.A.',
'BTECH', 'B.TECH', 'M.TECH', 'MTECH',
'SSC', 'HSC', 'CBSE', 'ICSE', 'X', 'XII'
]
def extract_education(resume_text):
nlp_text = nlp(resume_text)
# Sentence Tokenizer
nlp_text = [sent.string.strip() for sent in nlp_text.sents]
edu = {}
# Extract education degree
for index, text in enumerate(nlp_text):
for tex in text.split():
# Replace all special symbols
tex = re.sub(r'[?|$|.|!|,]', r'', tex)
if tex.upper() in EDUCATION and tex not in STOPWORDS:
edu[tex] = text + nlp_text[index + 1]
# Extract year
education = []
for key in edu.keys():
year = re.search(re.compile(r'((20|19)(\d{2}))'), edu[key])
if year:
education.append((key, ''.join(year[0])))
else:
education.append(key)
return education
Education= extract_education(resume_text)
print(Education)
我已经下载了大模型,但它仍然显示字符串错误。 请帮我解决这个问题。 提前致谢。
C:\Python37\python.exe E:/JobScan/Sample1.py
2021-05-22 09:33:10.781450: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2021-05-22 09:33:10.781933: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
File "E:/JobScan/Sample1.py", line 58, in <module>
Education= extract_education(resume_text)
File "E:/JobScan/Sample1.py", line 37, in extract_education
nlp_text = [sent.string.strip() for sent in nlp_text.sents]
File "E:/JobScan/Sample1.py", line 37, in <listcomp>
nlp_text = [sent.string.strip() for sent in nlp_text.sents]
AttributeError: 'spacy.tokens.span.Span' object has no attribute 'string'
Process finished with exit code 1
这是控制台错误。
正如 Tim Roberts 所说,您想要 text
属性。
# change "string" to "text"
nlp_text = [sent.text.strip() for sent in nlp_text.sents]