python 如何通过调用文件中的函数名来打印文件名
How to print the file name by calling a function name in the file in python
这是一个基于简历筛选的程序。在这个程序中,我给每份简历打分以对它们进行排名。现在输出只给了我分数,而我希望它同时给出对应的文件名。
程序是:
import re
import fitz
import os
# Scores of every processed resume; sorted and printed once all files are done
zs = []

# Folder, relative to the working directory, that holds the resume PDFs
RESUME_DIR = 'resume/'
# Points awarded for each skill keyword found in the "Skills" field
SKILL_POINTS = {'Laravel': 2, 'Java': 1, 'Python': 2}
# Matches decimals such as "3.7" as well as plain integers such as "3"
NUMBER_RE = re.compile(r"[-+]?\d*\.\d+|\d+")


def parse_fields(lines):
    """Turn "Key: value" lines into a dict of field name -> stripped value.

    Lines that do not contain the ": " separator (blank lines, headings)
    are skipped instead of raising ValueError.
    """
    fields = {}
    for line in lines:
        key, sep, value = line.partition(': ')
        if sep:
            fields[key] = value.rstrip()
    return fields


def first_number(text):
    """Return the first number found in *text* as a float, or None."""
    match = NUMBER_RE.search(text)
    return float(match.group()) if match else None


def score_resume(gpa_text, skills_text, experience_text):
    """Score one resume from the texts of its GPA, Skills and Experience fields.

    GPA and years of experience are compared as numbers. Comparing them as
    strings made '4.0' <= '4' false (a 4.0 GPA scored nothing) and left the
    top experience bracket unreachable.

    Returns an int, or a float when the half-point experience bracket applies.
    """
    score = 0

    # Scoring the GPA
    gpa = first_number(gpa_text)
    if gpa is not None:
        if 3.0 <= gpa < 3.5:
            score += 1
        elif 3.5 <= gpa <= 4.0:
            score += 2

    # Scoring the Skills (plain substring match, as before)
    for word, points in SKILL_POINTS.items():
        if word in skills_text:
            score += points

    # Scoring the Experience (in years)
    years = first_number(experience_text)
    if years is not None:
        if 2.5 <= years < 3:
            score += 0.5
        elif 3 <= years < 3.5:
            score += 1
        elif years >= 3.5:
            score += 2

    return score


def main():
    """Score every PDF in RESUME_DIR, print each result, then the ranking.

    Raises KeyError when a resume lacks one of the Name, GPA, Skills or
    Experience fields on its first page.
    """
    for filename in os.listdir(RESUME_DIR):
        # Select only PDF files
        if not filename.endswith('.pdf'):
            continue
        print(filename)
        # Open by path: no os.chdir to a hard-coded folder is needed, and the
        # context manager closes the document even if extraction fails.
        with fitz.open(os.path.join(RESUME_DIR, filename)) as doc:
            # NOTE: getText is the PyMuPDF spelling the original script used.
            page_texts = [page.getText() for page in doc]
        text = "".join(page_texts)
        print(text)
        # The labelled fields are read from the first page only
        p_lines = page_texts[0].splitlines() if page_texts else []
        d = parse_fields(p_lines)
        f = d["Name"]
        score = score_resume(d["GPA"], d["Skills"], d["Experience"])
        # store score values in an array
        zs.append(score)
        print("%s has Scored %s" % (f, score))
        print('\n')
    # Rank the CV's on the basis of Scoring
    zs.sort(reverse=True)
    print(zs)


if __name__ == "__main__":
    main()
程序的输出是:
cv2.pdf
Name: Danish Ejaz
GPA: 3.7
Skills: Python, Java
Experience: 2.5 years
Danish Ejaz has Scored 5.5
cv3.pdf
Name: Abdullah
GPA: 3.2
Skills: Laravel, Java
Experience: 2 years
Abdullah has Scored 4
cv5.pdf
Name: M. Abrar Hussain
GPA: 3.5
Skills: Python, Laravel
Experience: 3 years
M. Abrar Hussain has Scored 7
[7, 5.5, 4]
Process finished with exit code 0
倒数第二行是打分后的结果。在这个结果中它只给了我们分数,我可以在结果中调用文件名吗?如果是,请帮助我完成这个项目。
您只需要存储您的文件名,并在最后将它们与分数一起打印出来:
# Create a dictionary with filenames as key; the score is added as value later
# Note that this might be an issue if you have irrelevant files in your directory
# (os.listdir, not a bare listdir: only the os module itself is imported)
file_scores = dict.fromkeys(os.listdir('resume/'))
# call the Resume files by calling the folder name
for filename in file_scores:
    # Your scoring logic
    (...)
    # store score values in the dictionary
    file_scores[filename] = score
    (...)
# Remove items that never received a score. Test against None rather than
# truthiness so that a legitimate score of 0 is not thrown away.
file_scores = {k: v for k, v in file_scores.items() if v is not None}
# Sort the dictionary based on score descending
file_scores = dict(sorted(file_scores.items(), key=lambda x: x[1], reverse=True))
# Print the file and the score together (unscored files were removed above)
for filename, score in file_scores.items():
    print(f"File {filename}: Score = {score}")
实现您想要的最简单方法是使用表格结构。鉴于您在每个结果中都有相似的字段,您可以创建一个 pd.DataFrame,把所有的值填进去,然后使用 sort_values 并指定分数列进行排序。
当然还有其他选择,您可以使用 np.argsort 或类似的方法,但 DataFrame 可能是实现您所追求的目标的最简单方法。
依我看,正确的做法是定义一个 class,最小的版本如下:
class Candidate:
    """A scored resume: the candidate's name, the score and the source file.

    Candidates are ordered by score, so a list of them can be sorted directly.
    """

    def __init__(self, name, score, filename):
        self.name = name
        self.score = score
        self.filename = filename

    def __gt__(self, other):
        """Return True when this candidate's score is higher than *other*'s."""
        # Let Python raise its usual TypeError for objects without a score
        if not hasattr(other, 'score'):
            return NotImplemented
        return self.score > other.score

    def __str__(self):
        """Return e.g. Candidate('Arnold', 'arnold.pdf', 10)."""
        return f'Candidate{self.name, self.filename, self.score}'

    def __repr__(self):
        # Same text as str() so candidates stay readable inside lists
        return self.__str__()
将其放在主 for 循环之前。然后把
tt = zs.append(score)
替换为
tt = zs.append(Candidate(f, score, filename))
否则应该是一样的。这里有一些解释性的用法:
class Candidate:
    """A scored resume: the candidate's name, the score and the source file.

    Candidates are ordered by score, so a list of them can be sorted directly.
    """

    def __init__(self, name, score, filename):
        self.name = name
        self.score = score
        self.filename = filename

    def __gt__(self, other):
        """Return True when this candidate's score is higher than *other*'s."""
        # Let Python raise its usual TypeError for objects without a score
        if not hasattr(other, 'score'):
            return NotImplemented
        return self.score > other.score

    def __str__(self):
        """Return e.g. Candidate('Arnold', 'arnold.pdf', 10)."""
        return f'Candidate{self.name, self.filename, self.score}'

    def __repr__(self):
        # Same text as str() so candidates stay readable inside lists
        return self.__str__()
# __init__ allows building a candidate from a name, a score and a filename
a = Candidate("Arnold", 10, "arnold.pdf")
b = Candidate("Betty", 11, "betty.pdf")
# __gt__ allows both comparisons: `a < b` has no __lt__ to call, so Python
# falls back to the reflected b.__gt__(a)
print(a < b)
print(a > b)
# __str__ allows printing a single candidate in readable form
print(a)
# __repr__ keeps candidates readable inside containers such as lists
print([a, b])
# sorted() orders the candidates through __gt__ (ascending by score), and
# __repr__ keeps the resulting list readable
print(sorted([b, a]))
这将打印
True
False
Candidate('Arnold', 'arnold.pdf', 10)
[Candidate('Arnold', 'arnold.pdf', 10), Candidate('Betty', 'betty.pdf', 11)]
[Candidate('Arnold', 'arnold.pdf', 10), Candidate('Betty', 'betty.pdf', 11)]
这是一个基于简历筛选的程序。在这个程序中,我给每份简历打分以对它们进行排名。现在输出只给了我分数,而我希望它同时给出对应的文件名。 程序是:
import re
import fitz
import os
# Scores of every processed resume; sorted and printed once all files are done
zs = []

# Folder, relative to the working directory, that holds the resume PDFs
RESUME_DIR = 'resume/'
# Points awarded for each skill keyword found in the "Skills" field
SKILL_POINTS = {'Laravel': 2, 'Java': 1, 'Python': 2}
# Matches decimals such as "3.7" as well as plain integers such as "3"
NUMBER_RE = re.compile(r"[-+]?\d*\.\d+|\d+")


def parse_fields(lines):
    """Turn "Key: value" lines into a dict of field name -> stripped value.

    Lines that do not contain the ": " separator (blank lines, headings)
    are skipped instead of raising ValueError.
    """
    fields = {}
    for line in lines:
        key, sep, value = line.partition(': ')
        if sep:
            fields[key] = value.rstrip()
    return fields


def first_number(text):
    """Return the first number found in *text* as a float, or None."""
    match = NUMBER_RE.search(text)
    return float(match.group()) if match else None


def score_resume(gpa_text, skills_text, experience_text):
    """Score one resume from the texts of its GPA, Skills and Experience fields.

    GPA and years of experience are compared as numbers. Comparing them as
    strings made '4.0' <= '4' false (a 4.0 GPA scored nothing) and left the
    top experience bracket unreachable.

    Returns an int, or a float when the half-point experience bracket applies.
    """
    score = 0

    # Scoring the GPA
    gpa = first_number(gpa_text)
    if gpa is not None:
        if 3.0 <= gpa < 3.5:
            score += 1
        elif 3.5 <= gpa <= 4.0:
            score += 2

    # Scoring the Skills (plain substring match, as before)
    for word, points in SKILL_POINTS.items():
        if word in skills_text:
            score += points

    # Scoring the Experience (in years)
    years = first_number(experience_text)
    if years is not None:
        if 2.5 <= years < 3:
            score += 0.5
        elif 3 <= years < 3.5:
            score += 1
        elif years >= 3.5:
            score += 2

    return score


def main():
    """Score every PDF in RESUME_DIR, print each result, then the ranking.

    Raises KeyError when a resume lacks one of the Name, GPA, Skills or
    Experience fields on its first page.
    """
    for filename in os.listdir(RESUME_DIR):
        # Select only PDF files
        if not filename.endswith('.pdf'):
            continue
        print(filename)
        # Open by path: no os.chdir to a hard-coded folder is needed, and the
        # context manager closes the document even if extraction fails.
        with fitz.open(os.path.join(RESUME_DIR, filename)) as doc:
            # NOTE: getText is the PyMuPDF spelling the original script used.
            page_texts = [page.getText() for page in doc]
        text = "".join(page_texts)
        print(text)
        # The labelled fields are read from the first page only
        p_lines = page_texts[0].splitlines() if page_texts else []
        d = parse_fields(p_lines)
        f = d["Name"]
        score = score_resume(d["GPA"], d["Skills"], d["Experience"])
        # store score values in an array
        zs.append(score)
        print("%s has Scored %s" % (f, score))
        print('\n')
    # Rank the CV's on the basis of Scoring
    zs.sort(reverse=True)
    print(zs)


if __name__ == "__main__":
    main()
程序的输出是:
cv2.pdf
Name: Danish Ejaz
GPA: 3.7
Skills: Python, Java
Experience: 2.5 years
Danish Ejaz has Scored 5.5
cv3.pdf
Name: Abdullah
GPA: 3.2
Skills: Laravel, Java
Experience: 2 years
Abdullah has Scored 4
cv5.pdf
Name: M. Abrar Hussain
GPA: 3.5
Skills: Python, Laravel
Experience: 3 years
M. Abrar Hussain has Scored 7
[7, 5.5, 4]
Process finished with exit code 0
倒数第二行是打分后的结果。在这个结果中它只给了我们分数,我可以在结果中调用文件名吗?如果是,请帮助我完成这个项目。
您只需要存储您的文件名,并在最后将它们与分数一起打印出来:
# Create a dictionary with filenames as key; the score is added as value later
# Note that this might be an issue if you have irrelevant files in your directory
# (os.listdir, not a bare listdir: only the os module itself is imported)
file_scores = dict.fromkeys(os.listdir('resume/'))
# call the Resume files by calling the folder name
for filename in file_scores:
    # Your scoring logic
    (...)
    # store score values in the dictionary
    file_scores[filename] = score
    (...)
# Remove items that never received a score. Test against None rather than
# truthiness so that a legitimate score of 0 is not thrown away.
file_scores = {k: v for k, v in file_scores.items() if v is not None}
# Sort the dictionary based on score descending
file_scores = dict(sorted(file_scores.items(), key=lambda x: x[1], reverse=True))
# Print the file and the score together (unscored files were removed above)
for filename, score in file_scores.items():
    print(f"File {filename}: Score = {score}")
实现您想要的最简单方法是使用表格结构。鉴于您在每个结果中都有相似的字段,您可以创建一个 pd.DataFrame,把所有的值填进去,然后使用 sort_values 并指定分数列进行排序。
当然还有其他选择,您可以使用 np.argsort 或类似的方法,但 DataFrame 可能是实现您所追求的目标的最简单方法。
依我看,正确的做法是定义一个 class,最小的版本如下:
class Candidate:
    """A scored resume: the candidate's name, the score and the source file.

    Candidates are ordered by score, so a list of them can be sorted directly.
    """

    def __init__(self, name, score, filename):
        self.name = name
        self.score = score
        self.filename = filename

    def __gt__(self, other):
        """Return True when this candidate's score is higher than *other*'s."""
        # Let Python raise its usual TypeError for objects without a score
        if not hasattr(other, 'score'):
            return NotImplemented
        return self.score > other.score

    def __str__(self):
        """Return e.g. Candidate('Arnold', 'arnold.pdf', 10)."""
        return f'Candidate{self.name, self.filename, self.score}'

    def __repr__(self):
        # Same text as str() so candidates stay readable inside lists
        return self.__str__()
将其放在主 for 循环之前。然后把
tt = zs.append(score)
替换为
tt = zs.append(Candidate(f, score, filename))
否则应该是一样的。这里有一些解释性的用法:
class Candidate:
    """A scored resume: the candidate's name, the score and the source file.

    Candidates are ordered by score, so a list of them can be sorted directly.
    """

    def __init__(self, name, score, filename):
        self.name = name
        self.score = score
        self.filename = filename

    def __gt__(self, other):
        """Return True when this candidate's score is higher than *other*'s."""
        # Let Python raise its usual TypeError for objects without a score
        if not hasattr(other, 'score'):
            return NotImplemented
        return self.score > other.score

    def __str__(self):
        """Return e.g. Candidate('Arnold', 'arnold.pdf', 10)."""
        return f'Candidate{self.name, self.filename, self.score}'

    def __repr__(self):
        # Same text as str() so candidates stay readable inside lists
        return self.__str__()
# __init__ allows building a candidate from a name, a score and a filename
a = Candidate("Arnold", 10, "arnold.pdf")
b = Candidate("Betty", 11, "betty.pdf")
# __gt__ allows both comparisons: `a < b` has no __lt__ to call, so Python
# falls back to the reflected b.__gt__(a)
print(a < b)
print(a > b)
# __str__ allows printing a single candidate in readable form
print(a)
# __repr__ keeps candidates readable inside containers such as lists
print([a, b])
# sorted() orders the candidates through __gt__ (ascending by score), and
# __repr__ keeps the resulting list readable
print(sorted([b, a]))
这将打印
True
False
Candidate('Arnold', 'arnold.pdf', 10)
[Candidate('Arnold', 'arnold.pdf', 10), Candidate('Betty', 'betty.pdf', 11)]
[Candidate('Arnold', 'arnold.pdf', 10), Candidate('Betty', 'betty.pdf', 11)]