在执行程序之前显示用户在 GUI 中选择的文件的名称
Displaying the name of the file that a user selects in the GUI, BEFORE having the program execute
在我的程序中,我试图让 GUI 显示我 select 的 PDF 的名称,然后我让程序实际执行它应该对该 PDF 执行的操作。目前,个人可以打开一个文件目录,select一个文件,然后在GUI中按下运行按钮让程序执行,但是用户看不到文件名他们 selected。
我试过放置一个简单的 "print" 函数,但似乎没有用,我还尝试添加 "selector.filename" ato lbl1,并使其成为自己的标签并放置它在不同的区域,包括几乎所有代码的末尾,但这似乎没有用。
import os
import PyPDF2
import pandas
import webbrowser
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize
#----Functions----#
#Method that a PDF that is read into the program goes through to eliminate any unwanted words or symbols#
def preprocess(text):
#Filters out punctuation from paragraph witch becomes tokenized to words and punctuation#
tokenizer = RegexpTokenizer(r'\w+')
result = tokenizer.tokenize(text)
#Makes all words lowercase#
words = [item.lower() for item in result]
#Removes all remaining tokens that are not alphabetic#
result = [word for word in words if word.isalpha()]
#Imports stopwords to be removed from paragraph#
stop_words = set(stopwords.words("english"))
#Removes the stop words from the paragraph#
filtered_sent = []
for w in result:
if w not in stop_words:
filtered_sent.append(w)
#Return word to root word/chop-off derivational affixes#
ps = PorterStemmer()
stemmed_words = []
for w in filtered_sent:
stemmed_words.append(ps.stem(w))
#Lemmatization, which reduces word to their base word, which is linguistically correct lemmas#
lem = WordNetLemmatizer()
lemmatized_words = ' '.join([lem.lemmatize(w,'n') and lem.lemmatize(w,'v') for w in filtered_sent])
#Re-tokenize lemmatized words string#
tokenized_word = word_tokenize(lemmatized_words)
return tokenized_word
#Wraps two functions inside an object which allows both functions to use filename#
class PDFSelector:
#Creates global variable 'filename'#
def __init(self):
self.filename = ""
#Allows user to select PDF to use in program#
def select_PDF(self):
#Opens file directory to select a file, and shows both folders and PDF files only#
self.filename = filedialog.askopenfilename(initialdir = "/", title = "Select file", filetypes = (("pdf files", "*.pdf"), ("all files", "*.*")))
print(self.filename)
#Method for PDF to run through to convert it into text, then print it out in a browser#
def run_program(self):
#Loads in PDF into program#
PDF_file = open(self.filename, 'rb')
read_pdf = PyPDF2.PdfFileReader(PDF_file)
#Determines number of pages in PDF file and sets the document content to 'null'#
number_of_pages = read_pdf.getNumPages()
doc_content = ""
#Extract text from the PDF file#
for i in range(number_of_pages):
page = read_pdf.getPage(0)
page_content = page.extractText()
doc_content += page_content
#Turns the text drawn from the PDF file into data the remaining code can understand#
tokenized_words = preprocess(doc_content)
#Determine frequency of words tokenized + lemmatized text#
from nltk.probability import FreqDist
fdist = FreqDist(tokenized_words)
final_list = fdist.most_common(len(fdist))
#Organize data into two columns and export the data to an html that automatically opens#
df = pandas.DataFrame(final_list, columns = ["Word", "Frequency"])
df.to_html('word_frequency.html')
webbrowser.open('file://' + os.path.realpath('word_frequency.html'))
#----Main----#
#Creates an instance of the wrapped functions to use the GUI#
selector = PDFSelector()
#Creats the GUI that will be used to select inputs#
window = tk.Tk()
window.geometry("385x130")
window.resizable(0, 0)
window.title("Word Frequency Program")
#Code literally just to make the GUI look better#
lblfilla = tk.Label(window, text = " ").grid(row = 0, column = 0)
lblfillb = tk.Label(window, text = " ").grid(row = 0, column = 1)
lblfillc = tk.Label(window, text = " ").grid(row = 0, column = 2)
lblfilld = tk.Label(window, text = " ").grid(row = 0, column = 3)
lblfille = tk.Label(window, text = " ").grid(row = 0, column = 4)
lblfillf = tk.Label(window, text = " ").grid(row = 1, column = 0)
lblfillg = tk.Label(window, text = " ").grid(row = 2, column = 0)
lblfillh = tk.Label(window, text = " ").grid(row = 3, column = 0)
lblfilli = tk.Label(window, text = " ").grid(row = 4, column = 0)
#Just a simple label on the GUI#
lbl1 = tk.Label(window, text = "File Selected: ").grid(row = 1, column = 1)
#Label asking for input to determine number of words to be displayed in the data table# (NOT IMPLEMENTED YET)
lbl2 = tk.Label(window, text = "Number of Words: ").grid(row = 2, column = 1)
lbl2a = tk.Entry(window).grid(row = 2, column = 2, columnspan = 2)
#Calls the select_PDF method to choose a PDF for the program to read#
button1 = ttk.Button(window, text = "Select File", command = selector.select_PDF).grid(row = 1, column = 4)
#Quits out of the program when certain button clicked#
button2 = ttk.Button(window, text = "Quit", command = window.quit).grid(row = 3, column = 2)
#Button to make the program execute#
button3 = ttk.Button(window, text = "Run", command = selector.run_program).grid(row = 3, column = 3)
lbl4 = tk.Label(window, text = selector.filename).grid(row = 1, column = 2)
window.mainloop()
window.destroy()
截至目前,在 selected PDF 文件后,它应该显示在 GUI
中的 "file Selected: " 旁边
我建议将标签变量 lbl1
中的 textvariable
设置为某些 StringVar
。然后你可以在 select_PDF()
中设置变量并让 tk 为你处理更新 GUI。
看到这个 answer。
在我的程序中,我试图让 GUI 显示我 select 的 PDF 的名称,然后我让程序实际执行它应该对该 PDF 执行的操作。目前,个人可以打开一个文件目录,select一个文件,然后在GUI中按下运行按钮让程序执行,但是用户看不到文件名他们 selected。
我试过放置一个简单的 "print" 函数,但似乎没有用,我还尝试添加 "selector.filename" ato lbl1,并使其成为自己的标签并放置它在不同的区域,包括几乎所有代码的末尾,但这似乎没有用。
import os
import PyPDF2
import pandas
import webbrowser
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize
#----Functions----#
#Method that a PDF that is read into the program goes through to eliminate any unwanted words or symbols#
def preprocess(text):
#Filters out punctuation from paragraph witch becomes tokenized to words and punctuation#
tokenizer = RegexpTokenizer(r'\w+')
result = tokenizer.tokenize(text)
#Makes all words lowercase#
words = [item.lower() for item in result]
#Removes all remaining tokens that are not alphabetic#
result = [word for word in words if word.isalpha()]
#Imports stopwords to be removed from paragraph#
stop_words = set(stopwords.words("english"))
#Removes the stop words from the paragraph#
filtered_sent = []
for w in result:
if w not in stop_words:
filtered_sent.append(w)
#Return word to root word/chop-off derivational affixes#
ps = PorterStemmer()
stemmed_words = []
for w in filtered_sent:
stemmed_words.append(ps.stem(w))
#Lemmatization, which reduces word to their base word, which is linguistically correct lemmas#
lem = WordNetLemmatizer()
lemmatized_words = ' '.join([lem.lemmatize(w,'n') and lem.lemmatize(w,'v') for w in filtered_sent])
#Re-tokenize lemmatized words string#
tokenized_word = word_tokenize(lemmatized_words)
return tokenized_word
#Wraps two functions inside an object which allows both functions to use filename#
class PDFSelector:
#Creates global variable 'filename'#
def __init(self):
self.filename = ""
#Allows user to select PDF to use in program#
def select_PDF(self):
#Opens file directory to select a file, and shows both folders and PDF files only#
self.filename = filedialog.askopenfilename(initialdir = "/", title = "Select file", filetypes = (("pdf files", "*.pdf"), ("all files", "*.*")))
print(self.filename)
#Method for PDF to run through to convert it into text, then print it out in a browser#
def run_program(self):
#Loads in PDF into program#
PDF_file = open(self.filename, 'rb')
read_pdf = PyPDF2.PdfFileReader(PDF_file)
#Determines number of pages in PDF file and sets the document content to 'null'#
number_of_pages = read_pdf.getNumPages()
doc_content = ""
#Extract text from the PDF file#
for i in range(number_of_pages):
page = read_pdf.getPage(0)
page_content = page.extractText()
doc_content += page_content
#Turns the text drawn from the PDF file into data the remaining code can understand#
tokenized_words = preprocess(doc_content)
#Determine frequency of words tokenized + lemmatized text#
from nltk.probability import FreqDist
fdist = FreqDist(tokenized_words)
final_list = fdist.most_common(len(fdist))
#Organize data into two columns and export the data to an html that automatically opens#
df = pandas.DataFrame(final_list, columns = ["Word", "Frequency"])
df.to_html('word_frequency.html')
webbrowser.open('file://' + os.path.realpath('word_frequency.html'))
#----Main----#
#Creates an instance of the wrapped functions to use the GUI#
selector = PDFSelector()
#Creats the GUI that will be used to select inputs#
window = tk.Tk()
window.geometry("385x130")
window.resizable(0, 0)
window.title("Word Frequency Program")
#Code literally just to make the GUI look better#
lblfilla = tk.Label(window, text = " ").grid(row = 0, column = 0)
lblfillb = tk.Label(window, text = " ").grid(row = 0, column = 1)
lblfillc = tk.Label(window, text = " ").grid(row = 0, column = 2)
lblfilld = tk.Label(window, text = " ").grid(row = 0, column = 3)
lblfille = tk.Label(window, text = " ").grid(row = 0, column = 4)
lblfillf = tk.Label(window, text = " ").grid(row = 1, column = 0)
lblfillg = tk.Label(window, text = " ").grid(row = 2, column = 0)
lblfillh = tk.Label(window, text = " ").grid(row = 3, column = 0)
lblfilli = tk.Label(window, text = " ").grid(row = 4, column = 0)
#Just a simple label on the GUI#
lbl1 = tk.Label(window, text = "File Selected: ").grid(row = 1, column = 1)
#Label asking for input to determine number of words to be displayed in the data table# (NOT IMPLEMENTED YET)
lbl2 = tk.Label(window, text = "Number of Words: ").grid(row = 2, column = 1)
lbl2a = tk.Entry(window).grid(row = 2, column = 2, columnspan = 2)
#Calls the select_PDF method to choose a PDF for the program to read#
button1 = ttk.Button(window, text = "Select File", command = selector.select_PDF).grid(row = 1, column = 4)
#Quits out of the program when certain button clicked#
button2 = ttk.Button(window, text = "Quit", command = window.quit).grid(row = 3, column = 2)
#Button to make the program execute#
button3 = ttk.Button(window, text = "Run", command = selector.run_program).grid(row = 3, column = 3)
lbl4 = tk.Label(window, text = selector.filename).grid(row = 1, column = 2)
window.mainloop()
window.destroy()
截至目前,在 selected PDF 文件后,它应该显示在 GUI
中的 "file Selected: " 旁边我建议将标签变量 lbl1
中的 textvariable
设置为某些 StringVar
。然后你可以在 select_PDF()
中设置变量并让 tk 为你处理更新 GUI。
看到这个 answer。