我正在尝试为我的 "Sentiment Analysis" 项目制作前端,但不知道如何让我的 "prediction_function" 在前端工作
I'm trying to make Front-end for my "Sentiment Analysis" project but can't figure out how to make my "prediction_function" work at the Front-end
我正在制作一个可用于任何语言的情感分析项目。它是这样工作的:在代码的末尾部分 "result" 将句子翻译成英语。然后predict_function(result.text)将英文文本分类为正面、负面或中性。
如果我单独运行代码,代码工作正常。现在我正在尝试制作前端,唯一的问题是我不知道如何使用它 link prediction_function 。翻译功能在那里工作,但唯一剩下的就是在前端对翻译后的文本进行分类。我是新手,我确实做了很多更改,但无法正常工作。
这是我的全部代码:(我想没必要看整个代码,因为我觉得问题在最后部分,@app.route('/', methods=['POST'])行)
from flask import Flask, request, render_template
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import pandas as pd
import numpy as np
import seaborn as sns
import regex as re
import math
import googletrans
from googletrans import Translator
from nltk.tokenize import word_tokenize
app = Flask(__name__)
@app.route('/')
def my_form():
return render_template('form.html')
df = pd.read_csv('C:/Users/path/file.csv')
df = df.rename(columns = {'clean_text':'Comment'})
df.head()
df.describe()
cat = []
for val in df['category'].values:
if val not in cat:
cat.append(val)
print(cat)
index_arr = []
for index, val in df.iterrows():
if val['category'] not in [-1.0, 0.0, 1.0]:
index_arr.append(index)
print(index_arr)
df.drop(index_arr, axis = 0, inplace = True)
sns.countplot(x='category',data=df)
def clean_comments(comment):
comment = re.sub(r'$\w*', '', str(comment))
comment = re.sub(r'^RT[\s]+', '', str(comment))
comment = re.sub(r'https?:\/\/.*[\r\n]*', '', str(comment))
comment = re.sub(r'#', '', str(comment))
comment = re.sub(r"@[^\s]+[\s]?",'',comment)
comment = re.sub('[^ a-zA-Z0-9]', '', comment)
comment = re.sub('[0-9]', '', comment)
return comment
df['Comment'] = df['Comment'].apply(clean_comments)
df.head()
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
def removing_stopwords(words):
cleaned_tokens = []
for val in words.split(' '):
val = val.lower()
if val not in stop_words and val != '':
cleaned_tokens.append(val)
return(cleaned_tokens)
df['Comment'] = df['Comment'].apply(removing_stopwords)
df.head()
from nltk.stem.porter import PorterStemmer
def stem_comments(words):
ps = PorterStemmer()
stemmed_review = []
for review in words:
stemmed_review.append(ps.stem(review))
return stemmed_review
df['Comment'] = df['Comment'].apply(stem_comments)
df.head()
temp = df.iloc[:,0].values
X = [' '.join(ele) for ele in temp]
X = np.array(X)
Y = df.iloc[:,1].values
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X).toarray()
print(X.shape)
print(Y[:5])
print(Y.shape)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.01)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
del X
del Y
del temp
del df
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print("Accuracy = ", accuracy_score(y_pred, y_test))
import seaborn as sn
from matplotlib.figure import Figure
df_cm = pd.DataFrame(cm, index = [0,1,2],columns = [0,1,2])
f = Figure(figsize = (20,10))
sn.heatmap(df_cm, annot=True)
def predict_function(sentence):
sentence = clean_comments(sentence)
sentence = removing_stopwords(sentence)
sentence = stem_comments(sentence)
X = [' '.join([str(elem) for elem in sentence])]
X = np.array(X)
X = vectorizer.transform(X).toarray()
result = classifier.predict(X)
if result == -1.0:
print("Negative")
elif result == 0.0:
print("Neutral")
else:
print("Positive")
@app.route('/', methods=['POST'])
def my_form_post():
text1 = request.form['text1'].lower()
translator = Translator(service_urls=['translate.googleapis.com'])
result = translator.translate(text1, dest='en')
senti=predict_function(result.text)
return render_template('form.html', final=result.text, last=senti, text1=text1)
if __name__ == "__main__":
app.run(debug=True, host="127.0.0.1", port=5002, threaded=True)
HTML 前端代码:
<body>
<h1>Welcome To Sentiment Analyzer</h1>
<form method="POST">
<textarea name="text1" placeholder="Say Something: ...." rows="10" cols="109"></textarea><br><br>
<input class="example_a" type="submit">
</form>
{% if final %}
<div>
<h2>The Sentiment of</h2> '{{ text1 }}' <h2>is {{ final }} </h2> <h2>is {{ last }} </h2>
{% else %}
<p></p>
{% endif %}
</div>
</body>
在您的 predict_function 函数中,您不会 return 打印任何值,无论它是否为正。尝试用 return 语句替换最后的那些打印语句。
我正在制作一个可用于任何语言的情感分析项目。它是这样工作的:在代码的末尾部分 "result" 将句子翻译成英语。然后predict_function(result.text)将英文文本分类为正面、负面或中性。
如果我单独运行代码,代码工作正常。现在我正在尝试制作前端,唯一的问题是我不知道如何使用它 link prediction_function 。翻译功能在那里工作,但唯一剩下的就是在前端对翻译后的文本进行分类。我是新手,我确实做了很多更改,但无法正常工作。
这是我的全部代码:(我想没必要看整个代码,因为我觉得问题在最后部分,@app.route('/', methods=['POST'])行)
from flask import Flask, request, render_template
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import pandas as pd
import numpy as np
import seaborn as sns
import regex as re
import math
import googletrans
from googletrans import Translator
from nltk.tokenize import word_tokenize
app = Flask(__name__)
@app.route('/')
def my_form():
return render_template('form.html')
df = pd.read_csv('C:/Users/path/file.csv')
df = df.rename(columns = {'clean_text':'Comment'})
df.head()
df.describe()
cat = []
for val in df['category'].values:
if val not in cat:
cat.append(val)
print(cat)
index_arr = []
for index, val in df.iterrows():
if val['category'] not in [-1.0, 0.0, 1.0]:
index_arr.append(index)
print(index_arr)
df.drop(index_arr, axis = 0, inplace = True)
sns.countplot(x='category',data=df)
def clean_comments(comment):
comment = re.sub(r'$\w*', '', str(comment))
comment = re.sub(r'^RT[\s]+', '', str(comment))
comment = re.sub(r'https?:\/\/.*[\r\n]*', '', str(comment))
comment = re.sub(r'#', '', str(comment))
comment = re.sub(r"@[^\s]+[\s]?",'',comment)
comment = re.sub('[^ a-zA-Z0-9]', '', comment)
comment = re.sub('[0-9]', '', comment)
return comment
df['Comment'] = df['Comment'].apply(clean_comments)
df.head()
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
def removing_stopwords(words):
cleaned_tokens = []
for val in words.split(' '):
val = val.lower()
if val not in stop_words and val != '':
cleaned_tokens.append(val)
return(cleaned_tokens)
df['Comment'] = df['Comment'].apply(removing_stopwords)
df.head()
from nltk.stem.porter import PorterStemmer
def stem_comments(words):
ps = PorterStemmer()
stemmed_review = []
for review in words:
stemmed_review.append(ps.stem(review))
return stemmed_review
df['Comment'] = df['Comment'].apply(stem_comments)
df.head()
temp = df.iloc[:,0].values
X = [' '.join(ele) for ele in temp]
X = np.array(X)
Y = df.iloc[:,1].values
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X).toarray()
print(X.shape)
print(Y[:5])
print(Y.shape)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.01)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
del X
del Y
del temp
del df
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print("Accuracy = ", accuracy_score(y_pred, y_test))
import seaborn as sn
from matplotlib.figure import Figure
df_cm = pd.DataFrame(cm, index = [0,1,2],columns = [0,1,2])
f = Figure(figsize = (20,10))
sn.heatmap(df_cm, annot=True)
def predict_function(sentence):
sentence = clean_comments(sentence)
sentence = removing_stopwords(sentence)
sentence = stem_comments(sentence)
X = [' '.join([str(elem) for elem in sentence])]
X = np.array(X)
X = vectorizer.transform(X).toarray()
result = classifier.predict(X)
if result == -1.0:
print("Negative")
elif result == 0.0:
print("Neutral")
else:
print("Positive")
@app.route('/', methods=['POST'])
def my_form_post():
text1 = request.form['text1'].lower()
translator = Translator(service_urls=['translate.googleapis.com'])
result = translator.translate(text1, dest='en')
senti=predict_function(result.text)
return render_template('form.html', final=result.text, last=senti, text1=text1)
if __name__ == "__main__":
app.run(debug=True, host="127.0.0.1", port=5002, threaded=True)
HTML 前端代码:
<body>
<h1>Welcome To Sentiment Analyzer</h1>
<form method="POST">
<textarea name="text1" placeholder="Say Something: ...." rows="10" cols="109"></textarea><br><br>
<input class="example_a" type="submit">
</form>
{% if final %}
<div>
<h2>The Sentiment of</h2> '{{ text1 }}' <h2>is {{ final }} </h2> <h2>is {{ last }} </h2>
{% else %}
<p></p>
{% endif %}
</div>
</body>
在您的 predict_function 函数中,您不会 return 打印任何值,无论它是否为正。尝试用 return 语句替换最后的那些打印语句。