我想在 Django 中运行机器学习算法,但 Django 找不到 CSV 文件的路径
I want to run machine learning algorithm using django but django is not taking csv file path
我正在 Django 的主页应用中创建一个垃圾邮件检测系统:用户输入任意字符串,该字符串会传给 ml 函数,函数返回该字符串是垃圾邮件还是非垃圾邮件,结果会显示在下一页。但我无法在 pd.read_csv 函数中正确指定 csv 文件的路径(path)。它报错:'../data/spam.csv' 不存在:b'../data/spam.csv'
view.py 文件
def hompage(request):
    """Render ``index.html`` with the spam-detection form.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        The rendered ``index.html`` response with ``form`` in its context.
    """
    # An empty POST QueryDict is falsy, so ``or None`` keeps the form unbound
    # on a plain GET; a bound-but-empty form would be validated on render and
    # show "required" errors before the user has typed anything.
    form = DetectForm(request.POST or None)
    return render(request, 'index.html', {'form': form})
def result(request):
    """Classify the submitted message and render the verdict.

    Args:
        request: The incoming Django ``HttpRequest``; the message is read
            from the ``msg`` field of the submitted ``DetectForm``.

    Returns:
        ``result.html`` with the classifier's verdict in ``msg`` when the
        form is valid; otherwise ``index.html`` with the form (and any
        validation errors) so the user can correct the input.
    """
    form = DetectForm(request.POST or None)
    if not form.is_valid():
        # A view must always return a response: without this branch an
        # invalid or missing submission would fall through with no result.
        return render(request, 'index.html', {'form': form})

    verdict = machine(form.cleaned_data['msg'])
    return render(request, 'result.html', {'msg': verdict})
ml.py 文件
def _clean_text(text, stemmer, stop_words):
    """Normalise one message for the bag-of-words model.

    Non-letters become spaces, the text is lower-cased and split on
    whitespace, stop words are dropped and the remaining words are stemmed.
    """
    import re

    words = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    return ' '.join(stemmer.stem(word) for word in words if word not in stop_words)


def machine(stringx, csv_path=None):
    """Classify ``stringx`` as ``'spam'`` or ``'ham'``.

    The labelled dataset is read from ``csv_path``; its ``v1`` column holds
    the label (``ham``/``spam``) and its ``v2`` column the message text. A
    bag-of-words model (at most 5000 features) is built over the cleaned
    dataset plus the query, a Gaussian naive Bayes classifier is fitted on
    the dataset rows, and the query row is predicted. The model is
    re-trained on every call.

    Args:
        stringx: Message text to classify.
        csv_path: Optional path to the dataset. Defaults to
            ``../data/spam.csv`` relative to the directory containing this
            module, so the lookup does not depend on the working directory
            the server process was started from.

    Returns:
        ``'spam'`` when the predicted label is 1, otherwise ``'ham'``.

    Raises:
        FileNotFoundError: If the dataset file does not exist.
    """
    import os

    if csv_path is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        csv_path = os.path.join(module_dir, os.pardir, 'data', 'spam.csv')
    csv_path = os.path.abspath(str(csv_path))
    # Fail fast, and report the absolute path so a wrong location is obvious.
    if not os.path.isfile(csv_path):
        raise FileNotFoundError('spam dataset not found: {}'.format(csv_path))

    import pandas as pd
    from nltk.corpus import stopwords
    from nltk.stem.porter import PorterStemmer
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import GaussianNB

    dataset = pd.read_csv(csv_path, encoding='latin-1').iloc[:, [0, 1]]
    labels = dataset['v1'].map({'ham': 0, 'spam': 1}).values

    # Build these once: loading the stop-word list for every word and
    # creating a stemmer for every row is pure repeated work.
    stemmer = PorterStemmer()
    stop_words = set(stopwords.words('english'))

    corpus = [_clean_text(text, stemmer, stop_words) for text in dataset['v2']]
    # The query goes last so it shares the vocabulary with the training rows;
    # it is addressed as "the last row" instead of a hard-coded row number.
    corpus.append(_clean_text(stringx, stemmer, stop_words))

    # GaussianNB needs a dense array, hence toarray().
    features = CountVectorizer(max_features=5000).fit_transform(corpus).toarray()

    classifier = GaussianNB()
    classifier.fit(features[:-1], labels)
    prediction = classifier.predict(features[-1:])[0]
    return 'spam' if prediction == 1 else 'ham'
result.html 文件
{% block title %}
<h2>Result</h2>
{% endblock %}
{% block content %}
<p>{{msg}}</p>
{% endblock %}
url.py 文件
from django.conf.urls import url
from . import views
from django.urls import path
# Application namespace used when reversing this app's URL names.
app_name = "spam"

urlpatterns = [
    # r'^' has no trailing '$', so this pattern matches every request path.
    # NOTE(review): no route to views.result is visible in this snippet --
    # confirm it is wired up elsewhere.
    url(r'^', views.hompage, name='hompage'),
]
如果 data 文件夹位于项目的根目录下,那么
# A relative path is resolved against the process's current working
# directory (typically the folder containing manage.py), not this file.
data = pd.read_csv('data/spam.csv', encoding='latin-1')
就可以正常工作。
或者,您可以使用 BASE_DIR 变量来构造路径,
然后在您的视图中使用该变量:
# settings.py -- the name must be UPPER_CASE: django.conf.settings only
# exposes upper-case names from the settings module.
DATA_DIR = os.path.join(BASE_DIR, 'data')

# views.py (or whichever module loads the CSV)
import os
from django.conf import settings
# Join with os.path.join: DATA_DIR has no trailing separator, so plain
# string concatenation would produce ".../dataspam.csv".
data = pd.read_csv(os.path.join(settings.DATA_DIR, 'spam.csv'), encoding='latin-1')
我正在 Django 的主页应用中创建一个垃圾邮件检测系统:用户输入任意字符串,该字符串会传给 ml 函数,函数返回该字符串是垃圾邮件还是非垃圾邮件,结果会显示在下一页。但我无法在 pd.read_csv 函数中正确指定 csv 文件的路径(path)。它报错:'../data/spam.csv' 不存在:b'../data/spam.csv'
view.py 文件
def hompage(request):
    """Render ``index.html`` with the spam-detection form.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        The rendered ``index.html`` response with ``form`` in its context.
    """
    # An empty POST QueryDict is falsy, so ``or None`` keeps the form unbound
    # on a plain GET; a bound-but-empty form would be validated on render and
    # show "required" errors before the user has typed anything.
    form = DetectForm(request.POST or None)
    return render(request, 'index.html', {'form': form})
def result(request):
    """Classify the submitted message and render the verdict.

    Args:
        request: The incoming Django ``HttpRequest``; the message is read
            from the ``msg`` field of the submitted ``DetectForm``.

    Returns:
        ``result.html`` with the classifier's verdict in ``msg`` when the
        form is valid; otherwise ``index.html`` with the form (and any
        validation errors) so the user can correct the input.
    """
    form = DetectForm(request.POST or None)
    if not form.is_valid():
        # A view must always return a response: without this branch an
        # invalid or missing submission would fall through with no result.
        return render(request, 'index.html', {'form': form})

    verdict = machine(form.cleaned_data['msg'])
    return render(request, 'result.html', {'msg': verdict})
ml.py 文件
def _clean_text(text, stemmer, stop_words):
    """Normalise one message for the bag-of-words model.

    Non-letters become spaces, the text is lower-cased and split on
    whitespace, stop words are dropped and the remaining words are stemmed.
    """
    import re

    words = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    return ' '.join(stemmer.stem(word) for word in words if word not in stop_words)


def machine(stringx, csv_path=None):
    """Classify ``stringx`` as ``'spam'`` or ``'ham'``.

    The labelled dataset is read from ``csv_path``; its ``v1`` column holds
    the label (``ham``/``spam``) and its ``v2`` column the message text. A
    bag-of-words model (at most 5000 features) is built over the cleaned
    dataset plus the query, a Gaussian naive Bayes classifier is fitted on
    the dataset rows, and the query row is predicted. The model is
    re-trained on every call.

    Args:
        stringx: Message text to classify.
        csv_path: Optional path to the dataset. Defaults to
            ``../data/spam.csv`` relative to the directory containing this
            module, so the lookup does not depend on the working directory
            the server process was started from.

    Returns:
        ``'spam'`` when the predicted label is 1, otherwise ``'ham'``.

    Raises:
        FileNotFoundError: If the dataset file does not exist.
    """
    import os

    if csv_path is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        csv_path = os.path.join(module_dir, os.pardir, 'data', 'spam.csv')
    csv_path = os.path.abspath(str(csv_path))
    # Fail fast, and report the absolute path so a wrong location is obvious.
    if not os.path.isfile(csv_path):
        raise FileNotFoundError('spam dataset not found: {}'.format(csv_path))

    import pandas as pd
    from nltk.corpus import stopwords
    from nltk.stem.porter import PorterStemmer
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import GaussianNB

    dataset = pd.read_csv(csv_path, encoding='latin-1').iloc[:, [0, 1]]
    labels = dataset['v1'].map({'ham': 0, 'spam': 1}).values

    # Build these once: loading the stop-word list for every word and
    # creating a stemmer for every row is pure repeated work.
    stemmer = PorterStemmer()
    stop_words = set(stopwords.words('english'))

    corpus = [_clean_text(text, stemmer, stop_words) for text in dataset['v2']]
    # The query goes last so it shares the vocabulary with the training rows;
    # it is addressed as "the last row" instead of a hard-coded row number.
    corpus.append(_clean_text(stringx, stemmer, stop_words))

    # GaussianNB needs a dense array, hence toarray().
    features = CountVectorizer(max_features=5000).fit_transform(corpus).toarray()

    classifier = GaussianNB()
    classifier.fit(features[:-1], labels)
    prediction = classifier.predict(features[-1:])[0]
    return 'spam' if prediction == 1 else 'ham'
result.html 文件
{% block title %}
<h2>Result</h2>
{% endblock %}
{% block content %}
<p>{{msg}}</p>
{% endblock %}
url.py 文件
from django.conf.urls import url
from . import views
from django.urls import path
# Application namespace used when reversing this app's URL names.
app_name = "spam"

urlpatterns = [
    # r'^' has no trailing '$', so this pattern matches every request path.
    # NOTE(review): no route to views.result is visible in this snippet --
    # confirm it is wired up elsewhere.
    url(r'^', views.hompage, name='hompage'),
]
如果 data 文件夹位于项目的根目录下,那么
# A relative path is resolved against the process's current working
# directory (typically the folder containing manage.py), not this file.
data = pd.read_csv('data/spam.csv', encoding='latin-1')
就可以正常工作。
或者,您可以使用 BASE_DIR 变量来构造路径,
然后在您的视图中使用该变量:
# settings.py -- the name must be UPPER_CASE: django.conf.settings only
# exposes upper-case names from the settings module.
DATA_DIR = os.path.join(BASE_DIR, 'data')

# views.py (or whichever module loads the CSV)
import os
from django.conf import settings
# Join with os.path.join: DATA_DIR has no trailing separator, so plain
# string concatenation would produce ".../dataspam.csv".
data = pd.read_csv(os.path.join(settings.DATA_DIR, 'spam.csv'), encoding='latin-1')