使用 pandas 以更快的方式遍历 CSV 行
Iterate over CSV rows using pandas in a faster way
我正在尝试通过从 html 模板上传文件来读取 CSV 文件,然后遍历行并创建模型对象。
views.py
@login_required
def uploadStudents1(request):
    """Handle a CSV upload of student records.

    GET renders the upload form; POST reads the uploaded CSV file,
    parses ``date_of_birth`` as dates, replaces missing cells with
    ``'-'``, and inserts one ``Sem1Students`` row per CSV line.

    Fix over the original: the per-row ``.objects.create()`` call issued
    one INSERT query per record (N+1 pattern — the source of the 5-6 s
    upload time).  Instances are now built in memory and written with a
    single ``bulk_create`` query.
    """
    if request.method == 'POST':
        uploaded_file = request.FILES['document']
        # Lower-cased extension check; only .csv files are accepted.
        ext = os.path.splitext(uploaded_file.name)[-1].lower()
        if ext == '.csv':
            data_file = pd.read_csv(
                uploaded_file, parse_dates=['date_of_birth'])
            data_file.fillna('-', inplace=True)
            # Build all model instances first, then insert them in one
            # bulk query instead of one INSERT per row.
            students = [
                Sem1Students(
                    fname=row['first_name'],
                    lname=row['last_name'],
                    reg_no=row['reg_no'],
                    gender=row['gender'],
                    birth_date=row['date_of_birth'],
                )
                for _, row in data_file.iterrows()
            ]
            Sem1Students.objects.bulk_create(students)
            messages.success(request, 'Uploaded student details successfully!')
            return redirect('/students')
        else:
            messages.error(request, "Invalid file type. Please upload again.")
            # NOTE(review): this template path differs from the GET branch
            # below ('students/upload1.html' vs 'students/upload/upload1.html')
            # — confirm which path is intended.
            return render(request, 'students/upload1.html')
    return render(request, "students/upload/upload1.html")
但是这个过程真的很慢,读取和创建 74 条记录大约需要 5-6 秒。
有没有更好的方法来做到这一点,使整个过程更快?
你应该使用 bulk_create:在循环中逐条调用 create() 会对每条记录执行一次数据库 INSERT 查询,非常耗时;bulk_create 只用一次查询就能插入全部记录。
@login_required
def uploadStudents1(request):
    """Handle a CSV upload of student records using a single bulk insert.

    GET renders the upload form; POST reads the uploaded CSV file,
    parses ``date_of_birth`` as dates, replaces missing cells with
    ``'-'``, builds one ``Sem1Students`` instance per row and writes
    them all with a single ``bulk_create`` query.

    Improvement over the original: ``DataFrame.iterrows()`` builds a
    Series object per row and is the slowest pandas iteration API;
    ``itertuples(index=False)`` yields lightweight namedtuples instead,
    and the manual append-loop is replaced by a list comprehension.
    """
    if request.method == 'POST':
        uploaded_file = request.FILES['document']
        # Lower-cased extension check; only .csv files are accepted.
        ext = os.path.splitext(uploaded_file.name)[-1].lower()
        if ext == '.csv':
            data_file = pd.read_csv(
                uploaded_file, parse_dates=['date_of_birth'])
            data_file.fillna('-', inplace=True)
            # itertuples exposes columns as attributes; all CSV column
            # names here are valid Python identifiers, so attribute
            # access is safe.
            semi_st = [
                Sem1Students(
                    fname=row.first_name,
                    lname=row.last_name,
                    reg_no=row.reg_no,
                    gender=row.gender,
                    birth_date=row.date_of_birth,
                )
                for row in data_file.itertuples(index=False)
            ]
            Sem1Students.objects.bulk_create(semi_st)
            messages.success(request, 'Uploaded student details successfully!')
            return redirect('/students')
        else:
            messages.error(request, "Invalid file type. Please upload again.")
            # NOTE(review): this template path differs from the GET branch
            # below ('students/upload1.html' vs 'students/upload/upload1.html')
            # — confirm which path is intended.
            return render(request, 'students/upload1.html')
    return render(request, "students/upload/upload1.html")
我正在尝试通过从 html 模板上传文件来读取 CSV 文件,然后遍历行并创建模型对象。
views.py
@login_required
def uploadStudents1(request):
    """Accept a CSV file upload and create one Sem1Students row per line.

    Non-POST requests simply get the upload form; a POST with anything
    other than a .csv file is rejected with an error message.
    """
    if request.method != 'POST':
        # Plain GET (or any non-POST): just show the upload form.
        return render(request, "students/upload/upload1.html")

    document = request.FILES['document']
    extension = os.path.splitext(document.name)[-1].lower()
    if extension != '.csv':
        messages.error(request, "Invalid file type. Please upload again.")
        return render(request, 'students/upload1.html')

    # Parse the CSV, treating date_of_birth as a date column, and fill
    # any missing cells with the '-' placeholder.
    frame = pd.read_csv(document, parse_dates=['date_of_birth'])
    frame.fillna('-', inplace=True)

    # One database INSERT per CSV row.
    for _, record in frame.iterrows():
        Sem1Students.objects.create(
            fname=record['first_name'],
            lname=record['last_name'],
            reg_no=record['reg_no'],
            gender=record['gender'],
            birth_date=record['date_of_birth'],
        )
    messages.success(request, 'Uploaded student details successfully!')
    return redirect('/students')
但是这个过程真的很慢,读取和创建 74 条记录大约需要 5-6 秒。
有没有更好的方法来做到这一点,使整个过程更快?
你应该使用 bulk_create:在循环中逐条调用 create() 会对每条记录执行一次数据库 INSERT 查询,非常耗时;bulk_create 只用一次查询就能插入全部记录。
@login_required
def uploadStudents1(request):
    """Accept a CSV file upload and bulk-insert Sem1Students rows.

    Non-POST requests simply get the upload form; a POST with anything
    other than a .csv file is rejected with an error message.  All
    parsed rows are written with a single bulk_create query.
    """
    if request.method != 'POST':
        # Plain GET (or any non-POST): just show the upload form.
        return render(request, "students/upload/upload1.html")

    document = request.FILES['document']
    extension = os.path.splitext(document.name)[-1].lower()
    if extension != '.csv':
        messages.error(request, "Invalid file type. Please upload again.")
        return render(request, 'students/upload1.html')

    # Parse the CSV, treating date_of_birth as a date column, and fill
    # any missing cells with the '-' placeholder.
    frame = pd.read_csv(document, parse_dates=['date_of_birth'])
    frame.fillna('-', inplace=True)

    # Materialize every row as an unsaved model instance, then persist
    # the whole batch with one bulk query.
    pending = [
        Sem1Students(
            fname=record['first_name'],
            lname=record['last_name'],
            reg_no=record['reg_no'],
            gender=record['gender'],
            birth_date=record['date_of_birth'],
        )
        for _, record in frame.iterrows()
    ]
    Sem1Students.objects.bulk_create(pending)
    messages.success(request, 'Uploaded student details successfully!')
    return redirect('/students')