使用 Python 将大量数据存储到 MongoDB 集合中
Storing large data in a MongoDB collection using Python
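我正在开发一个 Django 项目,需要生成 10 年的数据并存入 MongoDB,然后再把它检索出来显示在 HTML 页面上。我尝试把这 10 年的数据按 1 年一段进行拆分,再分别存入 MongoDB 集合,但每次这样做时都只有两个文档被成功存储,随后 pymongo 就会报出如下错误:
pymongo.errors.DocumentTooLarge: BSON document too large (29948865 bytes) - the connected server supports BSON document sizes up to 16793598 bytes.(即:BSON 文档太大(29948865 字节),而所连接的服务器最大只支持 16793598 字节的 BSON 文档。)
我的 Python 代码如下: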
# Simulation state. fill() and drain() below declare these names `global`
# and update them in place. `start` and `end` come from outside this excerpt.
now=start
workdate=now.date()
nowtime=now.time()
endt=end
# Batch boundary: fill()/drain() set flag='red' once `now` reaches `ktime`.
ktime=start
# Parallel lists: one timestamp, on/off state and water level per sample.
times=[]
states=[]
level=[]
# Generating the level of water in the tank (the original comment says
# "random", but no randomness is visible in this excerpt).
# NOTE(review): indentation was lost in the paste, so the extent of this
# loop's body cannot be determined from here -- presumably the rest of the
# snippet runs once per pass; confirm against the original source.
while (now!=endt): # loop for creating data for given time
# Push the batch boundary five months ahead.
ktime=ktime+relativedelta(months=5)
print(current_level)
def fill():
    """Raise the tank level one simulated minute at a time while ``x`` is 'on'.

    Operates entirely on module-level state. Each step appends the current
    timestamp, state and level to ``times`` / ``states`` / ``level``, adds
    ``filling`` to ``current_level`` (rounded to 2 decimals) and advances
    ``now`` by one minute, refreshing ``nowtime`` and ``workdate``.

    The loop stops when ``current_level`` exceeds 450, or earlier when
    ``now`` reaches the batch boundary ``ktime``. In the latter case one
    final sample is recorded and ``flag`` is set to ``'red'`` so the caller
    can tell that the batch is complete.
    """
    # Only names that are rebound need a `global` declaration; the lists are
    # mutated in place and `x`, `filling`, `ktime` are only read.
    global now, workdate, nowtime, current_level, flag
    while x == 'on' and current_level <= 450:
        times.append(now)
        states.append(x)
        level.append(current_level)
        current_level += filling
        current_level = round(current_level, 2)
        now = now + timedelta(minutes=1)
        nowtime = now.time()
        # Fixed: the original assigned the bound method `now.date` instead of
        # calling it, unlike drain(), which stores an actual date.
        workdate = now.date()
        if now == ktime:
            # Batch boundary reached: record the boundary sample and signal
            # the caller to flush this batch.
            times.append(now)
            states.append(x)
            level.append(current_level)
            print("true")
            flag = 'red'
            break
def drain():
    """Lower the tank level one simulated minute at a time while ``x`` is 'off'.

    Mirrors fill(): every step records the current timestamp, state and
    level, prints the level, subtracts ``emptyrate`` from ``current_level``
    (rounded to 4 decimals) and moves ``now`` forward by one minute.

    The loop ends once ``current_level`` is no longer above 50, or earlier
    when ``now`` reaches the batch boundary ``ktime``; in that case a final
    sample is recorded and ``flag`` becomes ``'red'``.
    """
    global df, now, workdate, nowtime, ktime, current_level, flag
    global times, states, level
    while x == 'off' and current_level > 50:
        times.append(now)
        states.append(x)
        level.append(current_level)
        print(current_level)
        current_level = round(current_level - emptyrate, 4)
        now += timedelta(minutes=1)
        nowtime, workdate = now.time(), now.date()
        if now != ktime:
            continue
        # Batch boundary reached: log the boundary sample and tell the caller.
        times.append(now)
        states.append(x)
        level.append(current_level)
        print("true")
        flag = 'red'
        break
# Batch driver: alternate drain() and fill() until one of them reports, via
# flag == 'red', that `now` reached the batch boundary `ktime`.
flag='green'
# `k` is never changed, so the loop below can only be left through `break`.
k=True
while k:
if x=='off' and current_level>50:
drain()
x='on'
if flag =='red':
break
if x=='on' and current_level<450:
fill()
x='off'
if flag=='red':
break
# Turn the three parallel lists into a list of per-sample records.
data = {'time': times, 'status': states, 'level': level}
df = pd.DataFrame(data)
df.reset_index(inplace=True)
data = df.to_dict('records')
# The whole batch is stored as ONE document with every record embedded in an
# array, so the document size grows with the number of samples.
colle.insert({"data":data}) #transferring data to collection
del df
# NOTE(review): this empties only the records list built above. `times`,
# `states` and `level` are not cleared in this snippet, so a later pass
# would include every earlier sample again and the inserted document keeps
# growing -- consistent with the DocumentTooLarge error reported above.
data.clear()
所以问题出在我的逻辑上:我不应该清除 data,而应该在循环结束后清空 times、states 和 level 这三个列表中已经存入数据库的数据。
因此,最终需要修改的部分如下:
# Corrected batch driver: run drain()/fill() until the batch boundary is
# reached, store the batch, then empty the sample buffers so the next batch
# starts from scratch.
# NOTE(review): indentation was lost in the original paste; the nesting below
# (state switch and flag check inside each branch) is a reconstruction --
# confirm against the original source.
while k:
    if x == 'off' and current_level > 50:
        drain()
        x = 'on'
        if flag == 'red':
            # Boundary hit while draining: snapshot the batch and reset the
            # flag for the next pass.
            data = {'time': times, 'status': states, 'level': level}
            flag = 'green'
            break
    if x == 'on' and current_level < 450:
        fill()
        x = 'off'
        if flag == 'red':
            # Boundary hit while filling: same hand-over as above.
            data = {'time': times, 'status': states, 'level': level}
            flag = 'green'
            break

df = pd.DataFrame(data)
print(df)
df.reset_index(inplace=True)
data_dict = df.to_dict("records")
# One document per batch, with all records embedded under "DATA".
colle.insert_one({"DATA": data_dict})
df = df.iloc[0:0]

# Empty ALL three buffers. The original cleared only `times` and `states`;
# leaving `level` populated makes the columns unequal in length on the next
# pass, which pd.DataFrame rejects, and contradicts the stated fix.
times.clear()
states.clear()
level.clear()
控制这段代码运行周期的外层循环(即主循环)保持不变。
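我正在开发一个 Django 项目,需要生成 10 年的数据并存入 MongoDB,然后再把它检索出来显示在 HTML 页面上。我尝试把这 10 年的数据按 1 年一段进行拆分,再分别存入 MongoDB 集合,但每次这样做时都只有两个文档被成功存储,随后 pymongo 就会报出如下错误:
pymongo.errors.DocumentTooLarge: BSON document too large (29948865 bytes) - the connected server supports BSON document sizes up to 16793598 bytes.(即:BSON 文档太大(29948865 字节),而所连接的服务器最大只支持 16793598 字节的 BSON 文档。)
我的 Python 代码如下: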
# Simulation state. fill() and drain() below declare these names `global`
# and update them in place. `start` and `end` come from outside this excerpt.
now=start
workdate=now.date()
nowtime=now.time()
endt=end
# Batch boundary: fill()/drain() set flag='red' once `now` reaches `ktime`.
ktime=start
# Parallel lists: one timestamp, on/off state and water level per sample.
times=[]
states=[]
level=[]
# Generating the level of water in the tank (the original comment says
# "random", but no randomness is visible in this excerpt).
# NOTE(review): indentation was lost in the paste, so the extent of this
# loop's body cannot be determined from here -- presumably the rest of the
# snippet runs once per pass; confirm against the original source.
while (now!=endt): # loop for creating data for given time
# Push the batch boundary five months ahead.
ktime=ktime+relativedelta(months=5)
print(current_level)
def fill():
    """Raise the tank level one simulated minute at a time while ``x`` is 'on'.

    Operates entirely on module-level state. Each step appends the current
    timestamp, state and level to ``times`` / ``states`` / ``level``, adds
    ``filling`` to ``current_level`` (rounded to 2 decimals) and advances
    ``now`` by one minute, refreshing ``nowtime`` and ``workdate``.

    The loop stops when ``current_level`` exceeds 450, or earlier when
    ``now`` reaches the batch boundary ``ktime``. In the latter case one
    final sample is recorded and ``flag`` is set to ``'red'`` so the caller
    can tell that the batch is complete.
    """
    # Only names that are rebound need a `global` declaration; the lists are
    # mutated in place and `x`, `filling`, `ktime` are only read.
    global now, workdate, nowtime, current_level, flag
    while x == 'on' and current_level <= 450:
        times.append(now)
        states.append(x)
        level.append(current_level)
        current_level += filling
        current_level = round(current_level, 2)
        now = now + timedelta(minutes=1)
        nowtime = now.time()
        # Fixed: the original assigned the bound method `now.date` instead of
        # calling it, unlike drain(), which stores an actual date.
        workdate = now.date()
        if now == ktime:
            # Batch boundary reached: record the boundary sample and signal
            # the caller to flush this batch.
            times.append(now)
            states.append(x)
            level.append(current_level)
            print("true")
            flag = 'red'
            break
def drain():
    """Lower the tank level one simulated minute at a time while ``x`` is 'off'.

    Mirrors fill(): every step records the current timestamp, state and
    level, prints the level, subtracts ``emptyrate`` from ``current_level``
    (rounded to 4 decimals) and moves ``now`` forward by one minute.

    The loop ends once ``current_level`` is no longer above 50, or earlier
    when ``now`` reaches the batch boundary ``ktime``; in that case a final
    sample is recorded and ``flag`` becomes ``'red'``.
    """
    global df, now, workdate, nowtime, ktime, current_level, flag
    global times, states, level
    while x == 'off' and current_level > 50:
        times.append(now)
        states.append(x)
        level.append(current_level)
        print(current_level)
        current_level = round(current_level - emptyrate, 4)
        now += timedelta(minutes=1)
        nowtime, workdate = now.time(), now.date()
        if now != ktime:
            continue
        # Batch boundary reached: log the boundary sample and tell the caller.
        times.append(now)
        states.append(x)
        level.append(current_level)
        print("true")
        flag = 'red'
        break
# Batch driver: alternate drain() and fill() until one of them reports, via
# flag == 'red', that `now` reached the batch boundary `ktime`.
flag='green'
# `k` is never changed, so the loop below can only be left through `break`.
k=True
while k:
if x=='off' and current_level>50:
drain()
x='on'
if flag =='red':
break
if x=='on' and current_level<450:
fill()
x='off'
if flag=='red':
break
# Turn the three parallel lists into a list of per-sample records.
data = {'time': times, 'status': states, 'level': level}
df = pd.DataFrame(data)
df.reset_index(inplace=True)
data = df.to_dict('records')
# The whole batch is stored as ONE document with every record embedded in an
# array, so the document size grows with the number of samples.
colle.insert({"data":data}) #transferring data to collection
del df
# NOTE(review): this empties only the records list built above. `times`,
# `states` and `level` are not cleared in this snippet, so a later pass
# would include every earlier sample again and the inserted document keeps
# growing -- consistent with the DocumentTooLarge error reported above.
data.clear()
所以问题出在我的逻辑上:我不应该清除 data,而应该在循环结束后清空 times、states 和 level 这三个列表中已经存入数据库的数据。因此,最终需要修改的部分如下:
# Corrected batch driver: run drain()/fill() until the batch boundary is
# reached, store the batch, then empty the sample buffers so the next batch
# starts from scratch.
# NOTE(review): indentation was lost in the original paste; the nesting below
# (state switch and flag check inside each branch) is a reconstruction --
# confirm against the original source.
while k:
    if x == 'off' and current_level > 50:
        drain()
        x = 'on'
        if flag == 'red':
            # Boundary hit while draining: snapshot the batch and reset the
            # flag for the next pass.
            data = {'time': times, 'status': states, 'level': level}
            flag = 'green'
            break
    if x == 'on' and current_level < 450:
        fill()
        x = 'off'
        if flag == 'red':
            # Boundary hit while filling: same hand-over as above.
            data = {'time': times, 'status': states, 'level': level}
            flag = 'green'
            break

df = pd.DataFrame(data)
print(df)
df.reset_index(inplace=True)
data_dict = df.to_dict("records")
# One document per batch, with all records embedded under "DATA".
colle.insert_one({"DATA": data_dict})
df = df.iloc[0:0]

# Empty ALL three buffers. The original cleared only `times` and `states`;
# leaving `level` populated makes the columns unequal in length on the next
# pass, which pd.DataFrame rejects, and contradicts the stated fix.
times.clear()
states.clear()
level.clear()
控制这段代码运行周期的外层循环(即主循环)保持不变。