使用 python 将大数据存储到 mongodb 集合中

storing large data into mongodb collection using python

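我正在开发一个 Django 项目,需要生成 10 年的数据,将其存储到 MongoDB 中,再检索出来并显示在 HTML 页面上。我尝试把这 10 年的数据按 1 年拆分,然后分别存入 MongoDB 集合,但每当我这样做时,只会存储两个文档,并且 pymongo 会抛出以下错误:

pymongo.errors.DocumentTooLarge: BSON document too large (29948865 bytes) - the connected server supports BSON document sizes up to 16793598 bytes.(BSON 文档太大(29948865 字节)- 连接的服务器支持最大 16793598 字节的 BSON 文档。)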

我的 Python 代码如下:


# Simulation clock. `now` is the current simulated timestamp; `workdate` and
# `nowtime` mirror its date and time parts.
now=start
workdate=now.date()
nowtime=now.time()

# `endt` is the timestamp at which the outer loop stops; `ktime` is the
# boundary of the period currently being generated.
endt=end
ktime=start

# Samples appended by fill()/drain(): timestamp, pump state and tank level.
# NOTE(review): these lists are created once here and never cleared inside
# the loop below.
times=[]
states=[]
level=[]



# Simulate the water level in the tank, one sample per simulated minute.
# NOTE(review): the inner `while k` loop only exits when now == ktime, so this
# loop terminates only if some period boundary lands exactly on endt.
while (now!=endt): # each pass generates one period of data and stores it
    # Move the period boundary forward by five months.
    ktime=ktime+relativedelta(months=5)
    print(current_level)
    # Filling phase: while the state is 'on' and the level is at most 450,
    # record a sample, raise the level by `filling` (rounded to 2 decimals)
    # and advance the clock by one minute. Stops early, setting flag='red',
    # when the clock reaches the period boundary `ktime`.
    def fill():
        global df
        global now
        global workdate
        global nowtime
        global ktime
        global current_level
        global flag

        global times
        global states
        global level
        while x=='on' and current_level<=450:
            times.append(now)
            states.append(x)
            level.append(current_level)



            current_level+=filling
            current_level=round(current_level,2)
            now=now+timedelta(minutes=1)
            nowtime=now.time()
            # NOTE(review): this stores the bound method itself, not a date;
            # drain() calls now.date() at the same point.
            workdate=now.date
            if now==ktime:
              # Period boundary reached: record the final sample and signal
              # the driver loop to stop via flag.
              times.append(now)
              states.append(x)
              level.append(current_level)

              print("true")
              flag='red'
              break



    # Draining phase: while the state is 'off' and the level is above 50,
    # record a sample, lower the level by `emptyrate` (rounded to 4 decimals)
    # and advance the clock by one minute. Stops early, setting flag='red',
    # when the clock reaches the period boundary `ktime`.
    def drain():
        global df
        global now
        global workdate
        global nowtime
        global ktime
        global current_level
        global flag

        global times
        global states
        global level



        while x=='off' and  current_level>50:
            times.append(now)
            states.append(x)
            level.append(current_level)



            print(current_level)
            current_level-=emptyrate
            current_level=round(current_level,4)
            now=now+timedelta(minutes=1)
            nowtime=now.time()
            workdate=now.date()
            if now==ktime:
              # Period boundary reached: record the final sample and signal
              # the driver loop to stop via flag.
              times.append(now)
              states.append(x)
              level.append(current_level)

              print("true")
              flag='red'
              break








    # 'green' means the current period is still being generated; fill() and
    # drain() switch it to 'red' once the clock reaches ktime.
    flag='green'
    k=True
    # Alternate drain and fill phases until one of them hits the boundary.
    while k:
        if  x=='off' and current_level>50:
            drain()
            x='on'



        if flag =='red':
         break




        if x=='on' and  current_level<450:
            fill()
            x='off'





        if flag=='red':
            break



    # Build one document from the collected samples and store it.
    # NOTE(review): times/states/level still hold every sample from earlier
    # periods, so each document repeats all previous data and keeps growing.
    data = {'time': times, 'status': states, 'level': level}
    df = pd.DataFrame(data)


    df.reset_index(inplace=True)
    data = df.to_dict('records')
    # NOTE(review): presumably `colle` is a pymongo collection; if so,
    # Collection.insert() no longer exists in PyMongo 4 — confirm.
    colle.insert({"data":data}) # transferring data to the collection
    del df
    # Empties only the records list built above, not times/states/level.
    data.clear()
    

所以问题出在我的逻辑上:我不应该清除 data,而应该在每一轮循环结束后,清除 times(时间)、states(状态)和 level(级别)这三个列表中已经存储的数据。因此,最后需要修改的部分如下:

# Alternate drain and fill phases until one of them reaches the period
# boundary (the helper then sets flag to 'red'). At that point snapshot the
# collected samples and reset the flag for the next period.
while k:
    if x == 'off' and current_level > 50:
        drain()
        x = 'on'

    if flag == 'red':
        data = {'time': times, 'status': states, 'level': level}
        flag = 'green'
        break

    if x == 'on' and current_level < 450:
        fill()
        x = 'off'

    if flag == 'red':
        data = {'time': times, 'status': states, 'level': level}
        flag = 'green'
        break

# Store this period's samples as a single document.
df = pd.DataFrame(data)
print(df)
df.reset_index(inplace=True)
data_dict = df.to_dict("records")
colle.insert_one({"DATA": data_dict})
df = df.iloc[0:0]

# Empty all three sample lists so the next period starts from scratch and each
# document stays below MongoDB's BSON size limit. `level` must be cleared
# together with `times` and `states`: otherwise the lists have different
# lengths on the next pass and pd.DataFrame(data) raises ValueError.
times.clear()
states.clear()
level.clear()

控制这段代码运行周期的外层循环(即主循环)保持不变。