使用 HappyBase 更新 HBase 数据
Updating HBase data with HappyBase
我正在尝试编写一个函数来更新 HBase 中保存的 table 中的数据。我有一个函数会被调用来更新它,我有一个很好的开始,但在完成它的时候我有点迷茫。我可以将基于一个字符串的单行更新为另一个字符串,但是在比较日志时间时,我似乎无法弄清楚该怎么做,因为没有设置日志时间。我将 table 中的所有值存储到字典中。这是我的代码:
def _stored_cell_text(stored_row, qualifier, family="DriveInfo"):
    """Return the stored ``family:qualifier`` cell as text, or None if absent.

    The dict returned by ``table.row`` may be keyed by ``str`` or by
    ``bytes`` depending on the HappyBase/Python version in use, so both
    spellings of the column name are tried and byte values are decoded.
    """
    column = family + ":" + qualifier
    value = stored_row.get(column)
    if value is None:
        value = stored_row.get(column.encode("utf-8"))
    if isinstance(value, bytes):
        value = value.decode("utf-8")
    return value


def updateHBase(row):
    """Insert or refresh the HBase row that belongs to one drive record.

    Args:
        row: A record exposing ``asDict()`` and a ``serialnum`` attribute
            (used as the HBase row key). Its dict form must contain
            ``health`` and ``logtime`` entries.

    The record is written when any of the following holds:
      * no row is stored yet under that serial number;
      * the incoming record's health is anything other than "healthy";
      * the incoming record is healthy and its logtime is greater than
        the stored logtime.
    Otherwise the stored row is left untouched.

    Relies on a module-level HappyBase ``table`` object.
    """
    record = row.asDict()
    row_key = row.serialnum

    # Build a fresh mapping in "family:qualifier" form instead of renaming
    # keys in place: popping/inserting while iterating the key view is not
    # safe. The serial number is the row key, so it is not stored as a cell.
    cells = {
        "DriveInfo:" + name: value
        for name, value in record.items()
        if name != "serialnum"
    }

    stored = table.row(row_key)
    if not stored:
        # An empty result means the row key does not exist yet.
        table.put(row_key, cells)
        return

    # Health and logtime are read from the un-prefixed record; ``cells``
    # only has "DriveInfo:*" keys.
    if record["health"].lower() != "healthy":
        # Unhealthy records always replace the stored row, whether or not
        # the stored row is archived.
        table.put(row_key, cells)
        return

    stored_logtime = _stored_cell_text(stored, "logtime")
    # NOTE(review): stored cells come back as text, so both logtimes are
    # compared as strings. This presumes logtime is written in a format whose
    # lexical order is chronological (e.g. ISO-8601) -- confirm.
    # A stored row without a logtime cannot be newer, so the record wins.
    if stored_logtime is None or str(record["logtime"]) > stored_logtime:
        table.put(row_key, cells)
编辑:在我所有的 if 语句中,dict['health'] 是否应该改为 x['health']?
想通了...
def _stored_cell_text(stored_row, qualifier, family="DriveInfo"):
    """Return the stored ``family:qualifier`` cell as text, or None if absent.

    The dict returned by ``table.row`` may be keyed by ``str`` or by
    ``bytes`` depending on the HappyBase/Python version in use, so both
    spellings of the column name are tried and byte values are decoded.
    """
    column = family + ":" + qualifier
    value = stored_row.get(column)
    if value is None:
        value = stored_row.get(column.encode("utf-8"))
    if isinstance(value, bytes):
        value = value.decode("utf-8")
    return value


def updateHBase(row):
    """Insert or refresh the HBase row that belongs to one drive record.

    Args:
        row: A record exposing ``asDict()`` plus ``serialnum`` (used as
            the HBase row key) and ``logtime`` attributes.

    The record is written when any of the following holds:
      * no row is stored yet under that serial number;
      * the stored row's health is anything other than "healthy";
      * the stored row is healthy and the incoming logtime is greater
        than the stored logtime.
    Otherwise the stored row is left untouched.

    Relies on a module-level HappyBase ``table`` object.
    """
    record = row.asDict()
    row_key = row.serialnum

    # Build a fresh mapping in "family:qualifier" form instead of renaming
    # keys in place: popping/inserting while iterating the key view is not
    # safe. The serial number is the row key, so it is not stored as a cell.
    cells = {
        "DriveInfo:" + name: value
        for name, value in record.items()
        if name != "serialnum"
    }

    stored = table.row(row_key)
    if not stored:
        # An empty result means the row key does not exist yet.
        table.put(row_key, cells)
        return

    stored_health = _stored_cell_text(stored, "health")
    if stored_health is None or stored_health.lower() != "healthy":
        # A stored row that is not healthy (archived or otherwise) is
        # always replaced by the incoming record.
        table.put(row_key, cells)
        return

    stored_logtime = _stored_cell_text(stored, "logtime")
    # Replace a healthy row only when the incoming record is newer.
    # NOTE(review): stored cells come back as text, so both logtimes are
    # compared as strings. This presumes logtime is written in a format whose
    # lexical order is chronological (e.g. ISO-8601) -- confirm.
    if stored_logtime is None or str(row.logtime) > stored_logtime:
        table.put(row_key, cells)
我正在尝试编写一个函数来更新 HBase 中保存的 table 中的数据。我有一个函数会被调用来更新它,我有一个很好的开始,但在完成它的时候我有点迷茫。我可以将基于一个字符串的单行更新为另一个字符串,但是在比较日志时间时,我似乎无法弄清楚该怎么做,因为没有设置日志时间。我将 table 中的所有值存储到字典中。这是我的代码:
def _stored_cell_text(stored_row, qualifier, family="DriveInfo"):
    """Return the stored ``family:qualifier`` cell as text, or None if absent.

    The dict returned by ``table.row`` may be keyed by ``str`` or by
    ``bytes`` depending on the HappyBase/Python version in use, so both
    spellings of the column name are tried and byte values are decoded.
    """
    column = family + ":" + qualifier
    value = stored_row.get(column)
    if value is None:
        value = stored_row.get(column.encode("utf-8"))
    if isinstance(value, bytes):
        value = value.decode("utf-8")
    return value


def updateHBase(row):
    """Insert or refresh the HBase row that belongs to one drive record.

    Args:
        row: A record exposing ``asDict()`` and a ``serialnum`` attribute
            (used as the HBase row key). Its dict form must contain
            ``health`` and ``logtime`` entries.

    The record is written when any of the following holds:
      * no row is stored yet under that serial number;
      * the incoming record's health is anything other than "healthy";
      * the incoming record is healthy and its logtime is greater than
        the stored logtime.
    Otherwise the stored row is left untouched.

    Relies on a module-level HappyBase ``table`` object.
    """
    record = row.asDict()
    row_key = row.serialnum

    # Build a fresh mapping in "family:qualifier" form instead of renaming
    # keys in place: popping/inserting while iterating the key view is not
    # safe. The serial number is the row key, so it is not stored as a cell.
    cells = {
        "DriveInfo:" + name: value
        for name, value in record.items()
        if name != "serialnum"
    }

    stored = table.row(row_key)
    if not stored:
        # An empty result means the row key does not exist yet.
        table.put(row_key, cells)
        return

    # Health and logtime are read from the un-prefixed record; ``cells``
    # only has "DriveInfo:*" keys.
    if record["health"].lower() != "healthy":
        # Unhealthy records always replace the stored row, whether or not
        # the stored row is archived.
        table.put(row_key, cells)
        return

    stored_logtime = _stored_cell_text(stored, "logtime")
    # NOTE(review): stored cells come back as text, so both logtimes are
    # compared as strings. This presumes logtime is written in a format whose
    # lexical order is chronological (e.g. ISO-8601) -- confirm.
    # A stored row without a logtime cannot be newer, so the record wins.
    if stored_logtime is None or str(record["logtime"]) > stored_logtime:
        table.put(row_key, cells)
编辑:在我所有的 if 语句中,dict['health'] 是否应该改为 x['health']?
想通了...
def _stored_cell_text(stored_row, qualifier, family="DriveInfo"):
    """Return the stored ``family:qualifier`` cell as text, or None if absent.

    The dict returned by ``table.row`` may be keyed by ``str`` or by
    ``bytes`` depending on the HappyBase/Python version in use, so both
    spellings of the column name are tried and byte values are decoded.
    """
    column = family + ":" + qualifier
    value = stored_row.get(column)
    if value is None:
        value = stored_row.get(column.encode("utf-8"))
    if isinstance(value, bytes):
        value = value.decode("utf-8")
    return value


def updateHBase(row):
    """Insert or refresh the HBase row that belongs to one drive record.

    Args:
        row: A record exposing ``asDict()`` plus ``serialnum`` (used as
            the HBase row key) and ``logtime`` attributes.

    The record is written when any of the following holds:
      * no row is stored yet under that serial number;
      * the stored row's health is anything other than "healthy";
      * the stored row is healthy and the incoming logtime is greater
        than the stored logtime.
    Otherwise the stored row is left untouched.

    Relies on a module-level HappyBase ``table`` object.
    """
    record = row.asDict()
    row_key = row.serialnum

    # Build a fresh mapping in "family:qualifier" form instead of renaming
    # keys in place: popping/inserting while iterating the key view is not
    # safe. The serial number is the row key, so it is not stored as a cell.
    cells = {
        "DriveInfo:" + name: value
        for name, value in record.items()
        if name != "serialnum"
    }

    stored = table.row(row_key)
    if not stored:
        # An empty result means the row key does not exist yet.
        table.put(row_key, cells)
        return

    stored_health = _stored_cell_text(stored, "health")
    if stored_health is None or stored_health.lower() != "healthy":
        # A stored row that is not healthy (archived or otherwise) is
        # always replaced by the incoming record.
        table.put(row_key, cells)
        return

    stored_logtime = _stored_cell_text(stored, "logtime")
    # Replace a healthy row only when the incoming record is newer.
    # NOTE(review): stored cells come back as text, so both logtimes are
    # compared as strings. This presumes logtime is written in a format whose
    # lexical order is chronological (e.g. ISO-8601) -- confirm.
    if stored_logtime is None or str(row.logtime) > stored_logtime:
        table.put(row_key, cells)