.csv works fine, .tsv gives 'TypeError: expected string or buffer'
.csv works fine, .tsv gives 'TypeError: expected string or buffer'
我正在开发一个 Python 脚本,用来解析用户代理(user agent)字符串,并将它们归并为浏览器 'family'(例如 chrome、firefox、safari 等)。
我的脚本针对 .csv 文件运行时完全正常,但针对 .tsv 文件运行时会报以下错误:
TypeError: expected string or buffer(类型错误:需要字符串或缓冲区)
有其他人遇到过这个问题吗?示例代码如下。
import pandas as pd
import numpy as np
import glob as glob
from ua_parser import user_agent_parser as uaparser
#THIS WORKS FINE:
def parse_uagent():
    """Parse the user agents found in every ``*.csv`` file of the working directory.

    Each file must provide ``user_agent``, ``date_time`` and ``user_name``
    columns. Every non-empty ``user_agent`` value is passed to
    ``uaparser.ParseUserAgent``; the results from all files are collected
    into a single DataFrame, which is printed. Nothing is returned.

    Raises:
        KeyError: if a file lacks one of the three expected columns.
    """
    ua_list = []
    for datafile in glob.glob("*.csv"):
        df = pd.read_csv(datafile, sep=',')
        df = df[['user_agent', 'date_time', 'user_name']]
        # Empty cells are read as NaN, which is not a string; ParseUserAgent
        # runs a regex search on its argument and raises
        # "TypeError: expected string or buffer" for such values, so they
        # are dropped before parsing.
        for line in df[df.columns[0]].dropna().values:
            ua_list.append(uaparser.ParseUserAgent(line))
    # Build the frame once from everything that was parsed, so no result is
    # added twice and the code does not depend on DataFrame.append.
    uadf = pd.DataFrame(ua_list)
    print(uadf)
#THIS GAVE AN ERROR (see the traceback below) on empty user_agent cells;
#it now skips those cells before parsing:
def parse_uagent():
    """Parse the user agents found in every ``*.tsv`` file of the working directory.

    Each file must provide ``user_agent``, ``date_time`` and ``user_name``
    columns. Every non-empty ``user_agent`` value is passed to
    ``uaparser.ParseUserAgent``; the results from all files are collected
    into a single DataFrame, which is printed. Nothing is returned.

    Raises:
        KeyError: if a file lacks one of the three expected columns.
    """
    ua_list = []
    for datafile in glob.glob("*.tsv"):
        df = pd.read_csv(datafile, sep='\t')
        df = df[['user_agent', 'date_time', 'user_name']]
        # Empty cells are read as NaN, which is not a string; ParseUserAgent
        # runs a regex search on its argument and raises
        # "TypeError: expected string or buffer" for such values, so they
        # are dropped before parsing.
        for line in df[df.columns[0]].dropna().values:
            ua_list.append(uaparser.ParseUserAgent(line))
    # Build the frame once from everything that was parsed, so no result is
    # added twice and the code does not depend on DataFrame.append.
    uadf = pd.DataFrame(ua_list)
    print(uadf)
回溯:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-92-14c05dc8ee13> in <module>()
29
30
---> 31 parse_uagent()
32
<ipython-input-92-14c05dc8ee13> in parse_uagent()
19 ua = df[df.columns[0]].values
20 for line in ua:
---> 21 uagent = uaparser.ParseUserAgent(line)
22 ua_list.append(uagent)
23 uadf = uadf.append(ua_list)
/anaconda2/lib/python2.7/site-packages/ua_parser/user_agent_parser.pyc in ParseUserAgent(user_agent_string, **jsParseBits)
247 else:
248 for uaParser in USER_AGENT_PARSERS:
--> 249 family, v1, v2, v3 = uaParser.Parse(user_agent_string)
250 if family:
251 break
/anaconda2/lib/python2.7/site-packages/ua_parser/user_agent_parser.pyc in Parse(self, user_agent_string)
49 def Parse(self, user_agent_string):
50 family, v1, v2, v3 = None, None, None, None
---> 51 match = self.user_agent_re.search(user_agent_string)
52 if match:
53 if self.family_replacement:
TypeError: expected string or buffer
问题已解决:ua-parser 在遇到空单元格(pandas 读入后为 NaN,而不是字符串)时会失败。在解析之前删除 user_agent 列为 NaN 的行即可修复该错误,例如:df = df.dropna(subset=['user_agent'])。
我正在开发一个 Python 脚本,用来解析用户代理(user agent)字符串,并将它们归并为浏览器 'family'(例如 chrome、firefox、safari 等)。
我的脚本针对 .csv 文件运行时完全正常,但针对 .tsv 文件运行时会报以下错误:
TypeError: expected string or buffer(类型错误:需要字符串或缓冲区)
有其他人遇到过这个问题吗?示例代码如下。
import pandas as pd
import numpy as np
import glob as glob
from ua_parser import user_agent_parser as uaparser
#THIS WORKS FINE:
def parse_uagent():
    """Parse the user agents found in every ``*.csv`` file of the working directory.

    Each file must provide ``user_agent``, ``date_time`` and ``user_name``
    columns. Every non-empty ``user_agent`` value is passed to
    ``uaparser.ParseUserAgent``; the results from all files are collected
    into a single DataFrame, which is printed. Nothing is returned.

    Raises:
        KeyError: if a file lacks one of the three expected columns.
    """
    ua_list = []
    for datafile in glob.glob("*.csv"):
        df = pd.read_csv(datafile, sep=',')
        df = df[['user_agent', 'date_time', 'user_name']]
        # Empty cells are read as NaN, which is not a string; ParseUserAgent
        # runs a regex search on its argument and raises
        # "TypeError: expected string or buffer" for such values, so they
        # are dropped before parsing.
        for line in df[df.columns[0]].dropna().values:
            ua_list.append(uaparser.ParseUserAgent(line))
    # Build the frame once from everything that was parsed, so no result is
    # added twice and the code does not depend on DataFrame.append.
    uadf = pd.DataFrame(ua_list)
    print(uadf)
#THIS GAVE AN ERROR (see the traceback below) on empty user_agent cells;
#it now skips those cells before parsing:
def parse_uagent():
    """Parse the user agents found in every ``*.tsv`` file of the working directory.

    Each file must provide ``user_agent``, ``date_time`` and ``user_name``
    columns. Every non-empty ``user_agent`` value is passed to
    ``uaparser.ParseUserAgent``; the results from all files are collected
    into a single DataFrame, which is printed. Nothing is returned.

    Raises:
        KeyError: if a file lacks one of the three expected columns.
    """
    ua_list = []
    for datafile in glob.glob("*.tsv"):
        df = pd.read_csv(datafile, sep='\t')
        df = df[['user_agent', 'date_time', 'user_name']]
        # Empty cells are read as NaN, which is not a string; ParseUserAgent
        # runs a regex search on its argument and raises
        # "TypeError: expected string or buffer" for such values, so they
        # are dropped before parsing.
        for line in df[df.columns[0]].dropna().values:
            ua_list.append(uaparser.ParseUserAgent(line))
    # Build the frame once from everything that was parsed, so no result is
    # added twice and the code does not depend on DataFrame.append.
    uadf = pd.DataFrame(ua_list)
    print(uadf)
回溯:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-92-14c05dc8ee13> in <module>()
29
30
---> 31 parse_uagent()
32
<ipython-input-92-14c05dc8ee13> in parse_uagent()
19 ua = df[df.columns[0]].values
20 for line in ua:
---> 21 uagent = uaparser.ParseUserAgent(line)
22 ua_list.append(uagent)
23 uadf = uadf.append(ua_list)
/anaconda2/lib/python2.7/site-packages/ua_parser/user_agent_parser.pyc in ParseUserAgent(user_agent_string, **jsParseBits)
247 else:
248 for uaParser in USER_AGENT_PARSERS:
--> 249 family, v1, v2, v3 = uaParser.Parse(user_agent_string)
250 if family:
251 break
/anaconda2/lib/python2.7/site-packages/ua_parser/user_agent_parser.pyc in Parse(self, user_agent_string)
49 def Parse(self, user_agent_string):
50 family, v1, v2, v3 = None, None, None, None
---> 51 match = self.user_agent_re.search(user_agent_string)
52 if match:
53 if self.family_replacement:
TypeError: expected string or buffer
问题已解决:ua-parser 在遇到空单元格(pandas 读入后为 NaN,而不是字符串)时会失败。在解析之前删除 user_agent 列为 NaN 的行即可修复该错误,例如:df = df.dropna(subset=['user_agent'])。