Influxdb-python: 插入 utf8 数据时出错
Influxdb-python: error inserting utf8 data
我想从 mysql 中插入一些数据到 influxDB。 sql 中的数据是 utf-8 编码的,我在 vagrant 上使用 python 2.6.6。
$] python
Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-11)] on linux2
这是table的结构和mysql的示例数据。
mysql> show create table countries;
+-----------+----------------------+
| Table | Create Table |
+-----------+----------------------+
| countries | CREATE TABLE `countries` (
`id` smallint(5) unsigned DEFAULT NULL,
`name` varchar(100) DEFAULT NULL,
KEY `name` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 |
+----------------------------------+
mysql> select id,name from countries;
+------+----------------------------------+
| id | name |
+------+----------------------------------+
| 11 | Afghanistan |
| 12 | Åland Islands |
| 13 | Albania |
| 14 | Côte d’Ivoire |
+------+----------------------------------+
有些国家/地区的名称包含非 ASCII 字符(例如 Å、ô、’)。我正在使用下面的 python 代码从 mysql 中获取数据并插入到 influxDB 中。
#!/usr/bin/python
import MySQLdb
import json
from influxdb import InfluxDBClient
# Copy every (id, name) row of the MySQL `countries` table into the InfluxDB
# measurement `cpu_load_short`, then read the measurement back and print it.

# Open database connection
db = MySQLdb.connect("localhost", "root", "", "platform")

# prepare a cursor object using cursor() method
cursor = db.cursor()

# execute SQL query using execute() method.
cursor.execute("SELECT id,name from countries")

# Fetch all rows using fetchall() method.
data = cursor.fetchall()

json_body = []
for id, name in data:
    # Same "<id> <name>" output as a Python 2 print statement, written in the
    # call form already used for the result line at the end of the script.
    print("%s %s" % (id, name))
    json_1 = {
        "measurement": "cpu_load_short",
        "tags": {
            "host": "server01",
            "region": "us-west"
        },
        "time": id,
        "fields": {
            # ERROR: this decode is where the UnicodeDecodeError is raised.
            # NOTE(review): byte 0xc5 is "Å" in Latin-1, so the connection is
            # presumably not returning UTF-8 bytes; connecting with
            # charset="utf8", use_unicode=True should make this decode
            # unnecessary -- confirm against the MySQLdb documentation.
            "value": name.decode('utf8')
        }
    }
    #json_1 = json.dumps(json_1).encode('utf8')
    json_body.append(json_1)

#json_body = json.dumps(json_body, ensure_ascii=False).encode('utf8')
client = InfluxDBClient('localhost', 8086, 'root', 'root', 'example')
client.create_database('example')
client.write_points(json_body)
result = client.query('select * from cpu_load_short;')
print("Result: {0}".format(result))

# disconnect from server
db.close()
解码数据时出现错误:
$] python test.py
11 Afghanistan
12 Åland Islands
Traceback (most recent call last):
File "test.py", line 31, in <module>
"value": name.decode('utf8')
File "/usr/lib64/python2.6/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc5 in position 0: invalid continuation byte
如果我去掉对 name 的 decode 调用,直接传入 name:
"fields": {
"value": name
}
我仍然遇到错误
Traceback (most recent call last):
File "test.py", line 43, in <module>
client.write_points(json_body)
File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 391, in write_points
tags=tags)
File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 436, in _write_points
expected_response_code=204
File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 276, in write
data=make_lines(data, precision).encode('utf-8'),
File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 119, in make_lines
value = _escape_value(point['fields'][field_key])
File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 53, in _escape_value
value = _get_unicode(value)
File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 73, in _get_unicode
return data.decode('utf-8')
File "/usr/lib64/python2.6/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc5 in position 0: invalid continuation byte
这个问题有什么解决方案吗?
可以使用 unidecode
将 unicode 数据音译为 ASCII 文本(注意:这是有损转换,例如 Å 会变成 A)。
import unidecode
name = unidecode.unidecode_expect_nonascii(name)
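你不需要手动做任何解码/编码(decoding/encoding)。
连接到 MySQL 时指定 utf8 字符集即可,例如:MySQLdb.connect("localhost", "root", "", "platform", charset="utf8", use_unicode=True)。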
我想从 mysql 中插入一些数据到 influxDB。 sql 中的数据是 utf-8 编码的,我在 vagrant 上使用 python 2.6.6。
$] python
Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-11)] on linux2
这是table的结构和mysql的示例数据。
mysql> show create table countries;
+-----------+----------------------+
| Table | Create Table |
+-----------+----------------------+
| countries | CREATE TABLE `countries` (
`id` smallint(5) unsigned DEFAULT NULL,
`name` varchar(100) DEFAULT NULL,
KEY `name` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 |
+----------------------------------+
mysql> select id,name from countries;
+------+----------------------------------+
| id | name |
+------+----------------------------------+
| 11 | Afghanistan |
| 12 | Åland Islands |
| 13 | Albania |
| 14 | Côte d’Ivoire |
+------+----------------------------------+
有些国家/地区的名称包含非 ASCII 字符(例如 Å、ô、’)。我正在使用下面的 python 代码从 mysql 中获取数据并插入到 influxDB 中。
#!/usr/bin/python
import MySQLdb
import json
from influxdb import InfluxDBClient
# Copy every (id, name) row of the MySQL `countries` table into the InfluxDB
# measurement `cpu_load_short`, then read the measurement back and print it.

# Open database connection
db = MySQLdb.connect("localhost", "root", "", "platform")

# prepare a cursor object using cursor() method
cursor = db.cursor()

# execute SQL query using execute() method.
cursor.execute("SELECT id,name from countries")

# Fetch all rows using fetchall() method.
data = cursor.fetchall()

json_body = []
for id, name in data:
    # Same "<id> <name>" output as a Python 2 print statement, written in the
    # call form already used for the result line at the end of the script.
    print("%s %s" % (id, name))
    json_1 = {
        "measurement": "cpu_load_short",
        "tags": {
            "host": "server01",
            "region": "us-west"
        },
        "time": id,
        "fields": {
            # ERROR: this decode is where the UnicodeDecodeError is raised.
            # NOTE(review): byte 0xc5 is "Å" in Latin-1, so the connection is
            # presumably not returning UTF-8 bytes; connecting with
            # charset="utf8", use_unicode=True should make this decode
            # unnecessary -- confirm against the MySQLdb documentation.
            "value": name.decode('utf8')
        }
    }
    #json_1 = json.dumps(json_1).encode('utf8')
    json_body.append(json_1)

#json_body = json.dumps(json_body, ensure_ascii=False).encode('utf8')
client = InfluxDBClient('localhost', 8086, 'root', 'root', 'example')
client.create_database('example')
client.write_points(json_body)
result = client.query('select * from cpu_load_short;')
print("Result: {0}".format(result))

# disconnect from server
db.close()
解码数据时出现错误:
$] python test.py
11 Afghanistan
12 Åland Islands
Traceback (most recent call last):
File "test.py", line 31, in <module>
"value": name.decode('utf8')
File "/usr/lib64/python2.6/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc5 in position 0: invalid continuation byte
如果我去掉对 name 的 decode 调用,直接传入 name:
"fields": {
"value": name
}
我仍然遇到错误
Traceback (most recent call last):
File "test.py", line 43, in <module>
client.write_points(json_body)
File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 391, in write_points
tags=tags)
File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 436, in _write_points
expected_response_code=204
File "/usr/lib/python2.6/site-packages/influxdb/client.py", line 276, in write
data=make_lines(data, precision).encode('utf-8'),
File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 119, in make_lines
value = _escape_value(point['fields'][field_key])
File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 53, in _escape_value
value = _get_unicode(value)
File "/usr/lib/python2.6/site-packages/influxdb/line_protocol.py", line 73, in _get_unicode
return data.decode('utf-8')
File "/usr/lib64/python2.6/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc5 in position 0: invalid continuation byte
这个问题有什么解决方案吗?
可以使用 unidecode
将 unicode 数据音译为 ASCII 文本(注意:这是有损转换,例如 Å 会变成 A)。
import unidecode
name = unidecode.unidecode_expect_nonascii(name)
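你不需要手动做任何解码/编码(decoding/encoding)。
连接到 MySQL 时指定 utf8 字符集即可,例如:MySQLdb.connect("localhost", "root", "", "platform", charset="utf8", use_unicode=True)。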