Python 读取文件时丢失了许多表情符号字符
Many emoji characters are lost when reading a file in Python
我有一个 JSON 文件,其中的列表里包含一个约有 1500 个表情符号条目的字典。我想把它们导入我的 Python 代码:我读取了该文件并将其转换为 Python 字典,但结果只得到 143 条记录。怎样才能把所有表情符号都导入我的代码?以下是我的代码。
import sys
import ast
# Read the entire JSON file into one string (text mode, no explicit encoding given).
file = open('emojidescription.json','r').read()
# Translation table sending every code point from U+10000 up to sys.maxunicode
# to U+FFFD, the Unicode replacement character.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Apply the table, then parse the resulting text as a Python literal.
# NOTE(review): keys that differ only in non-BMP characters become equal after
# the translation, and a dict keeps a single entry per distinct key.
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))
#word = word.replaceAll(",", " ");
# The first element of the "emojis" list is the emoji -> descriptions dict.
keys = list(emoji_dictionary["emojis"][0].keys())
values = list(emoji_dictionary["emojis"][0].values())
# Opened for appending, but nothing below writes to it; output goes to stdout.
file_write = open('output.txt','a')
print(len(keys))
for i in range(len(keys)):
    try:
        content = 'word = word.replace("{0}", "{1}")'.format(keys[i],values[i][0])
    except Exception as e:
        # values[i][0] fails when the description list is empty; emit "" instead.
        content = 'word = word.replace("{0}", "{1}")'.format(keys[i],'')
    #file.write()
    #print(keys[i],values[i])
    print(content)
file_write.close()
这是我的输入样本
{
"emojis": [
{
"": ["Graduate"],
"©": ["Copy right"],
"®": ["Registered"],
"": ["family"],
"❤️": ["love"],
"™": ["trademark"],
"❤": ["love"],
"⌚": ["time"],
"⌛": ["wait"],
"⭐": ["star"],
"": ["Elephant"],
"": ["Cat"],
"": ["ant"],
"": ["cock"],
"": ["cock"],
这是我得到的输出,其中 143 是读取到的表情符号数量。
143
word = word.replace("����", "family")
word = word.replace("Ⓜ", "")
word = word.replace("♥", "")
word = word.replace("♠", "")
word = word.replace("⌛", "wait")
我不确定为什么输入中的 1500 条记录您只看到了 143 条(您给出的示例似乎无法重现这一现象)。
开头的准备代码似乎没有起到什么实际作用,而您所做的事情可以归结为(已简化并略去了许多细节):
d = ...  # placeholder: read the JSON file into a Python dict
# Materialize the views as lists: dict views cannot be indexed in Python 3.
keys = list(d.keys())
values = list(d.values())
# Walk both lists by position; keys[i] and values[i] belong to the same entry.
for i in range(len(keys)):
    key = keys[i]
    value = values[i]
这段代码本身应该完全没有问题。不过在 Python 中有更好的写法,例如使用 zip 函数:
d = ...  # placeholder: read the JSON file into a Python dict
keys = d.keys()
values = d.values()
# keys() and values() iterate in the same order as long as the dict is not
# modified in between, so zip yields matching (key, value) pairs.
for key, value in zip(keys, values):  # zip picks pair-wise elements
    ...
或者直接让字典返回它的键值对(items):
# items() yields each (key, value) pair directly, so no index or zip is needed.
for key, value in d.items():
    ...
json 模块让读写 JSON 变得更简单(也更安全);结合上面的惯用写法,问题可以简化为:
import json
# Open in binary mode and let json.load detect the UTF-8/16/32 encoding itself,
# so parsing does not depend on the platform's default text encoding.
# The with-block closes the input file as soon as parsing is done.
with open('emoji.json', 'rb') as src:
    emojis = json.load(src)

# Write bytes and encode explicitly, so the output is always UTF-8.
with open('output.py', 'wb') as fp:
    # The first element of the "emojis" list maps each emoji to its descriptions.
    for k, v in emojis['emojis'][0].items():
        # Use the first description, or "" when the description list is empty.
        val = u'word = word.replace("{0}", "{1}")\n'.format(k, v[0] if v else "")
        fp.write(val.encode('u8'))  # 'u8' is an alias of the UTF-8 codec
为什么要把所有表情符号都替换成 0xfffd:
# Maps every code point from U+10000 up to sys.maxunicode to U+FFFD
# (the Unicode replacement character).
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Applies that mapping to the file text before parsing it as a Python literal.
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))
千万别这样!
使用json:
import json
# Read the JSON with an explicit UTF-8 encoding so no emoji is lost or replaced.
# The file handle has its own name so it is not confused with the parsed data.
with open('emojidescription.json', encoding="utf8") as emoji_file:
    emojis = json.load(emoji_file)

# Append one generated replace() statement per emoji, also encoded as UTF-8.
with open('output.txt','a', encoding="utf8") as output:
    # The first element of the "emojis" list maps each emoji to its descriptions.
    for emoji, text in emojis["emojis"][0].items():
        # Use the first description, or "" when the description list is empty.
        text = "" if not text else text[0]
        output.write('word = word.replace("{0}", "{1}")\n'.format(emoji, text))
我有一个 JSON 文件,其中的列表里包含一个约有 1500 个表情符号条目的字典。我想把它们导入我的 Python 代码:我读取了该文件并将其转换为 Python 字典,但结果只得到 143 条记录。怎样才能把所有表情符号都导入我的代码?以下是我的代码。
import sys
import ast
# Read the entire JSON file into one string (text mode, no explicit encoding given).
file = open('emojidescription.json','r').read()
# Translation table sending every code point from U+10000 up to sys.maxunicode
# to U+FFFD, the Unicode replacement character.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Apply the table, then parse the resulting text as a Python literal.
# NOTE(review): keys that differ only in non-BMP characters become equal after
# the translation, and a dict keeps a single entry per distinct key.
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))
#word = word.replaceAll(",", " ");
# The first element of the "emojis" list is the emoji -> descriptions dict.
keys = list(emoji_dictionary["emojis"][0].keys())
values = list(emoji_dictionary["emojis"][0].values())
# Opened for appending, but nothing below writes to it; output goes to stdout.
file_write = open('output.txt','a')
print(len(keys))
for i in range(len(keys)):
    try:
        content = 'word = word.replace("{0}", "{1}")'.format(keys[i],values[i][0])
    except Exception as e:
        # values[i][0] fails when the description list is empty; emit "" instead.
        content = 'word = word.replace("{0}", "{1}")'.format(keys[i],'')
    #file.write()
    #print(keys[i],values[i])
    print(content)
file_write.close()
这是我的输入样本
{
"emojis": [
{
"": ["Graduate"],
"©": ["Copy right"],
"®": ["Registered"],
"": ["family"],
"❤️": ["love"],
"™": ["trademark"],
"❤": ["love"],
"⌚": ["time"],
"⌛": ["wait"],
"⭐": ["star"],
"": ["Elephant"],
"": ["Cat"],
"": ["ant"],
"": ["cock"],
"": ["cock"],
这是我得到的输出,其中 143 是读取到的表情符号数量。
143
word = word.replace("����", "family")
word = word.replace("Ⓜ", "")
word = word.replace("♥", "")
word = word.replace("♠", "")
word = word.replace("⌛", "wait")
我不确定为什么输入中的 1500 条记录您只看到了 143 条(您给出的示例似乎无法重现这一现象)。
开头的准备代码似乎没有起到什么实际作用,而您所做的事情可以归结为(已简化并略去了许多细节):
d = ...  # placeholder: read the JSON file into a Python dict
# Materialize the views as lists: dict views cannot be indexed in Python 3.
keys = list(d.keys())
values = list(d.values())
# Walk both lists by position; keys[i] and values[i] belong to the same entry.
for i in range(len(keys)):
    key = keys[i]
    value = values[i]
这段代码本身应该完全没有问题。不过在 Python 中有更好的写法,例如使用 zip 函数:
d = ...  # placeholder: read the JSON file into a Python dict
keys = d.keys()
values = d.values()
# keys() and values() iterate in the same order as long as the dict is not
# modified in between, so zip yields matching (key, value) pairs.
for key, value in zip(keys, values):  # zip picks pair-wise elements
    ...
或者直接让字典返回它的键值对(items):
# items() yields each (key, value) pair directly, so no index or zip is needed.
for key, value in d.items():
    ...
json 模块让读写 JSON 变得更简单(也更安全);结合上面的惯用写法,问题可以简化为:
import json
# Open in binary mode and let json.load detect the UTF-8/16/32 encoding itself,
# so parsing does not depend on the platform's default text encoding.
# The with-block closes the input file as soon as parsing is done.
with open('emoji.json', 'rb') as src:
    emojis = json.load(src)

# Write bytes and encode explicitly, so the output is always UTF-8.
with open('output.py', 'wb') as fp:
    # The first element of the "emojis" list maps each emoji to its descriptions.
    for k, v in emojis['emojis'][0].items():
        # Use the first description, or "" when the description list is empty.
        val = u'word = word.replace("{0}", "{1}")\n'.format(k, v[0] if v else "")
        fp.write(val.encode('u8'))  # 'u8' is an alias of the UTF-8 codec
为什么要把所有表情符号都替换成 0xfffd:
# Maps every code point from U+10000 up to sys.maxunicode to U+FFFD
# (the Unicode replacement character).
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Applies that mapping to the file text before parsing it as a Python literal.
emoji_dictionary = ast.literal_eval(file.translate(non_bmp_map))
千万别这样!
使用json:
import json
# Read the JSON with an explicit UTF-8 encoding so no emoji is lost or replaced.
# The file handle has its own name so it is not confused with the parsed data.
with open('emojidescription.json', encoding="utf8") as emoji_file:
    emojis = json.load(emoji_file)

# Append one generated replace() statement per emoji, also encoded as UTF-8.
with open('output.txt','a', encoding="utf8") as output:
    # The first element of the "emojis" list maps each emoji to its descriptions.
    for emoji, text in emojis["emojis"][0].items():
        # Use the first description, or "" when the description list is empty.
        text = "" if not text else text[0]
        output.write('word = word.replace("{0}", "{1}")\n'.format(emoji, text))