os.walk 没有看到文件夹中的文件
os.walk does not see files that are in the folder
我前段时间在一个涉及解析推文的项目中使用了以下代码。现在,我想提取更多与这些推文相关的特征,并在我之前解析的相同文件上重新 运行 它;但是,收到意外错误。错误 中提到的文件 1.json 在目录中 (以及其他数百个 json)。我做错了什么?
import os
import json
import pandas as pd
import numpy as np
from collections import defaultdict
elements_keys = ['created_at', 'text', 'lang', 'geo', 'location', 'quote_count', 'reply_count', 'retweet_count', 'favorite_count', 'in_reply_to_screen_name', 'screen_name', 'description', 'verified', 'followers_count', 'friends_count', 'listed_count', 'favourites_count', 'statuses_count']
elements = defaultdict(list)
for dirs, subdirs, files in os.walk('/Users/user/Desktop/'):
for file in files:
if file.endswith('.json'):
with open(file, 'r') as input_file: # print (tweet.keys())
for line in input_file:
try:
tweet = json.loads(line)
items = [(key, tweet[key]) for key in elements_keys] # should raise error if any key is missing
for key, value in items:
elements[key].append(value)
except:
continue
df=pd.DataFrame({'created_at': pd.Index(elements['created_at']),
'text': pd.Index(elements['text']),
'lang': pd.Index(elements['lang']),
'geo': pd.Index(elements['geo']),
'location': pd.Index(elements['location']),
'quote_count': pd.Index(elements['quote_count']),
'reply_count': pd.Index(elements['reply_count']),
'retweet_count': pd.Index(elements['retweet_count']),
'favorite_count': pd.Index(elements['favorite_count']),
'in_reply_to_screen_name': pd.Index(elements['in_reply_to_screen_name']),
'screen_name': pd.Index(elements['screen_name']),
'verified': pd.Index(elements['verified']),
'followers_count': pd.Index(elements['followers_count']),
'friends_count': pd.Index(elements['friends_count']),
'listed_count': pd.Index(elements['listed_count']),
'favourites_count': pd.Index(elements['favourites_count']),
'statuses_count': pd.Index(elements['statuses_count'])})
df.to_csv('df.csv')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-21-fba7b7321509> in <module>()
11 for file in files:
12 if file.endswith('.json'):
---> 13 with open(file, 'r') as input_file: # print (tweet.keys())
14 for line in input_file:
15 try:
FileNotFoundError: [Errno 2] No such file or directory: '1.json'
哦,我想我刚刚弄明白了:
for dirs, subdirs, files in os.walk('/Users/user/Desktop/*.json'):
我已经有一段时间没用了Python。对不起:)
我前段时间在一个涉及解析推文的项目中使用了以下代码。现在,我想提取更多与这些推文相关的特征,并在我之前解析的相同文件上重新 运行 它;但是,收到意外错误。错误 中提到的文件 1.json 在目录中 (以及其他数百个 json)。我做错了什么?
import os
import json
import pandas as pd
import numpy as np
from collections import defaultdict
elements_keys = ['created_at', 'text', 'lang', 'geo', 'location', 'quote_count', 'reply_count', 'retweet_count', 'favorite_count', 'in_reply_to_screen_name', 'screen_name', 'description', 'verified', 'followers_count', 'friends_count', 'listed_count', 'favourites_count', 'statuses_count']
elements = defaultdict(list)
for dirs, subdirs, files in os.walk('/Users/user/Desktop/'):
for file in files:
if file.endswith('.json'):
with open(file, 'r') as input_file: # print (tweet.keys())
for line in input_file:
try:
tweet = json.loads(line)
items = [(key, tweet[key]) for key in elements_keys] # should raise error if any key is missing
for key, value in items:
elements[key].append(value)
except:
continue
df=pd.DataFrame({'created_at': pd.Index(elements['created_at']),
'text': pd.Index(elements['text']),
'lang': pd.Index(elements['lang']),
'geo': pd.Index(elements['geo']),
'location': pd.Index(elements['location']),
'quote_count': pd.Index(elements['quote_count']),
'reply_count': pd.Index(elements['reply_count']),
'retweet_count': pd.Index(elements['retweet_count']),
'favorite_count': pd.Index(elements['favorite_count']),
'in_reply_to_screen_name': pd.Index(elements['in_reply_to_screen_name']),
'screen_name': pd.Index(elements['screen_name']),
'verified': pd.Index(elements['verified']),
'followers_count': pd.Index(elements['followers_count']),
'friends_count': pd.Index(elements['friends_count']),
'listed_count': pd.Index(elements['listed_count']),
'favourites_count': pd.Index(elements['favourites_count']),
'statuses_count': pd.Index(elements['statuses_count'])})
df.to_csv('df.csv')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-21-fba7b7321509> in <module>()
11 for file in files:
12 if file.endswith('.json'):
---> 13 with open(file, 'r') as input_file: # print (tweet.keys())
14 for line in input_file:
15 try:
FileNotFoundError: [Errno 2] No such file or directory: '1.json'
哦,我想我刚刚弄明白了:
for dirs, subdirs, files in os.walk('/Users/user/Desktop/*.json'):
我已经有一段时间没用了Python。对不起:)