无法将 pandas 数据框存储为 csv

Unable to store pandas data frame as a csv

我关注这个 tutorial 是为了从新闻站点检索数据。

主要功能是getDailyNews。它会在每个新闻源上循环,请求 api,提取数据并将其转储到 pandas DataFrame,然后将结果导出到 csv 文件。

但是当我 运行 代码时,出现错误。

import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
from tqdm import tqdm, tqdm_notebook
from functools import reduce


def getSources():
    source_url = 'https://newsapi.org/v1/sources?language=en'
    response = requests.get(source_url).json()
    sources = []
    for source in response['sources']:
        sources.append(source['id'])
    return sources

def mapping():
    d = {}
    response = requests.get('https://newsapi.org/v1/sources?language=en')
    response = response.json()
    for s in response['sources']:
        d[s['id']] = s['category']
    return d

def category(source, m):
    try:
        return m[source]
    except:
        return 'NC'

def getDailyNews():
    sources = getSources()
    key = '96f279e1b7f845669089abc016e915cc'


    url = 'https://newsapi.org/v1/articles?source={0}&sortBy={1}&apiKey={2}'
    responses = []
    for i, source in tqdm_notebook(enumerate(sources), total=len(sources)):

        try:
            u = url.format(source, 'top', key)
        except:
            u = url.format(source, 'latest', key)

        response = requests.get(u)
        r = response.json()
        try:
            for article in r['articles']:
                article['source'] = source
            responses.append(r)
        except:
            print('Rate limit exceeded ... please wait and retry in 6 hours')
            return None

    articles = list(map(lambda r: r['articles'], responses))
    articles = list(reduce(lambda x,y: x+y, articles))

    news = pd.DataFrame(articles)
    news = news.dropna()
    news = news.drop_duplicates()
    news.reset_index(inplace=True, drop=True)
    d = mapping()
    news['category'] = news['source'].map(lambda s: category(s, d))
    news['scraping_date'] = datetime.now()

    try:
        aux = pd.read_csv('./data/news.csv')
        aux = aux.append(news)
        aux = aux.drop_duplicates('url')
        aux.reset_index(inplace=True, drop=True)
        aux.to_csv('./data/news.csv', encoding='utf-8', index=False)
    except:
        news.to_csv('./data/news.csv', index=False, encoding='utf-8')

    print('Done')

if __name__=='__main__':
    getDailyNews() 

错误:

FileNotFoundError: [Errno 2] No such file or directory: './data/news.csv'

我知道我必须在pd.read_csv中给出路径名,但我不知道我必须在这里给出哪条路径。

如果您从中执行此程序的目录中还没有 data 文件夹,则此错误是有意义的。 post .

中也有类似的问题