Pandas and Yahoo ChartAPI

I'm trying to read this source - Yahoo Finance - starting at line 17. I want to get the date, high, low and so on; that's 6 columns.

My code:

import pandas as pd
import datetime
import urllib.request

urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'
with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode()
df = pd.read_csv(sourcePage, skiprows=17, header=None)
df.head()

    OSError                                   Traceback (most recent call last)
<ipython-input-10-bf04141dec86> in <module>()
      7 with urllib.request.urlopen(urlToVisit) as response:
      8     sourcePage = response.read().decode()
----> 9 df = pd.read_csv(sourcePage, skiprows=17, header=None)
     10 df.head()

/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    273 
    274     # Create the parser.
--> 275     parser = TextFileReader(filepath_or_buffer, **kwds)
    276 
    277     if (nrows is not None) and (chunksize is not None):

/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    588             self.options['has_index_names'] = kwds['has_index_names']
    589 
--> 590         self._make_engine(self.engine)
    591 
    592     def _get_options_with_defaults(self, engine):

/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    729     def _make_engine(self, engine='c'):
    730         if engine == 'c':
--> 731             self._engine = CParserWrapper(self.f, **self.options)
    732         else:
    733             if engine == 'python':

/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1101         kwds['allow_leading_cols'] = self.index_col is not False
   1102 
-> 1103         self._reader = _parser.TextReader(src, **kwds)
   1104 
   1105         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()

If someone could help me fix this, I would really appreciate it! Thanks.

According to the documentation, pandas.read_csv expects a file path or a file-like object as its first argument. Passing the raw CSV text as a string makes pandas treat it as a (non-existent) file path, which is why you get the OSError.

So you can either save the file locally and then read it back with pandas.read_csv (a sketch of that variant follows the example below), or you can wrap the downloaded text with io.StringIO in Python 3.x (StringIO.StringIO in Python 2.x) so that pandas can read it like a file.

Example code:

import io
import urllib.request
import pandas as pd

urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'

# Download the page and decode the bytes to a plain string
with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode()

# Wrap the string in a file-like buffer so read_csv can consume it
df = pd.read_csv(io.StringIO(sourcePage), skiprows=18, header=None, sep=",")
print(df.head())
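
If you prefer the first option (saving the file to disk and reading it back), a minimal sketch could look like this; the file name eurusd.csv is just an arbitrary choice for illustration:

import urllib.request
import pandas as pd

urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'

# Save the response to a local file, then let read_csv open it by path
urllib.request.urlretrieve(urlToVisit, 'eurusd.csv')
df = pd.read_csv('eurusd.csv', skiprows=18, header=None, sep=",")
print(df.head())

Either way, if you want named columns (date, high, low and so on), you can also pass a names= list to read_csv once you know the column order of the feed.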

Hope this helps.