Pandas 和雅虎 ChartAPI
Pandas and Yahoo ChartAPI
我正在尝试从第 17 行开始读取 this source - Yahoo Finance。我想获取日期、最高价、最低价等,这是 6 列。
我的代码:
# Fetch one year of EUR/USD quotes from the Yahoo Finance chart API and
# load them into a pandas DataFrame.
import datetime
import io
import urllib
import urllib.request  # 'import urllib' alone does not expose urllib.request in Python 3

import pandas as pd

web = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'
urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'
with urllib.request.urlopen(urlToVisit) as response:
    # Decode the HTTP body into the raw CSV text.
    sourcePage = response.read().decode()
# read_csv expects a file path or a file-like object, not raw CSV text
# (a plain string is treated as a path -> OSError, as in the traceback).
# Wrap the downloaded text in io.StringIO; skiprows=17 skips the metadata
# header so parsing starts at the quote rows.
df = pd.read_csv(io.StringIO(sourcePage), skiprows=17, header=None)
df.head()
OSError Traceback (most recent call last)
<ipython-input-10-bf04141dec86> in <module>()
7 with urllib.request.urlopen(urlToVisit) as response:
8 sourcePage = response.read().decode()
----> 9 df = pd.read_csv(sourcePage, skiprows=17, header=None)
10 df.head()
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
496 skip_blank_lines=skip_blank_lines)
497
--> 498 return _read(filepath_or_buffer, kwds)
499
500 parser_f.__name__ = name
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
273
274 # Create the parser.
--> 275 parser = TextFileReader(filepath_or_buffer, **kwds)
276
277 if (nrows is not None) and (chunksize is not None):
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
588 self.options['has_index_names'] = kwds['has_index_names']
589
--> 590 self._make_engine(self.engine)
591
592 def _get_options_with_defaults(self, engine):
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
729 def _make_engine(self, engine='c'):
730 if engine == 'c':
--> 731 self._engine = CParserWrapper(self.f, **self.options)
732 else:
733 if engine == 'python':
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1101 kwds['allow_leading_cols'] = self.index_col is not False
1102
-> 1103 self._reader = _parser.TextReader(src, **kwds)
1104
1105 # XXX
pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()
pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()
如果有人能帮我纠正一下,将不胜感激!谢谢。
根据文档,pandas.read_csv
的第一个参数需要是文件路径或类似文件的对象,而不是 CSV 内容字符串。
所以你可以将文件保存在本地,然后使用pandas.read_csv
方法重新读取,或者你可以使用 io.StringIO(Python 3.x)
或 StringIO.StringIO(Python 2.x)
来转换数据。
示例代码如下:
# Download the EUR/USD chart data from Yahoo Finance and parse it with
# pandas. The CSV body arrives as a string, so it is wrapped in a
# StringIO buffer before being handed to read_csv.
import io
import urllib
import urllib.request

import pandas as pd

urlToVisit = (
    'http://chartapi.finance.yahoo.com/instrument/1.0/'
    'EURUSD=X/chartdata;type=quote;range=1y/csv'
)

# Fetch the response and decode its bytes into text.
with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode()

# StringIO makes the in-memory text behave like a file; the first 18
# metadata lines are skipped so only the quote rows are parsed.
buffer = io.StringIO(sourcePage)
df = pd.read_csv(buffer, skiprows=18, header=None, sep=",")

print(df.head())
希望对您有所帮助。
我正在尝试从第 17 行开始读取 this source - Yahoo Finance。我想获取日期、最高价、最低价等,这是 6 列。
我的代码:
# Fetch one year of EUR/USD quotes from the Yahoo Finance chart API and
# load them into a pandas DataFrame.
import datetime
import io
import urllib
import urllib.request  # 'import urllib' alone does not expose urllib.request in Python 3

import pandas as pd

web = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'
urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/EURUSD=X/chartdata;type=quote;range=1y/csv'
with urllib.request.urlopen(urlToVisit) as response:
    # Decode the HTTP body into the raw CSV text.
    sourcePage = response.read().decode()
# read_csv expects a file path or a file-like object, not raw CSV text
# (a plain string is treated as a path -> OSError, as in the traceback).
# Wrap the downloaded text in io.StringIO; skiprows=17 skips the metadata
# header so parsing starts at the quote rows.
df = pd.read_csv(io.StringIO(sourcePage), skiprows=17, header=None)
df.head()
OSError Traceback (most recent call last)
<ipython-input-10-bf04141dec86> in <module>()
7 with urllib.request.urlopen(urlToVisit) as response:
8 sourcePage = response.read().decode()
----> 9 df = pd.read_csv(sourcePage, skiprows=17, header=None)
10 df.head()
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
496 skip_blank_lines=skip_blank_lines)
497
--> 498 return _read(filepath_or_buffer, kwds)
499
500 parser_f.__name__ = name
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
273
274 # Create the parser.
--> 275 parser = TextFileReader(filepath_or_buffer, **kwds)
276
277 if (nrows is not None) and (chunksize is not None):
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
588 self.options['has_index_names'] = kwds['has_index_names']
589
--> 590 self._make_engine(self.engine)
591
592 def _get_options_with_defaults(self, engine):
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
729 def _make_engine(self, engine='c'):
730 if engine == 'c':
--> 731 self._engine = CParserWrapper(self.f, **self.options)
732 else:
733 if engine == 'python':
/opt/conda/lib/python3.4/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1101 kwds['allow_leading_cols'] = self.index_col is not False
1102
-> 1103 self._reader = _parser.TextReader(src, **kwds)
1104
1105 # XXX
pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()
pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()
如果有人能帮我纠正一下,将不胜感激!谢谢。
根据文档,pandas.read_csv
的第一个参数需要是文件路径或类似文件的对象,而不是 CSV 内容字符串。
所以你可以将文件保存在本地,然后使用pandas.read_csv
方法重新读取,或者你可以使用 io.StringIO(Python 3.x)
或 StringIO.StringIO(Python 2.x)
来转换数据。
示例代码如下:
# Download the EUR/USD chart data from Yahoo Finance and parse it with
# pandas. The CSV body arrives as a string, so it is wrapped in a
# StringIO buffer before being handed to read_csv.
import io
import urllib
import urllib.request

import pandas as pd

urlToVisit = (
    'http://chartapi.finance.yahoo.com/instrument/1.0/'
    'EURUSD=X/chartdata;type=quote;range=1y/csv'
)

# Fetch the response and decode its bytes into text.
with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode()

# StringIO makes the in-memory text behave like a file; the first 18
# metadata lines are skipped so only the quote rows are parsed.
buffer = io.StringIO(sourcePage)
df = pd.read_csv(buffer, skiprows=18, header=None, sep=",")

print(df.head())
希望对您有所帮助。