是否有使用 ib api 生成 OHLCV pandas 数据框(DataFrame)的正确方法?

Is there a proper way to produce a OHLCV pandas dataframe using ib api?

这是打印数据的代码,但我不知道如何将这些数据收集到 pandas 数据框中。我通过一个继承自 EClient 和 EWrapper 的 TestApp 类,调用 ibapi(Interactive Brokers 的 API)中的 reqHistoricalData 来请求数据。

from ibapi.client import EClient
from ibapi.wrapper import EWrapper
from ibapi.contract import Contract
from ibapi.order import Order
from ibapi.ticktype import TickTypeEnum
import pandas as pd
import numpy as np
import os.path  # To manage paths
import sys  # To find out the script name (in argv[0])
from datetime import datetime
from time import sleep, strftime, localtime 
from socket import error as SocketError
import errno

class TestApp(EWrapper, EClient):
    """Application class deriving from both ``EWrapper`` and ``EClient``.

    The callbacks defined here only echo what they receive to stdout.
    """

    def __init__(self):
        # The instance itself is handed to EClient.__init__ as its argument.
        EClient.__init__(self, self)

    def error(self, reqId, errorCode, errorString):
        """Print the request id, error code and error message."""
        print('Error: ', reqId, errorCode, ' ', errorString)

    def historicalData(self, reqId, bar):
        """Print one bar as ``date open high low close volume``."""
        ohlcv = (bar.date, bar.open, bar.high, bar.low, bar.close, bar.volume)
        print(*ohlcv)


def create_contract(symbol, sec_type, exch, prim_exch, curr):
    """Return a new ``Contract`` filled in from the arguments.

    Args:
        symbol: Stored as ``contract.symbol``.
        sec_type: Stored as ``contract.secType``.
        exch: Stored as ``contract.exchange``.
        prim_exch: Stored as ``contract.primaryExchange``.
        curr: Stored as ``contract.currency``.
    """
    contract = Contract()
    # (attribute name, value) pairs, applied in the listed order.
    assignments = (
        ('symbol', symbol),
        ('secType', sec_type),
        ('exchange', exch),
        ('currency', curr),
        ('primaryExchange', prim_exch),
    )
    for attr_name, attr_value in assignments:
        setattr(contract, attr_name, attr_value)
    return contract

def create_order(order_type, quantity, action):
    """Return a new ``Order`` with its type, quantity and action set.

    Args:
        order_type: Stored as ``orderType``.
        quantity: Stored as ``totalQuantity``.
        action: Stored as ``action``.
    """
    new_order = Order()
    new_order.orderType = order_type
    new_order.totalQuantity = quantity
    new_order.action = action
    return new_order

# Create the application object and connect to 127.0.0.1 on port 7497.
app = TestApp()
app.connect('127.0.0.1', 7497, 0)

contract = create_contract('AAPL', 'STK', 'SMART', 'NASDAQ', 'USD')

# Request one year of monthly TRADES bars for the contract above.
app.reqHistoricalData(
    reqId=0,
    contract=contract,
    endDateTime='',
    durationStr='1 Y',
    barSizeSetting='1 month',
    whatToShow='TRADES',
    useRTH=1,  # =1 for RTH data
    formatDate=1,
    keepUpToDate=False,
    chartOptions=[],
)

# Hand control to the client's run loop.
app.run()

输出为:

20181031 222.52 224.23 206.09 218.86 1752000
20181130 219.07 222.36 170.26 178.58 7249186
20181231 184.39 184.94 146.6 157.74 6851826
20190131 154.89 169.0 142.0 166.44 6383564
20190228 166.93 175.87 165.93 173.15 3478346
20190329 174.28 197.69 169.5 189.95 4956586
20190430 191.64 208.48 188.38 200.67 3812115
20190531 209.88 215.31 174.99 175.07 5642571
20190628 175.58 201.57 170.27 197.92 3592406
20190731 203.28 221.37 198.41 213.04 3418242
20190830 213.82 218.03 192.58 208.74 5078104
20190930 206.42 226.42 204.22 223.97 3768842
20191023 225.13 243.18 215.13 242.51 3253952

我在找什么:

           Open   High    Low  Close Volume
Date                                       
20181031 222.52 224.23 206.09 218.86 1752000
20181130 219.07 222.36 170.26 178.58 7249186
20181231 184.39 184.94 146.6 157.74 6851826
20190131 154.89 169.0 142.0 166.44 6383564
20190228 166.93 175.87 165.93 173.15 3478346
20190329 174.28 197.69 169.5 189.95 4956586
20190430 191.64 208.48 188.38 200.67 3812115
20190531 209.88 215.31 174.99 175.07 5642571
20190628 175.58 201.57 170.27 197.92 3592406
20190731 203.28 221.37 198.41 213.04 3418242
20190830 213.82 218.03 192.58 208.74 5078104
20190930 206.42 226.42 204.22 223.97 3768842
20191023 225.13 243.18 215.13 242.51 3253952

您可以向 TestApp 添加一个数据框成员,然后在每次调用 historicalData() 时向其添加一行:

...
# These two assignments use ``self`` and therefore belong inside
# TestApp.__init__; they are shown here without the surrounding method.
# Column names, in the same order historicalData() writes the bar fields.
self.cols = ['date', 'open', 'high', 'low', 'close', 'volume']
# Starts empty; historicalData() adds one row per received bar.
self.df = pd.DataFrame(columns=self.cols)

def historicalData(self, reqId, bar):
    """Print one bar and append it as a new row of ``self.df``.

    Args:
        reqId: Request identifier (not used here).
        bar: Object exposing ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` attributes.
    """
    row = [bar.date, bar.open, bar.high, bar.low, bar.close, bar.volume]
    print(*row)
    # The current row count is used as the label of the new row.
    next_label = len(self.df)
    self.df.loc[next_label] = row

您可能希望每个 reqId 都有一个单独的 DataFrame。

我的做法是使用 queue 模块创建一个队列。

所以这就像...

def create_queue(self):
    """Create a fresh ``queue.Queue``, keep it on the instance and return it.

    The queue is stored as ``self.my_hist_queue``; the same object is
    returned to the caller.
    """
    self.my_hist_queue = queue.Queue()
    return self.my_hist_queue

然后,当我在包装器中定义 historicalData 时,我将其添加到队列中。它就像...

def historicalData(self, reqId, bar):
    """Print one historical bar and put it on ``self.my_hist_queue`` as a dict.

    Args:
        reqId: Request identifier, stored under ``'Request ID'``.
        bar: Object exposing ``date``, ``open``, ``high``, ``low``,
            ``close``, ``volume``, ``barCount`` and ``average``.
    """
    record = {
        'Request ID': reqId,
        'Date': bar.date,
        'Open': bar.open,
        'High': bar.high,
        'Low': bar.low,
        'Close': bar.close,
        'Volume': bar.volume,
        'Count': bar.barCount,
        'WAP': bar.average,
    }
    print(
        "HistoricalData. ", reqId,
        "Date:", record['Date'],
        "Open:", record['Open'],
        "High:", record['High'],
        "Low:", record['Low'],
        "Close:", record['Close'],
        "Volume:", record['Volume'],
        "Count:", record['Count'],
        "WAP:", record['WAP'],
    )
    self.my_hist_queue.put(record)

然后,迭代队列并将历史数据放入字典列表中是比较直接的。这样,pandas 就可以轻松将其转换为数据框。这是我的做法...

def create_dataframe(self, contract, num_of_days=5):
    """Request daily TRADES bars and collect them into a pandas DataFrame.

    Args:
        contract: Contract to request history for; forwarded unchanged to
            ``self.reqHistoricalData``.
        num_of_days: Number of days requested, and also the number of
            items read back from the queue.

    Returns:
        A DataFrame with one row per item taken from the queue. The frame
        is also printed.
    """
    hist_storage = self.create_queue()
    # The request's return value is not needed; bars are read from the queue.
    self.reqHistoricalData(101, contract, '', '{} D'.format(num_of_days),
                           '1 day', "TRADES", 1, 1, False, [])
    # NOTE(review): with a queue.Queue, get() waits until an item is
    # available, so this only returns once num_of_days items have arrived.
    ticker_list = [hist_storage.get() for _ in range(num_of_days)]
    df = pd.DataFrame(ticker_list)
    print(df)
    return df

希望对您有所帮助!干杯!

对于更大的数据集,请使用字典列表。从 API 返回的 bar 对象可以转换成一个字典,再追加到 api 对象的一个列表成员中。
class IBApi(TestWrapper, TestClient):  # the actual API 'app' = API Object we interact with when sending/receiving
    """API object deriving from both ``TestWrapper`` and ``TestClient``.

    ``histbars`` starts as an empty list; the ``historicalData`` callback
    shown next to this class appends one dict per bar to it.
    """
    def __init__(self):
        TestWrapper.__init__(self)  # requires the wrapper...
        # The instance itself is passed as the client's wrapper.
        TestClient.__init__(self, wrapper=self)
        ###.....
        # Accumulates the received historical bars.
        self.histbars=[]

def historicalData(self, reqId: int, bar: BarData):
    """Append one historical bar to ``self.histbars`` as a plain dict.

    Args:
        reqId: Request identifier, stored under ``'reqid'``.
        bar: Bar whose ``date``, ``open``, ``high``, ``low``, ``close``,
            ``volume``, ``wap`` and ``barCount`` attributes are copied.
    """
    self.histbars.append({
        'reqid': reqId,
        'datetime': bar.date,
        'open': bar.open,
        'high': bar.high,
        'low': bar.low,
        'close': bar.close,
        'vol': bar.volume,
        'wap': bar.wap,
        'barcount': bar.barCount,
    })

生成的字典列表很容易转换为数据框:

# Build the frame from the accumulated list of per-bar dicts.
# NOTE(review): assumes ``DataFrame`` was imported from pandas and that
# ``apclient`` is the API object holding ``histbars`` - confirm.
df =DataFrame.from_records(apclient.histbars)

df
    reqid       datetime    open    high    low    close    vol     wap barCount
0   1   20220519 09:30:00   191.50  193.80  189.60  191.58  16178   191.778 7890
1   1   20220519 09:45:00   191.64  194.30  190.21  192.98  12876   192.433 6090
2   1   20220519 10:00:00   192.97  194.74  191.88  192.33  12974   193.27  7835
3   1   20220519 10:15:00   192.39  193.75  191.77  192.79  8370    192.906 4372
4   1   20220519 10:30:00   192.71  194.07  191.29  191.69  7269    192.425 3774
5   1   20220519 10:45:00   191.72  193.19  191.01  192.26  6565    192.093 3167
6   1   20220519 11:00:00   192.24  193.99  191.80  193.44  6664    192.998 3023
7   1   20220519 11:15:00   193.37  193.85  192.58  192.97  5105    193.132 2278
8   1   20220519 11:30:00   193.04  194.99  192.99  194.63  5787    194.196 2703
9   1   20220519 11:45:00   194.60  194.97  193.90  194.80  7207    194.467 2949
10  1   20220519 12:00:00   194.82  195.29  194.50  194.67  4862    194.839 2169
11  1   20220519 12:15:00   194.66  195.15  194.04  194.59  5753    194.605 2638
12  1   20220519 12:30:00   194.61  194.75  192.92  192.92  3618    193.83  1723
13  1   20220519 12:45:00   192.90  193.39  192.38  193.20  3921    192.911 1739
14  1   20220519 13:00:00   193.20  193.32  191.69  191.98  3309    192.602 1844
15  1   20220519 13:15:00   191.97  192.28  191.45  191.98  4601    191.883 2407
16  1   20220519 13:30:00   192.00  192.62  191.55  192.02  4240    192.034 2031
...

(请原谅手动格式)
待办事项:建立一个从请求 ID 到股票代码(ticker)的简单查找字典,并将日期时间转换为 pandas 可识别的 datetime 格式。