如何在 R 中将通过 Python API 以列表或环境(environment)对象形式传入的数据转换为数据框或 tibble?

How do I make a data frame or tibble in r from data passed using a Python API in a list or environment object?

使用 Reticulate,我通过 Interactive Brokers 从 Python API 获取数据。我想将从我的 Python API 传递的数据转换为数据框或 tibble,但我完全不知道如何完成它。我也不知道如何在 reprex 中重新创建数据,所以我只是提供显示在我的 R 控制台上的文本。

这是 Python API 在 openOrders object 中传递给我的数据。当我在 R 提示符下输入 object 名称时,它看起来像这样:

> openOrders
[[1]]
Order(orderId=16, clientId=501, permId=115804563, action='Buy', totalQuantity=1.0)

[[2]]
Order(orderId=17, clientId=501, permId=115804564, action='SELL', totalQuantity=1.0)

[[3]]
Order(orderId=18, clientId=501, permId=115804565, action='SELL', totalQuantity=1.0)

当我对 openOrders 调用 dput() 并打印结果时,我得到:

> dput(openOrders) %>% print()
list(<environment>, <environment>, <environment>)
[[1]]

Order(orderId=25, clientId=501, permId=306800005, action='Buy', totalQuantity=1.0)

[[2]]
Order(orderId=26, clientId=501, permId=306800006, action='SELL', totalQuantity=1.0)

[[3]]
Order(orderId=27, clientId=501, permId=306800007, action='SELL', totalQuantity=1.0)

R 告诉我数据类型是列表:

> typeof(openOrders)
[1] "list"

我可以访问列表中的个别条目:

> openOrders[1]
[[1]]

Order(orderId=16, clientId=501, permId=115804563, action='Buy', totalQuantity=1.0)

我可以使用以下方法按名称访问列表中的各个数据元素:

> openOrders[[1]]$orderId
[1] 16

当我查看列表元素的名称(names)时,共有 134 个(以下为节选):

> names(openOrders[[1]])
  [1] "account"                        "action"                         "activeStartTime"            
    ...         
[133] "volatilityType"                 "whatIf"

但我不知道如何将数据放入数据框或 tibble。理想情况下,数据框或 tibble 应如下所示:

> openOrders

    orderId clientId permId     action totalQuantity
[1] 16      501      115804563  'Buy'  1.0 
[2] 17      501      115804564  'SELL' 1.0
[3] 18      501      115804565  'SELL' 1.0

我已经按照另一个 post 中的建议尝试了 enframe 函数,我得到:

> enframe(openOrders)
# A tibble: 3 x 2
   name value     
  <int> <list>    
1     1 <ib_ns..O>
2     2 <ib_ns..O>
3     3 <ib_ns..O>

我还尝试了另一个 post 的以下方法并得到了错误:

x <- as.data.frame(do.call(rbind, openOrders))
Warning: Error in <Anonymous>: environments cannot be coerced to other types

这是 python 代码:

from ib_insync import *
import pandas as pd
import numpy as np

# Identify open orders 
def ibOpenOrders():
  """Return the result of ``ib.openOrders()`` from the module-level ``ib`` client.

  ``ib.sleep(0)`` is called after the fetch and before returning; the fetched
  value is handed back unchanged.
  """
  open_order_list = ib.openOrders()
  ib.sleep(0)
  return open_order_list

ib = IB()

这是我在 R 中的代码:

# Attach reticulate and select the Python interpreter it should use.
library(reticulate)
use_python("/usr/local/bin/python3.7")

# Source the Python script so ibOpenOrders() is callable from R, then call it.
source_python("iBrokersCallsReprex.py")
openOrders <- ibOpenOrders()

为了获取 dput 的数据,我先在 Python 端把数据转换成 pandas 数据框,然后再将其返回给 R。我将 Python 代码改为:

def ibOpenOrders():
  """Fetch open orders from ``ib`` and return them converted by ``util.df``.

  ``ib.sleep(0)`` is called between the fetch and the conversion.
  """
  pending_orders = ib.openOrders()
  ib.sleep(0)
  return util.df(pending_orders)

这样修改之后,dput 的输出如下:

structure(list(orderId = c(68, 69, 70), clientId = c(500, 500, 
500), permId = c(306801738, 306801739, 306801740), action = c("Buy", 
"SELL", "SELL"), totalQuantity = c(1, 1, 1), orderType = c("LMT", 
"LMT", "STP"), lmtPrice = c(9646.25, 9656.25, 1.79769313486232e+308
), auxPrice = c(1.79769313486232e+308, 1.79769313486232e+308, 
9626.25), tif = c("", "", ""), activeStartTime = c("", "", ""
), activeStopTime = c("", "", ""), ocaGroup = c("", "", ""), 
ocaType = c(0, 0, 0), orderRef = c("", "", ""), transmit = c(FALSE, 
FALSE, TRUE), parentId = c(0, 68, 68), blockOrder = c(FALSE, 
FALSE, FALSE), sweepToFill = c(FALSE, FALSE, FALSE), displaySize =  c(0, 
0, 0), triggerMethod = c(0, 0, 0), outsideRth = c(FALSE, 
FALSE, FALSE), hidden = c(FALSE, FALSE, FALSE), goodAfterTime = c("", 
"", ""), goodTillDate = c("", "", ""), rule80A = c("", "", 
""), allOrNone = c(FALSE, FALSE, FALSE), minQty = c(2147483647, 
2147483647, 2147483647), percentOffset = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308),      overridePercentageConstraints = c(FALSE, 
FALSE, FALSE), trailStopPrice = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), trailingPercent = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), faGroup = c("", 
"", ""), faProfile = c("", "", ""), faMethod = c("", "", 
""), faPercentage = c("", "", ""), designatedLocation = c("", 
"", ""), openClose = c("O", "O", "O"), origin = c(0, 0, 0
), shortSaleSlot = c(0, 0, 0), exemptCode = c(-1, -1, -1), 
discretionaryAmt = c(0, 0, 0), eTradeOnly = c(TRUE, TRUE, 
TRUE), firmQuoteOnly = c(TRUE, TRUE, TRUE), nbboPriceCap = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), optOutSmartRouting = c(FALSE, 
FALSE, FALSE), auctionStrategy = c(0, 0, 0), startingPrice = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), stockRefPrice = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), delta = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), stockRangeLower = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), stockRangeUpper = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), randomizePrice = c(FALSE, 
FALSE, FALSE), randomizeSize = c(FALSE, FALSE, FALSE), volatility = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), volatilityType = c(2147483647, 
2147483647, 2147483647), deltaNeutralOrderType = c("", "", 
""), deltaNeutralAuxPrice = c(1.79769313486232e+308, 1.79769313486232e+308, 
1.79769313486232e+308), deltaNeutralConId = c(0, 0, 0), deltaNeutralSettlingFirm = c("", 
"", ""), deltaNeutralClearingAccount = c("", "", ""), deltaNeutralClearingIntent = c("", 
"", ""), deltaNeutralOpenClose = c("", "", ""), deltaNeutralShortSale = c(FALSE, 
FALSE, FALSE), deltaNeutralShortSaleSlot = c(0, 0, 0), deltaNeutralDesignatedLocation = c("", 
"", ""), continuousUpdate = c(FALSE, FALSE, FALSE), referencePriceType = c(2147483647, 
2147483647, 2147483647), basisPoints = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), basisPointsType = c(2147483647, 
2147483647, 2147483647), scaleInitLevelSize = c(2147483647, 
2147483647, 2147483647), scaleSubsLevelSize = c(2147483647, 
2147483647, 2147483647), scalePriceIncrement = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), scalePriceAdjustValue = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), scalePriceAdjustInterval = c(2147483647, 
2147483647, 2147483647), scaleProfitOffset = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), scaleAutoReset = c(FALSE, 
FALSE, FALSE), scaleInitPosition = c(2147483647, 2147483647, 
2147483647), scaleInitFillQty = c(2147483647, 2147483647, 
2147483647), scaleRandomPercent = c(FALSE, FALSE, FALSE), 
scaleTable = c("", "", ""), hedgeType = c("", "", ""), hedgeParam = c("", 
"", ""), account = c("", "", ""), settlingFirm = c("", "", 
""), clearingAccount = c("", "", ""), clearingIntent = c("", 
"", ""), algoStrategy = c("", "", ""), algoParams = list(
    list(), list(), list()), smartComboRoutingParams = list(
    list(), list(), list()), algoId = c("", "", ""), whatIf = c(FALSE, 
FALSE, FALSE), notHeld = c(FALSE, FALSE, FALSE), solicited = c(FALSE, 
FALSE, FALSE), modelCode = c("", "", ""), orderComboLegs = list(
    list(), list(), list()), orderMiscOptions = list(list(), 
    list(), list()), referenceContractId = c(0, 0, 0), peggedChangeAmount = c(0, 
0, 0), isPeggedChangeAmountDecrease = c(FALSE, FALSE, FALSE
), referenceChangeAmount = c(0, 0, 0), referenceExchangeId = c("", 
"", ""), adjustedOrderType = c("", "", ""), triggerPrice = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), adjustedStopPrice = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), adjustedStopLimitPrice = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), adjustedTrailingAmount = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), adjustableTrailingUnit = c(0, 
0, 0), lmtPriceOffset = c(1.79769313486232e+308, 1.79769313486232e+308, 
1.79769313486232e+308), conditions = list(list(), list(), 
    list()), conditionsCancelOrder = c(FALSE, FALSE, FALSE
), conditionsIgnoreRth = c(FALSE, FALSE, FALSE), extOperator = c("", 
"", ""), softDollarTier = list(<environment>, <environment>, 
    <environment>), cashQty = c(1.79769313486232e+308, 1.79769313486232e+308, 
1.79769313486232e+308), mifid2DecisionMaker = c("", "", ""
), mifid2DecisionAlgo = c("", "", ""), mifid2ExecutionTrader = c("", 
"", ""), mifid2ExecutionAlgo = c("", "", ""), dontUseAutoPriceForHedge = c(FALSE, 
FALSE, FALSE), isOmsContainer = c(FALSE, FALSE, FALSE), discretionaryUpToLimitPrice = c(FALSE, 
FALSE, FALSE), autoCancelDate = c("", "", ""), filledQuantity = c(1.79769313486232e+308, 
1.79769313486232e+308, 1.79769313486232e+308), refFuturesConId = c(0, 
0, 0), autoCancelParent = c(FALSE, FALSE, FALSE), shareholder = c("", 
"", ""), imbalanceOnly = c(FALSE, FALSE, FALSE), routeMarketableToBbo = c(FALSE, 
FALSE, FALSE), parentPermId = c(0, 0, 0), usePriceMgmtAlgo = c(FALSE, 
FALSE, FALSE)), class = "data.frame", row.names = c(NA, -3L
), pandas.index = <environment>)

在R端,这里是openOrders的结构

> str(openOrders)

tibble [9 × 130] (S3: tbl_df/tbl/data.frame)
 $ orderId                       : num [1:9] 140 141 142 133 134 132 148 149 150
 $ clientId                      : num [1:9] 500 500 500 500 500 500 500 500 500
 $ permId                        : num [1:9] 1.78e+09 1.78e+09 1.78e+09 1.78e+09 1.78e+09 ...
 $ action                        : chr [1:9] "BUY" "SELL" "SELL" "SELL" ...
 $ isPeggedChangeAmountDecrease  : logi [1:9] FALSE FALSE FALSE FALSE FALSE FALSE ...
  [list output truncated]
 - attr(*, "pandas.index")=RangeIndex(start=0, stop=9, step=1)

这是我在 python 侧打印 pandas 数据框时得到的结果:

def ibOpenOrders():
  # Debug variant: fetch open orders from the module-level `ib` client,
  # convert them with util.df, and print df.head().
  # NOTE(review): no return statement is visible in this snippet.
  openOrders = ib.openOrders()
  ib.sleep(0)
  #print (openOrders.head())
  # Convert the fetched orders with util.df before printing.
  df = util.df(openOrders)
  print (df.head())
   orderId  clientId  ...  parentPermId usePriceMgmtAlgo
0       13       400  ...             0            False
1       14       400  ...             0            False
2       12       400  ...             0            False
3        7       400  ...             0            False
4        5       400  ...             0            False

而且,这是我在 python 侧打印 softDollarTier 属性时得到的结果:

print (openOrders.softDollarTier)
[18 rows x 130 columns]
0     SoftDollarTier(name='', val='', displayName='')
1     SoftDollarTier(name='', val='', displayName='')
2     SoftDollarTier(name='', val='', displayName='')
3     SoftDollarTier(name='', val='', displayName='')

这是我直接访问同一属性时在 R 端获得的结果。

> head(openOrders$softDollarTier)
[[1]]
SoftDollarTier(name='', val='', displayName='')

[[2]]
SoftDollarTier(name='', val='', displayName='')

[[3]]
SoftDollarTier(name='', val='', displayName='')

有什么想法吗?

如前所述,只要 Python 方法返回的是一个真正的 pandas 数据框(DataFrame),reticulate 就会自动将其转换为 R 数据框。下面用一个包含多种原子类型的可重现示例来演示:

Python

import numpy as np
import pandas as pd

alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
data_tools = ['sas', 'stata', 'spss', 'python', 'r', 'julia']

### DATA BUILD
def build_py_df():
    """Build a seeded 500-row demo DataFrame.

    Columns, in order: ``group``, ``int``, ``num``, ``char``, ``bool``, ``date``.
    The global NumPy random state is seeded with 6520 on every call.

    Returns:
        pandas.DataFrame built from the six generated columns.
    """
    n_rows = 500
    np.random.seed(6520)

    # Each draw consumes the global NumPy random state, so the columns are
    # generated in the same sequence in which they appear in the frame.
    columns = {}
    columns['group'] = np.random.choice(data_tools, n_rows)
    columns['int'] = np.random.randint(1, 10, n_rows)
    columns['num'] = np.random.randn(n_rows)

    # Three random characters from `alpha`, joined into one string per row.
    letters = list(alpha)
    columns['char'] = [''.join(np.random.choice(letters, 3)) for _ in range(n_rows)]

    columns['bool'] = np.random.choice([True, False], n_rows)
    columns['date'] = np.random.choice(pd.date_range('2000-01-01', '2019-05-31'), n_rows)

    return pd.DataFrame(columns)

df = build_py_df()

print(df.head(10))

输出

#     group  int       num char   bool       date
# 0       r    8 -0.604529  eNR   True 2008-09-01
# 1   stata    7  0.875878  0G9   True 2004-07-13
# 2    spss    4 -0.857370  mrH  False 2017-11-29
# 3   stata    6 -2.144899  MFj   True 2003-03-03
# 4   stata    3 -0.408117  Gsh   True 2008-11-28
# 5   stata    2  1.324790  gR0   True 2004-04-15
# 6   julia    6  0.682228  jhR   True 2004-09-18
# 7  python    6 -0.993106  cqT   True 2002-03-27
# 8   julia    5 -0.346687  GfC   True 2007-04-30
# 9       r    7  0.925665  d1a   True 2006-01-01

R

# Attach reticulate and source the Python script that defines build_py_df().
library(reticulate)
source_python("/path/to/Python/script.py")

# Call the Python function from R and show the first 10 rows of the result.
py_df <- build_py_df()
head(py_df, n = 10)

输出

#     group int        num char  bool       date
# 1       r   8 -0.6045292  eNR  TRUE 2008-09-01
# 2   stata   7  0.8758784  0G9  TRUE 2004-07-13
# 3    spss   4 -0.8573697  mrH FALSE 2017-11-29
# 4   stata   6 -2.1448990  MFj  TRUE 2003-03-03
# 5   stata   3 -0.4081175  Gsh  TRUE 2008-11-28
# 6   stata   2  1.3247895  gR0  TRUE 2004-04-15
# 7   julia   6  0.6822280  jhR  TRUE 2004-09-18
# 8  python   6 -0.9931057  cqT  TRUE 2002-03-27
# 9   julia   5 -0.3466866  GfC  TRUE 2007-04-30
# 10      r   7  0.9256647  d1a  TRUE 2006-01-01

对于元数据

str(py_df)
# 'data.frame': 500 obs. of  6 variables:
#  $ group: chr  "r" "stata" "spss" "stata" ...
#  $ int  : num  8 7 4 6 3 2 6 6 5 7 ...
#  $ num  : num  -0.605 0.876 -0.857 -2.145 -0.408 ...
#  $ char : chr  "eNR" "0G9" "mrH" "MFj" ...
#  $ bool : logi  TRUE TRUE FALSE TRUE TRUE TRUE ...
#  $ date : POSIXct, format: "2008-09-01" "2004-07-13" "2017-11-29" "2003-03-03" ...
#  - attr(*, "pandas.index")=RangeIndex(start=0, stop=500, step=1)

attributes(py_df)
# $`names`
# [1] "group" "int"   "num"   "char"  "bool"  "date" 

# $class
# [1] "data.frame"

# $row.names
# [1]   1   2   3   4   5   6  
# SHOW ALL ATTRIBUTES

# $pandas.index
# RangeIndex(start=0, stop=500, step=1)

attributes(py_df)$pandas.index
# RangeIndex(start=0, stop=500, step=1)