从 Power Pivot 中提取 2000 万行 ("Item.data")
Rip 20 million rows from Power Pivot ("Item.data")
我收到了一个工作簿,其中的 Power Pivot 数据模型包含两个 table(一个大约一百万行,另一个 20 行)。我想把数据提取出来(其实任何格式都可以——这里就假设是 CSV),以便在 R + PostgreSQL 中使用。
我无法导出到 Excel table,因为有超过 100 万行;只有当我 select 大约 200,000 行时,复制粘贴数据才有效。
我尝试将 xlsx 转换为 zip 并在记事本 ++ 中打开“item.data”文件,但它已被加密。
我拼凑了一些 VBA,对大约 50 万行(0.5 百万行)的数据有效:
' Entry point: exports the "Partners" table of the active workbook's
' Power Pivot data model by handing the model's ADO connection to
' WriteDmvContent. Shows "Finished" on success, or the error description
' if any step raises an error.
Public Sub CreatePowerPivotDmvInventory()
    Dim conn As ADODB.Connection
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Pass the name of the model table to export.
    ' E.g. Partners
    WriteDmvContent "Partners", conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Reached only through On Error; the Exit Sub above keeps the
    ' success path from falling into the handler.
    MsgBox Err.Description
End Sub
' Runs "EVALUATE <dmvName>" against the data-model connection and writes the
' result to H:\output_table_<dmvName>.csv: one header line with the column
' names, then one line per record.
'
' Parameters:
'   dmvName - name of the model table to evaluate (concatenated into the query).
'   conn    - open ADO connection to the workbook data model.
'
' Errors: the output file is closed and the recordset released, then the
' original error is re-raised so the caller's handler can report it.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Integer
    Dim lastField As Integer
    Dim myFile As String
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    mdx = "EVALUATE " & dmvName

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' Setup CSV file
    myFile = "H:\output_table_" & dmvName & ".csv"
    fileNum = FreeFile              ' do not assume file number 1 is free
    Open myFile For Output As #fileNum
    fileIsOpen = True

    lastField = rs.Fields.Count - 1

    ' Output column names
    For i = 0 To lastField
        If i = lastField Then
            Write #fileNum, rs.Fields(i).Name       ' last column ends the line
        Else
            Write #fileNum, rs.Fields(i).Name,      ' trailing comma: more fields follow
        End If
    Next i

    ' Output of the query results
    Do Until rs.EOF
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNum, rs.Fields(i)
            Else
                Write #fileNum, rs.Fields(i),
            End If
        Next i
        rs.MoveNext                 ' without this the loop never reaches EOF
    Loop

    Close #fileNum
    fileIsOpen = False
    rs.Close
    Set rs = Nothing
    Exit Sub

FailureOutput:
    ' Capture the error before cleaning up, then hand it back to the caller.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    If fileIsOpen Then Close #fileNum
    Set rs = Nothing
    Err.Raise errNumber, errSource, errDescription
End Sub
DAX Studio 将允许您查询 Excel 工作簿中的数据模型并输出为各种格式,包括平面文件。
<table name>
我找到了一个可行的 (VBA) 解决方案 [不过 greggy 的方法对我也有效!] -> 我的 table 太大,无法作为一个整块导出,所以我按 'month' 循环过滤、逐月导出。这种做法看起来可行,把所有文件拼接在一起后得到了一个 1.2 GB 的 CSV:
' Encodes a date as the number year * 100 + month, e.g. December 2000 -> 200012.
Function YYYYMM(aDate As Date)
    Dim yearPart As Long
    Dim monthPart As Long

    yearPart = Year(aDate)
    monthPart = Month(aDate)

    YYYYMM = yearPart * 100 + monthPart
End Function
' Returns the year-month code (year * 100 + month) that follows YYYYMM.
' December rolls over to January of the next year (200012 -> 200101);
' any other month simply advances by one (200101 -> 200102).
Function NextYYYYMM(YYYYMM As Long)
    If YYYYMM Mod 100 = 12 Then
        ' +100 moves to the next year, -11 moves month 12 back to month 1.
        NextYYYYMM = YYYYMM + 100 - 11
    Else
        ' Without this branch non-December inputs returned Empty (0),
        ' so a caller looping on the result could never advance.
        NextYYYYMM = YYYYMM + 1
    End If
End Function
' Entry point: exports the model table "table1" of the active workbook's
' Power Pivot data model by handing the model's ADO connection to
' WriteDmvContent. Shows "Finished" on success, or the error description
' if any step raises an error.
Public Sub CreatePowerPivotDmvInventory()
    Dim conn As ADODB.Connection
    Dim tblname As String
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Pass the name of the model table to export.
    tblname = "table1"
    WriteDmvContent tblname, conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Reached only through On Error; the Exit Sub above keeps the
    ' success path from falling into the handler.
    MsgBox Err.Description
End Sub
' Exports the model table dmvName one month at a time. For every year-month
' from 2000-12 through 2015-12 it evaluates
'   CALCULATETABLE(<dmvName>, <dmvName>[yearmonth] = "<yyyy-mm>")
' and writes the result to H:\vba_tbl_<dmvName>_<yyyy-mm>.csv (a header line
' followed by one line per record).
'
' Parameters:
'   dmvName - name of the model table to export (concatenated into the query).
'   conn    - open ADO connection to the workbook data model.
'
' Errors: the current output file is closed and the recordset released, then
' the original error is re-raised so the caller's handler can report it.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Integer
    Dim lastField As Integer
    Dim eval_field As String
    Dim eval_val As Variant
    Dim CurrYM As Long, LimYM As Long, NextYM As Long
    Dim String_Date As String
    Dim myFile As String
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    'If table small enough:
    'mdx = "EVALUATE " & dmvName
    'Other-wise filter:
    eval_field = "yearmonth"

    'Loop through year_month
    CurrYM = YYYYMM(#12/1/2000#)
    LimYM = YYYYMM(#12/1/2015#)

    Do While CurrYM <= LimYM
        ' 200012 -> "2000-12"
        String_Date = CStr(Left(CurrYM, 4)) + "-" + CStr(Right(CurrYM, 2))
        Debug.Print String_Date

        eval_val = String_Date
        mdx = "EVALUATE(CALCULATETABLE(" & dmvName & ", " & dmvName & "[" & eval_field & "] = """ & eval_val & """))"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file (one per month)
        myFile = "H:\vba_tbl_" & dmvName & "_" & eval_val & ".csv"
        Debug.Print (myFile)
        fileNum = FreeFile          ' do not assume file number 1 is free
        Open myFile For Output As #fileNum
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNum, """" & rs.Fields(i).Name & """"     ' last column ends the line
            Else
                Write #fileNum, """" & rs.Fields(i).Name & """",    ' trailing comma: more fields follow
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNum, """" & rs.Fields(i) & """"
                Else
                    Write #fileNum, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext             ' without this the loop never reaches EOF
        Loop

        ' Release this month's file and recordset before starting the next one.
        Close #fileNum
        fileIsOpen = False
        rs.Close
        Set rs = Nothing

        ' Guard against a helper that fails to advance, which would
        ' otherwise loop forever and keep rewriting the same file.
        NextYM = NextYYYYMM(CurrYM)
        If NextYM <= CurrYM Then
            Err.Raise vbObjectError + 513, "WriteDmvContent", _
                      "NextYYYYMM did not advance past " & CurrYM
        End If
        CurrYM = NextYM
    Loop
    Exit Sub

FailureOutput:
    ' Capture the error before cleaning up, then hand it back to the caller.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    If fileIsOpen Then Close #fileNum
    Set rs = Nothing
    Err.Raise errNumber, errSource, errDescription
End Sub
我修改了 mptevsion 的脚本——现在它把 table 中的数据每 n 行保存为一个单独的 csv 文件(默认 100k 行,可以通过更改 chunk_size 来调整)。
此脚本的优点是不依赖 table 中的任何字段来拆分数据;为此它使用了 TOPNSKIP (https://dax.guide/topnskip/)。
' Exports the model table named in tblName to a series of CSV files of at most
' chunk_size rows each, written to save_path as vba_tbl_<table>_<chunk>.csv.
' The row count is read first with COUNTROWS; each chunk is then fetched with
' TOPNSKIP(<rows to take>, <rows to skip>, <table>), so no column of the table
' is needed to partition the data.
' Shows "Finished" on success, or the error description on failure.
' NOTE(review): TOPNSKIP is called without an order-by expression; confirm the
' engine returns rows in a stable order across the successive queries.
Public Sub CreatePowerPivotDmvInventory()
    Dim save_path As String
    Dim tblName As String
    Dim chunk_size As Long
    Dim chunk_id As Long
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim last_field As Long
    Dim rows_limit As Long
    Dim rows_left As Long
    Dim conn As ADODB.Connection
    Dim myFile As String
    Dim file_num As Integer
    Dim file_is_open As Boolean
    Dim failure_text As String

    On Error GoTo FailureOutput

    save_path = "H:\power pivot\csv"
    tblName = "data"
    chunk_size = 100000

    Set conn = ActiveWorkbook.Model.DataModelConnection.ModelConnection.ADOConnection

    ' calculating number of rows in a table
    mdx = "evaluate {COUNTROWS('" & tblName & "')}"
    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic
    ' COUNTROWS yields BLANK (Null here) for an empty table; treat it as 0 rows.
    If IsNull(rs.Fields(0).Value) Then
        rows_limit = 0
    Else
        rows_limit = rs.Fields(0).Value
    End If
    rs.Close
    Set rs = Nothing

    rows_left = rows_limit
    chunk_id = 1

    Do While rows_left > 0
        ' The final chunk may be shorter than the configured size.
        If rows_left < chunk_size Then
            chunk_size = rows_left
        End If

        ' Take chunk_size rows after skipping the rows already exported.
        mdx = "define var data_table = '" & tblName & "'" & Chr(10) & _
              "EVALUATE(" & Chr(10) & _
              "    TOPNSKIP(" & chunk_size & ", " & (rows_limit - rows_left) & ", data_table)" & Chr(10) & _
              ")"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file (one per chunk)
        myFile = save_path & "\vba_tbl_" & tblName & "_" & chunk_id & ".csv"
        Debug.Print (myFile)
        file_num = FreeFile         ' do not assume file number 1 is free
        Open myFile For Output As #file_num
        file_is_open = True

        last_field = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To last_field
            If i = last_field Then
                Write #file_num, """" & rs.Fields(i).Name & """"    ' last column ends the line
            Else
                Write #file_num, """" & rs.Fields(i).Name & """",   ' trailing comma: more fields follow
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To last_field
                If i = last_field Then
                    Write #file_num, """" & rs.Fields(i) & """"
                Else
                    Write #file_num, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext             ' without this the loop never reaches EOF
        Loop

        ' Release this chunk's file and recordset before fetching the next one.
        Close #file_num
        file_is_open = False
        rs.Close
        Set rs = Nothing

        rows_left = rows_left - chunk_size
        chunk_id = chunk_id + 1
    Loop

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Keep the message before cleanup so it cannot be lost, then report it.
    failure_text = Err.Description
    If file_is_open Then Close #file_num
    Set rs = Nothing
    MsgBox failure_text
End Sub
我收到了一个工作簿,其中的 Power Pivot 数据模型包含两个 table(一个大约一百万行,另一个 20 行)。我想把数据提取出来(其实任何格式都可以——这里就假设是 CSV),以便在 R + PostgreSQL 中使用。
我无法导出到 Excel table,因为有超过 100 万行;只有当我 select 大约 200,000 行时,复制粘贴数据才有效。
我尝试将 xlsx 转换为 zip 并在记事本 ++ 中打开“item.data”文件,但它已被加密。
我拼凑了一些 VBA,对大约 50 万行(0.5 百万行)的数据有效:
' Entry point: exports the "Partners" table of the active workbook's
' Power Pivot data model by handing the model's ADO connection to
' WriteDmvContent. Shows "Finished" on success, or the error description
' if any step raises an error.
Public Sub CreatePowerPivotDmvInventory()
    Dim conn As ADODB.Connection
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Pass the name of the model table to export.
    ' E.g. Partners
    WriteDmvContent "Partners", conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Reached only through On Error; the Exit Sub above keeps the
    ' success path from falling into the handler.
    MsgBox Err.Description
End Sub
' Runs "EVALUATE <dmvName>" against the data-model connection and writes the
' result to H:\output_table_<dmvName>.csv: one header line with the column
' names, then one line per record.
'
' Parameters:
'   dmvName - name of the model table to evaluate (concatenated into the query).
'   conn    - open ADO connection to the workbook data model.
'
' Errors: the output file is closed and the recordset released, then the
' original error is re-raised so the caller's handler can report it.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Integer
    Dim lastField As Integer
    Dim myFile As String
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    mdx = "EVALUATE " & dmvName

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' Setup CSV file
    myFile = "H:\output_table_" & dmvName & ".csv"
    fileNum = FreeFile              ' do not assume file number 1 is free
    Open myFile For Output As #fileNum
    fileIsOpen = True

    lastField = rs.Fields.Count - 1

    ' Output column names
    For i = 0 To lastField
        If i = lastField Then
            Write #fileNum, rs.Fields(i).Name       ' last column ends the line
        Else
            Write #fileNum, rs.Fields(i).Name,      ' trailing comma: more fields follow
        End If
    Next i

    ' Output of the query results
    Do Until rs.EOF
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNum, rs.Fields(i)
            Else
                Write #fileNum, rs.Fields(i),
            End If
        Next i
        rs.MoveNext                 ' without this the loop never reaches EOF
    Loop

    Close #fileNum
    fileIsOpen = False
    rs.Close
    Set rs = Nothing
    Exit Sub

FailureOutput:
    ' Capture the error before cleaning up, then hand it back to the caller.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    If fileIsOpen Then Close #fileNum
    Set rs = Nothing
    Err.Raise errNumber, errSource, errDescription
End Sub
DAX Studio 将允许您查询 Excel 工作簿中的数据模型并输出为各种格式,包括平面文件。
<table name>
我找到了一个可行的 (VBA) 解决方案 [不过 greggy 的方法对我也有效!] -> 我的 table 太大,无法作为一个整块导出,所以我按 'month' 循环过滤、逐月导出。这种做法看起来可行,把所有文件拼接在一起后得到了一个 1.2 GB 的 CSV:
' Encodes a date as the number year * 100 + month, e.g. December 2000 -> 200012.
Function YYYYMM(aDate As Date)
    Dim yearPart As Long
    Dim monthPart As Long

    yearPart = Year(aDate)
    monthPart = Month(aDate)

    YYYYMM = yearPart * 100 + monthPart
End Function
' Returns the year-month code (year * 100 + month) that follows YYYYMM.
' December rolls over to January of the next year (200012 -> 200101);
' any other month simply advances by one (200101 -> 200102).
Function NextYYYYMM(YYYYMM As Long)
    If YYYYMM Mod 100 = 12 Then
        ' +100 moves to the next year, -11 moves month 12 back to month 1.
        NextYYYYMM = YYYYMM + 100 - 11
    Else
        ' Without this branch non-December inputs returned Empty (0),
        ' so a caller looping on the result could never advance.
        NextYYYYMM = YYYYMM + 1
    End If
End Function
' Entry point: exports the model table "table1" of the active workbook's
' Power Pivot data model by handing the model's ADO connection to
' WriteDmvContent. Shows "Finished" on success, or the error description
' if any step raises an error.
Public Sub CreatePowerPivotDmvInventory()
    Dim conn As ADODB.Connection
    Dim tblname As String
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Pass the name of the model table to export.
    tblname = "table1"
    WriteDmvContent tblname, conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Reached only through On Error; the Exit Sub above keeps the
    ' success path from falling into the handler.
    MsgBox Err.Description
End Sub
' Exports the model table dmvName one month at a time. For every year-month
' from 2000-12 through 2015-12 it evaluates
'   CALCULATETABLE(<dmvName>, <dmvName>[yearmonth] = "<yyyy-mm>")
' and writes the result to H:\vba_tbl_<dmvName>_<yyyy-mm>.csv (a header line
' followed by one line per record).
'
' Parameters:
'   dmvName - name of the model table to export (concatenated into the query).
'   conn    - open ADO connection to the workbook data model.
'
' Errors: the current output file is closed and the recordset released, then
' the original error is re-raised so the caller's handler can report it.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Integer
    Dim lastField As Integer
    Dim eval_field As String
    Dim eval_val As Variant
    Dim CurrYM As Long, LimYM As Long, NextYM As Long
    Dim String_Date As String
    Dim myFile As String
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    'If table small enough:
    'mdx = "EVALUATE " & dmvName
    'Other-wise filter:
    eval_field = "yearmonth"

    'Loop through year_month
    CurrYM = YYYYMM(#12/1/2000#)
    LimYM = YYYYMM(#12/1/2015#)

    Do While CurrYM <= LimYM
        ' 200012 -> "2000-12"
        String_Date = CStr(Left(CurrYM, 4)) + "-" + CStr(Right(CurrYM, 2))
        Debug.Print String_Date

        eval_val = String_Date
        mdx = "EVALUATE(CALCULATETABLE(" & dmvName & ", " & dmvName & "[" & eval_field & "] = """ & eval_val & """))"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file (one per month)
        myFile = "H:\vba_tbl_" & dmvName & "_" & eval_val & ".csv"
        Debug.Print (myFile)
        fileNum = FreeFile          ' do not assume file number 1 is free
        Open myFile For Output As #fileNum
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNum, """" & rs.Fields(i).Name & """"     ' last column ends the line
            Else
                Write #fileNum, """" & rs.Fields(i).Name & """",    ' trailing comma: more fields follow
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNum, """" & rs.Fields(i) & """"
                Else
                    Write #fileNum, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext             ' without this the loop never reaches EOF
        Loop

        ' Release this month's file and recordset before starting the next one.
        Close #fileNum
        fileIsOpen = False
        rs.Close
        Set rs = Nothing

        ' Guard against a helper that fails to advance, which would
        ' otherwise loop forever and keep rewriting the same file.
        NextYM = NextYYYYMM(CurrYM)
        If NextYM <= CurrYM Then
            Err.Raise vbObjectError + 513, "WriteDmvContent", _
                      "NextYYYYMM did not advance past " & CurrYM
        End If
        CurrYM = NextYM
    Loop
    Exit Sub

FailureOutput:
    ' Capture the error before cleaning up, then hand it back to the caller.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    If fileIsOpen Then Close #fileNum
    Set rs = Nothing
    Err.Raise errNumber, errSource, errDescription
End Sub
我修改了 mptevsion 的脚本——现在它把 table 中的数据每 n 行保存为一个单独的 csv 文件(默认 100k 行,可以通过更改 chunk_size 来调整)。
此脚本的优点是不依赖 table 中的任何字段来拆分数据;为此它使用了 TOPNSKIP (https://dax.guide/topnskip/)。
' Exports the model table named in tblName to a series of CSV files of at most
' chunk_size rows each, written to save_path as vba_tbl_<table>_<chunk>.csv.
' The row count is read first with COUNTROWS; each chunk is then fetched with
' TOPNSKIP(<rows to take>, <rows to skip>, <table>), so no column of the table
' is needed to partition the data.
' Shows "Finished" on success, or the error description on failure.
' NOTE(review): TOPNSKIP is called without an order-by expression; confirm the
' engine returns rows in a stable order across the successive queries.
Public Sub CreatePowerPivotDmvInventory()
    Dim save_path As String
    Dim tblName As String
    Dim chunk_size As Long
    Dim chunk_id As Long
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim last_field As Long
    Dim rows_limit As Long
    Dim rows_left As Long
    Dim conn As ADODB.Connection
    Dim myFile As String
    Dim file_num As Integer
    Dim file_is_open As Boolean
    Dim failure_text As String

    On Error GoTo FailureOutput

    save_path = "H:\power pivot\csv"
    tblName = "data"
    chunk_size = 100000

    Set conn = ActiveWorkbook.Model.DataModelConnection.ModelConnection.ADOConnection

    ' calculating number of rows in a table
    mdx = "evaluate {COUNTROWS('" & tblName & "')}"
    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic
    ' COUNTROWS yields BLANK (Null here) for an empty table; treat it as 0 rows.
    If IsNull(rs.Fields(0).Value) Then
        rows_limit = 0
    Else
        rows_limit = rs.Fields(0).Value
    End If
    rs.Close
    Set rs = Nothing

    rows_left = rows_limit
    chunk_id = 1

    Do While rows_left > 0
        ' The final chunk may be shorter than the configured size.
        If rows_left < chunk_size Then
            chunk_size = rows_left
        End If

        ' Take chunk_size rows after skipping the rows already exported.
        mdx = "define var data_table = '" & tblName & "'" & Chr(10) & _
              "EVALUATE(" & Chr(10) & _
              "    TOPNSKIP(" & chunk_size & ", " & (rows_limit - rows_left) & ", data_table)" & Chr(10) & _
              ")"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file (one per chunk)
        myFile = save_path & "\vba_tbl_" & tblName & "_" & chunk_id & ".csv"
        Debug.Print (myFile)
        file_num = FreeFile         ' do not assume file number 1 is free
        Open myFile For Output As #file_num
        file_is_open = True

        last_field = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To last_field
            If i = last_field Then
                Write #file_num, """" & rs.Fields(i).Name & """"    ' last column ends the line
            Else
                Write #file_num, """" & rs.Fields(i).Name & """",   ' trailing comma: more fields follow
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To last_field
                If i = last_field Then
                    Write #file_num, """" & rs.Fields(i) & """"
                Else
                    Write #file_num, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext             ' without this the loop never reaches EOF
        Loop

        ' Release this chunk's file and recordset before fetching the next one.
        Close #file_num
        file_is_open = False
        rs.Close
        Set rs = Nothing

        rows_left = rows_left - chunk_size
        chunk_id = chunk_id + 1
    Loop

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Keep the message before cleanup so it cannot be lost, then report it.
    failure_text = Err.Description
    If file_is_open Then Close #file_num
    Set rs = Nothing
    MsgBox failure_text
End Sub