从 Power Pivot 中提取 2000 万行 ("Item.data")
Rip 20 million rows from Power Pivot ("Item.data")
我收到了一个工作簿,其 Power Pivot 数据模型中包含两个表(一个大约 100 万行,另一个大约 2000 万行)。我想把这些数据导出来(导出成什么格式都可以,这里就以 CSV 为例),以便在 R 和 PostgreSQL 中使用。
我无法导出到 Excel table,因为有超过 100 万行;只有当我 select 大约 200,000 行时,复制粘贴数据才有效。
我尝试把 xlsx 文件改成 zip 并用 Notepad++ 打开其中的 "item.data" 文件,但它的内容是加密的。
我写了一段 VBA,对大约 50 万行的数据可以正常工作:
' Entry point: exports one table of the active workbook's data model to CSV.
'
' Initializes the active workbook's data model, obtains the model's ADO
' connection and hands it, together with the table name "Partners", to
' WriteDmvContent. Shows "Finished" when the export returns normally; on any
' runtime error the error description is shown instead.
Public Sub CreatePowerPivotDmvInventory()
    Dim conn As ADODB.Connection
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    wbTarget.Model.Initialize
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Call the export routine by passing the table name
    ' E.g. Partners
    WriteDmvContent "Partners", conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    MsgBox Err.Description
End Sub
' Runs the query "EVALUATE <dmvName>" on the given connection and writes the
' result to H:\output_table_<dmvName>.csv using Write #.
'
' Parameters:
'   dmvName - name of the table to evaluate; concatenated into the query text
'             and into the output file name as-is.
'   conn    - open ADO connection the query is executed on.
'
' Errors: any runtime error is re-raised to the caller (same number, source
' and description) after the output file and the recordset have been closed,
' so a failed export does not leave the file handle open.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim myFile As String
    Dim fileNo As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    mdx = "EVALUATE " & dmvName

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' Setup CSV file; FreeFile avoids clashing with a file number already in use
    myFile = "H:\output_table_" & dmvName & ".csv"
    fileNo = FreeFile
    Open myFile For Output As #fileNo
    fileIsOpen = True

    lastField = rs.Fields.Count - 1

    ' Output column names (trailing comma keeps the next item on the same line)
    For i = 0 To lastField
        If i = lastField Then
            Write #fileNo, rs.Fields(i).Name
        Else
            Write #fileNo, rs.Fields(i).Name,
        End If
    Next i

    ' Output of the query results, one line per record
    Do Until rs.EOF
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNo, rs.Fields(i)
            Else
                Write #fileNo, rs.Fields(i),
            End If
        Next i
        rs.MoveNext
    Loop

    Close #fileNo
    fileIsOpen = False
    rs.Close
    Set rs = Nothing
    Exit Sub

FailureOutput:
    ' Capture the error first so the cleanup below cannot lose it
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description

    If fileIsOpen Then Close #fileNo
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
        Set rs = Nothing
    End If

    ' Hand the original error back to the caller's handler
    Err.Raise errNumber, errSource, errDescription
End Sub
DAX Studio 将允许您查询 Excel 工作簿中的数据模型并输出为各种格式,包括平面文件。
您需要的查询是:
EVALUATE
<table name>
我找到了一个可行的 VBA 解决方案(不过 greggy 的方法对我也有效!):我的表太大,无法一次性整块导出,所以我按 'month' 循环过滤、逐月导出。这种做法看起来可行,把所有文件拼接在一起后得到了一个 1.2 GB 的 CSV:
' Converts a date to a numeric year-month key: year * 100 + month
' (e.g. 1 Dec 2000 -> 200012).
'
' The year is widened to Long explicitly so the multiplication does not rely
' on Variant overflow promotion, and the result type is declared as Long.
Function YYYYMM(aDate As Date) As Long
    YYYYMM = CLng(Year(aDate)) * 100 + Month(aDate)
End Function
' Returns the year-month key that follows the given one.
' A key whose last two digits are 12 rolls over to month 01 of the next year
' (e.g. 200012 -> 200101); any other key is simply incremented by one.
Function NextYYYYMM(YYYYMM As Long)
    Dim monthPart As Long

    monthPart = YYYYMM Mod 100

    If monthPart <> 12 Then
        NextYYYYMM = YYYYMM + 1
        Exit Function
    End If

    ' December: +100 moves to the next year, -11 moves back to month 01
    NextYYYYMM = YYYYMM + 89
End Function
' Entry point for the month-by-month export: initializes the active
' workbook's data model, fetches the model's ADO connection and passes it with
' the table name "table1" to WriteDmvContent. Displays "Finished" afterwards,
' or the error description if a runtime error occurs.
Public Sub CreatePowerPivotDmvInventory()
    Dim modelConn As ADODB.Connection
    Dim tableToExport As String
    Dim sourceBook As Workbook

    On Error GoTo ReportError

    Set sourceBook = ActiveWorkbook
    With sourceBook.Model
        .Initialize
        Set modelConn = .DataModelConnection.ModelConnection.ADOConnection
    End With

    ' Name of the table handed to the export routine
    tableToExport = "table1"
    WriteDmvContent tableToExport, modelConn

    MsgBox "Finished"
    Exit Sub

ReportError:
    MsgBox Err.Description
End Sub
' Exports a table in monthly slices, one CSV file per year-month.
'
' Iterates the year-month keys from 2000-12 through 2015-12 (built with the
' YYYYMM / NextYYYYMM helpers defined elsewhere in this file). For each key it
' runs
'   EVALUATE(CALCULATETABLE(<dmvName>, <dmvName>[yearmonth] = "YYYY-MM"))
' and writes the result to H:\vba_tbl_<dmvName>_<YYYY-MM>.csv. Every header
' and cell is concatenated into a string wrapped in quote characters before
' being passed to Write #.
'
' Parameters:
'   dmvName - table name; concatenated into the query and the file name as-is.
'   conn    - open ADO connection the queries are executed on.
'
' Errors: any runtime error is re-raised to the caller after the current
' output file and recordset have been closed.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim eval_field As String
    Dim eval_val As Variant
    Dim CurrYM As Long, LimYM As Long
    Dim String_Date As String
    Dim myFile As String
    Dim fileNo As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    'If table small enough:
    'mdx = "EVALUATE " & dmvName

    'Other-wise filter: loop through year_month
    CurrYM = YYYYMM(#12/1/2000#)
    LimYM = YYYYMM(#12/1/2015#)

    Do While CurrYM <= LimYM

        ' Turn the numeric key 200012 into the text "2000-12"
        String_Date = CStr(Left(CurrYM, 4)) + "-" + CStr(Right(CurrYM, 2))
        Debug.Print String_Date

        eval_field = "yearmonth"
        eval_val = String_Date

        mdx = "EVALUATE(CALCULATETABLE(" & dmvName & ", " & dmvName & "[" & eval_field & "] = """ & eval_val & """))"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file; FreeFile avoids clashing with a file number in use
        myFile = "H:\vba_tbl_" & dmvName & "_" & eval_val & ".csv"
        Debug.Print (myFile)
        fileNo = FreeFile
        Open myFile For Output As #fileNo
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNo, """" & rs.Fields(i).Name & """"
            Else
                Write #fileNo, """" & rs.Fields(i).Name & """",
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNo, """" & rs.Fields(i) & """"
                Else
                    Write #fileNo, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext
        Loop

        Close #fileNo
        fileIsOpen = False
        rs.Close
        Set rs = Nothing

        CurrYM = NextYYYYMM(CurrYM)
    Loop

    Exit Sub

FailureOutput:
    ' Capture the error first so the cleanup below cannot lose it
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description

    If fileIsOpen Then Close #fileNo
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
        Set rs = Nothing
    End If

    ' Hand the original error back to the caller's handler
    Err.Raise errNumber, errSource, errDescription
End Sub
我修改了 mptevsion 的脚本——现在它把表中的数据按每 n 行拆分,保存为多个 CSV 文件(默认每个文件 10 万行,可以通过修改 chunk_size 来调整)。
此脚本的优点是不依赖表中的任何字段来拆分数据;它通过 TOPNSKIP (https://dax.guide/topnskip/) 来实现这一点。
' Exports the data-model table "data" to numbered CSV files of at most
' chunk_size rows each, without relying on any column to split the data.
'
' Steps:
'   1. Initialize the active workbook's data model and get its ADO connection.
'   2. Query COUNTROWS of the table to learn how many rows must be exported.
'   3. Repeatedly run TOPNSKIP(<chunk>, <rows already written>, <table>) and
'      write each result to <save_path>\vba_tbl_<table>_<chunk_id>.csv.
'
' Shows "Finished" on success. On a runtime error the open file and recordset
' are closed and the error description is shown.
Public Sub CreatePowerPivotDmvInventory()
    Dim save_path As String
    Dim tblName As String
    Dim chunk_size As Long
    Dim chunk_id As Long
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim rows_limit As Long
    Dim rows_left As Long
    Dim conn As ADODB.Connection
    Dim myFile As String
    Dim fileNo As Integer
    Dim fileIsOpen As Boolean
    Dim errDescription As String

    On Error GoTo FailureOutput

    ActiveWorkbook.Model.Initialize

    save_path = "H:\power pivot\csv"
    tblName = "data"
    chunk_size = 100000

    Set conn = ActiveWorkbook.Model.DataModelConnection.ModelConnection.ADOConnection

    ' calculating number of rows in a table
    mdx = "evaluate {COUNTROWS('" & tblName & "')}"

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' A Null count is treated as zero rows instead of raising on assignment
    If IsNull(rs.Fields(0).Value) Then
        rows_limit = 0
    Else
        rows_limit = rs.Fields(0).Value
    End If

    ' Release the count recordset before the variable is reused below
    rs.Close
    Set rs = Nothing

    rows_left = rows_limit
    chunk_id = 1

    Do While rows_left > 0

        ' The last chunk only holds the remaining rows
        If rows_left < chunk_size Then
            chunk_size = rows_left
        End If

        ' Skip the rows already written (rows_limit - rows_left)
        mdx = "define var data_table = '" & tblName & "'" & Chr(10) & _
              "EVALUATE(" & Chr(10) & _
              " TOPNSKIP(" & chunk_size & ", " & (rows_limit - rows_left) & ", data_table)" & Chr(10) & _
              ");"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file; FreeFile avoids clashing with a file number in use
        myFile = save_path & "\vba_tbl_" & tblName & "_" & chunk_id & ".csv"
        Debug.Print (myFile)
        fileNo = FreeFile
        Open myFile For Output As #fileNo
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNo, """" & rs.Fields(i).Name & """"
            Else
                Write #fileNo, """" & rs.Fields(i).Name & """",
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNo, """" & rs.Fields(i) & """"
                Else
                    Write #fileNo, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext
        Loop

        rows_left = rows_left - chunk_size
        chunk_id = chunk_id + 1

        Close #fileNo
        fileIsOpen = False
        rs.Close
        Set rs = Nothing
    Loop

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Capture the message first so the cleanup below cannot lose it
    errDescription = Err.Description

    If fileIsOpen Then Close #fileNo
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
        Set rs = Nothing
    End If

    MsgBox errDescription
End Sub
我收到了一个工作簿,其 Power Pivot 数据模型中包含两个表(一个大约 100 万行,另一个大约 2000 万行)。我想把这些数据导出来(导出成什么格式都可以,这里就以 CSV 为例),以便在 R 和 PostgreSQL 中使用。
我无法导出到 Excel table,因为有超过 100 万行;只有当我 select 大约 200,000 行时,复制粘贴数据才有效。
我尝试把 xlsx 文件改成 zip 并用 Notepad++ 打开其中的 "item.data" 文件,但它的内容是加密的。
我写了一段 VBA,对大约 50 万行的数据可以正常工作:
' Entry point: exports one table of the active workbook's data model to CSV.
'
' Initializes the active workbook's data model, obtains the model's ADO
' connection and hands it, together with the table name "Partners", to
' WriteDmvContent. Shows "Finished" when the export returns normally; on any
' runtime error the error description is shown instead.
Public Sub CreatePowerPivotDmvInventory()
    Dim conn As ADODB.Connection
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    wbTarget.Model.Initialize
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Call the export routine by passing the table name
    ' E.g. Partners
    WriteDmvContent "Partners", conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    MsgBox Err.Description
End Sub
' Runs the query "EVALUATE <dmvName>" on the given connection and writes the
' result to H:\output_table_<dmvName>.csv using Write #.
'
' Parameters:
'   dmvName - name of the table to evaluate; concatenated into the query text
'             and into the output file name as-is.
'   conn    - open ADO connection the query is executed on.
'
' Errors: any runtime error is re-raised to the caller (same number, source
' and description) after the output file and the recordset have been closed,
' so a failed export does not leave the file handle open.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim myFile As String
    Dim fileNo As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    mdx = "EVALUATE " & dmvName

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' Setup CSV file; FreeFile avoids clashing with a file number already in use
    myFile = "H:\output_table_" & dmvName & ".csv"
    fileNo = FreeFile
    Open myFile For Output As #fileNo
    fileIsOpen = True

    lastField = rs.Fields.Count - 1

    ' Output column names (trailing comma keeps the next item on the same line)
    For i = 0 To lastField
        If i = lastField Then
            Write #fileNo, rs.Fields(i).Name
        Else
            Write #fileNo, rs.Fields(i).Name,
        End If
    Next i

    ' Output of the query results, one line per record
    Do Until rs.EOF
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNo, rs.Fields(i)
            Else
                Write #fileNo, rs.Fields(i),
            End If
        Next i
        rs.MoveNext
    Loop

    Close #fileNo
    fileIsOpen = False
    rs.Close
    Set rs = Nothing
    Exit Sub

FailureOutput:
    ' Capture the error first so the cleanup below cannot lose it
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description

    If fileIsOpen Then Close #fileNo
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
        Set rs = Nothing
    End If

    ' Hand the original error back to the caller's handler
    Err.Raise errNumber, errSource, errDescription
End Sub
DAX Studio 将允许您查询 Excel 工作簿中的数据模型并输出为各种格式,包括平面文件。
您需要的查询是:
EVALUATE
<table name>
我找到了一个可行的 VBA 解决方案(不过 greggy 的方法对我也有效!):我的表太大,无法一次性整块导出,所以我按 'month' 循环过滤、逐月导出。这种做法看起来可行,把所有文件拼接在一起后得到了一个 1.2 GB 的 CSV:
' Converts a date to a numeric year-month key: year * 100 + month
' (e.g. 1 Dec 2000 -> 200012).
'
' The year is widened to Long explicitly so the multiplication does not rely
' on Variant overflow promotion, and the result type is declared as Long.
Function YYYYMM(aDate As Date) As Long
    YYYYMM = CLng(Year(aDate)) * 100 + Month(aDate)
End Function
' Returns the year-month key that follows the given one.
' A key whose last two digits are 12 rolls over to month 01 of the next year
' (e.g. 200012 -> 200101); any other key is simply incremented by one.
Function NextYYYYMM(YYYYMM As Long)
    Dim monthPart As Long

    monthPart = YYYYMM Mod 100

    If monthPart <> 12 Then
        NextYYYYMM = YYYYMM + 1
        Exit Function
    End If

    ' December: +100 moves to the next year, -11 moves back to month 01
    NextYYYYMM = YYYYMM + 89
End Function
' Entry point for the month-by-month export: initializes the active
' workbook's data model, fetches the model's ADO connection and passes it with
' the table name "table1" to WriteDmvContent. Displays "Finished" afterwards,
' or the error description if a runtime error occurs.
Public Sub CreatePowerPivotDmvInventory()
    Dim modelConn As ADODB.Connection
    Dim tableToExport As String
    Dim sourceBook As Workbook

    On Error GoTo ReportError

    Set sourceBook = ActiveWorkbook
    With sourceBook.Model
        .Initialize
        Set modelConn = .DataModelConnection.ModelConnection.ADOConnection
    End With

    ' Name of the table handed to the export routine
    tableToExport = "table1"
    WriteDmvContent tableToExport, modelConn

    MsgBox "Finished"
    Exit Sub

ReportError:
    MsgBox Err.Description
End Sub
' Exports a table in monthly slices, one CSV file per year-month.
'
' Iterates the year-month keys from 2000-12 through 2015-12 (built with the
' YYYYMM / NextYYYYMM helpers defined elsewhere in this file). For each key it
' runs
'   EVALUATE(CALCULATETABLE(<dmvName>, <dmvName>[yearmonth] = "YYYY-MM"))
' and writes the result to H:\vba_tbl_<dmvName>_<YYYY-MM>.csv. Every header
' and cell is concatenated into a string wrapped in quote characters before
' being passed to Write #.
'
' Parameters:
'   dmvName - table name; concatenated into the query and the file name as-is.
'   conn    - open ADO connection the queries are executed on.
'
' Errors: any runtime error is re-raised to the caller after the current
' output file and recordset have been closed.
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim eval_field As String
    Dim eval_val As Variant
    Dim CurrYM As Long, LimYM As Long
    Dim String_Date As String
    Dim myFile As String
    Dim fileNo As Integer
    Dim fileIsOpen As Boolean
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo FailureOutput

    'If table small enough:
    'mdx = "EVALUATE " & dmvName

    'Other-wise filter: loop through year_month
    CurrYM = YYYYMM(#12/1/2000#)
    LimYM = YYYYMM(#12/1/2015#)

    Do While CurrYM <= LimYM

        ' Turn the numeric key 200012 into the text "2000-12"
        String_Date = CStr(Left(CurrYM, 4)) + "-" + CStr(Right(CurrYM, 2))
        Debug.Print String_Date

        eval_field = "yearmonth"
        eval_val = String_Date

        mdx = "EVALUATE(CALCULATETABLE(" & dmvName & ", " & dmvName & "[" & eval_field & "] = """ & eval_val & """))"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file; FreeFile avoids clashing with a file number in use
        myFile = "H:\vba_tbl_" & dmvName & "_" & eval_val & ".csv"
        Debug.Print (myFile)
        fileNo = FreeFile
        Open myFile For Output As #fileNo
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNo, """" & rs.Fields(i).Name & """"
            Else
                Write #fileNo, """" & rs.Fields(i).Name & """",
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNo, """" & rs.Fields(i) & """"
                Else
                    Write #fileNo, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext
        Loop

        Close #fileNo
        fileIsOpen = False
        rs.Close
        Set rs = Nothing

        CurrYM = NextYYYYMM(CurrYM)
    Loop

    Exit Sub

FailureOutput:
    ' Capture the error first so the cleanup below cannot lose it
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description

    If fileIsOpen Then Close #fileNo
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
        Set rs = Nothing
    End If

    ' Hand the original error back to the caller's handler
    Err.Raise errNumber, errSource, errDescription
End Sub
我修改了 mptevsion 的脚本——现在它把表中的数据按每 n 行拆分,保存为多个 CSV 文件(默认每个文件 10 万行,可以通过修改 chunk_size 来调整)。
此脚本的优点是不依赖表中的任何字段来拆分数据;它通过 TOPNSKIP (https://dax.guide/topnskip/) 来实现这一点。
' Exports the data-model table "data" to numbered CSV files of at most
' chunk_size rows each, without relying on any column to split the data.
'
' Steps:
'   1. Initialize the active workbook's data model and get its ADO connection.
'   2. Query COUNTROWS of the table to learn how many rows must be exported.
'   3. Repeatedly run TOPNSKIP(<chunk>, <rows already written>, <table>) and
'      write each result to <save_path>\vba_tbl_<table>_<chunk_id>.csv.
'
' Shows "Finished" on success. On a runtime error the open file and recordset
' are closed and the error description is shown.
Public Sub CreatePowerPivotDmvInventory()
    Dim save_path As String
    Dim tblName As String
    Dim chunk_size As Long
    Dim chunk_id As Long
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim rows_limit As Long
    Dim rows_left As Long
    Dim conn As ADODB.Connection
    Dim myFile As String
    Dim fileNo As Integer
    Dim fileIsOpen As Boolean
    Dim errDescription As String

    On Error GoTo FailureOutput

    ActiveWorkbook.Model.Initialize

    save_path = "H:\power pivot\csv"
    tblName = "data"
    chunk_size = 100000

    Set conn = ActiveWorkbook.Model.DataModelConnection.ModelConnection.ADOConnection

    ' calculating number of rows in a table
    mdx = "evaluate {COUNTROWS('" & tblName & "')}"

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' A Null count is treated as zero rows instead of raising on assignment
    If IsNull(rs.Fields(0).Value) Then
        rows_limit = 0
    Else
        rows_limit = rs.Fields(0).Value
    End If

    ' Release the count recordset before the variable is reused below
    rs.Close
    Set rs = Nothing

    rows_left = rows_limit
    chunk_id = 1

    Do While rows_left > 0

        ' The last chunk only holds the remaining rows
        If rows_left < chunk_size Then
            chunk_size = rows_left
        End If

        ' Skip the rows already written (rows_limit - rows_left)
        mdx = "define var data_table = '" & tblName & "'" & Chr(10) & _
              "EVALUATE(" & Chr(10) & _
              " TOPNSKIP(" & chunk_size & ", " & (rows_limit - rows_left) & ", data_table)" & Chr(10) & _
              ");"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file; FreeFile avoids clashing with a file number in use
        myFile = save_path & "\vba_tbl_" & tblName & "_" & chunk_id & ".csv"
        Debug.Print (myFile)
        fileNo = FreeFile
        Open myFile For Output As #fileNo
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNo, """" & rs.Fields(i).Name & """"
            Else
                Write #fileNo, """" & rs.Fields(i).Name & """",
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNo, """" & rs.Fields(i) & """"
                Else
                    Write #fileNo, """" & rs.Fields(i) & """",
                End If
            Next i
            rs.MoveNext
        Loop

        rows_left = rows_left - chunk_size
        chunk_id = chunk_id + 1

        Close #fileNo
        fileIsOpen = False
        rs.Close
        Set rs = Nothing
    Loop

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    ' Capture the message first so the cleanup below cannot lose it
    errDescription = Err.Description

    If fileIsOpen Then Close #fileNo
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
        Set rs = Nothing
    End If

    MsgBox errDescription
End Sub