Phppowerpoint 获取文本
Phppowerpoint in getting text
PhpPowerpoint 是否能够从现有的 pptx 中获取文本?
因为我想从现有的 pptx 中获取文本到 php。有没有可能实现?
是的,是的。
PhpPresentation,旧称 PHPPowerPoint,有一些读者:PowerPoint2007、PowerPoint97 和 ODPresentation。这些阅读器允许提取具有内容和格式的形状。
这是程序,甚至可以处理 unicode 字符。
它将 LF 转换为 CRLF 中的 pptx 文本以供阅读。
完成后(另存为 AllUniB)用 Word 打开它,转换 Unicode,清理多个段落(将 ^l 替换为 ^p,然后将 ^p^p 替换为 ^p)
你准备好了。
要转换,请将此代码添加到 pptx 宏并 运行 它:
Sub ExportTextUnicodeBin()
Dim oPres As Presentation
Dim oSlides As Slides
Dim oSld As Slide 'Slide Object
Dim oShp As Shape 'Shape Object
Dim iFile As Integer 'File handle for output
iFile = FreeFile 'Get a free file number
Dim adoStream As ADODB.Stream
Dim PathSep As String
Dim FileNum As Integer
Dim sTempString As String
Dim bytes() As Byte
#If Mac Then
PathSep = ":"
#Else
PathSep = "\"
#End If
Set adoStream = New ADODB.Stream
Set oPres = ActivePresentation
Set oSlides = oPres.Slides
FileNum = FreeFile
'Open output file
' NOTE: errors here if file hasn't been saved
' Open oPres.Path & PathSep & "AllText.TXT" For Output As FileNum
adoStream.Charset = "Unicode" 'or any string listed in registry HKEY_CLASSES_ROOT\MIME\Database\Charset
'open sream
adoStream.Open
adoStream.Type = adTypeBinary
For Each oSld In oSlides 'Loop thru each slide
' Include the slide number (the number that will appear in slide's
' page number placeholder; you could also use SlideIndex
' for the ordinal number of the slide in the file
bytes = StrConv("Slide:" & vbTab & CStr(oSld.SlideNumber) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
'Print #iFile, "Slide:" & vbTab & CStr(oSld.SlideNumber)
For Each oShp In oSld.Shapes 'Loop thru each shape on slide
'Check to see if shape has a text frame and text
If oShp.HasTextFrame And oShp.TextFrame.HasText Then
If oShp.Type = msoPlaceholder Then
Select Case oShp.PlaceholderFormat.Type
Case Is = ppPlaceholderTitle, ppPlaceholderCenterTitle
bytes = StrConv("Title:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
Case Is = ppPlaceholderBody
bytes = StrConv("Body:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
Case Is = ppPlaceholderSubtitle
bytes = StrConv("SubTitle:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
Case Else
bytes = StrConv("Other Placeholder:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
End Select
Else
bytes = StrConv("NoS:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
End If ' msoPlaceholder
Else ' it doesn't have a textframe - it might be a group that contains text so:
If oShp.Type = msoGroup Then
sTempString = TextFromGroupShape(oShp)
If Len(sTempString) > 0 Then
bytes = StrConv("Group: " & vbTab & Strings.Replace(sTempString, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
End If
End If
End If ' Has text frame/Has text
Next oShp
Next oSld
'Close output file
'Close #iFile
adoStream.SaveToFile oPres.Path & PathSep & "AllUniB.TXT"
adoStream.Close
End Sub
Function TextFromGroupShape(oSh As Shape) As String
' Returns the text from the shapes in a group
' and recursively, text within shapes within groups within groups etc.
Dim oGpSh As Shape
Dim sTempText As String
If oSh.Type = msoGroup Then
For Each oGpSh In oSh.GroupItems
With oGpSh
If .Type = msoGroup Then
sTempText = sTempText & TextFromGroupShape(oGpSh)
Else
If .HasTextFrame Then
If .TextFrame.HasText Then
sTempText = sTempText & "(Gp:) " & .TextFrame.TextRange.Text & vbCrLf
End If
End If
End If
End With
Next
End If
TextFromGroupShape = sTempText
NormalExit:
Exit Function
Errorhandler:
Resume Next
End Function
记得将 ADODB 库添加到 VBA 中的资源中,否则您将在 运行
上收到错误消息
PhpPowerpoint 是否能够从现有的 pptx 中获取文本? 因为我想从现有的 pptx 中获取文本到 php。有没有可能实现?
是的,是的。
PhpPresentation,旧称 PHPPowerPoint,有一些读者:PowerPoint2007、PowerPoint97 和 ODPresentation。这些阅读器允许提取具有内容和格式的形状。
这是程序,甚至可以处理 unicode 字符。 它将 LF 转换为 CRLF 中的 pptx 文本以供阅读。 完成后(另存为 AllUniB)用 Word 打开它,转换 Unicode,清理多个段落(将 ^l 替换为 ^p,然后将 ^p^p 替换为 ^p) 你准备好了。
要转换,请将此代码添加到 pptx 宏并 运行 它:
Sub ExportTextUnicodeBin()
Dim oPres As Presentation
Dim oSlides As Slides
Dim oSld As Slide 'Slide Object
Dim oShp As Shape 'Shape Object
Dim iFile As Integer 'File handle for output
iFile = FreeFile 'Get a free file number
Dim adoStream As ADODB.Stream
Dim PathSep As String
Dim FileNum As Integer
Dim sTempString As String
Dim bytes() As Byte
#If Mac Then
PathSep = ":"
#Else
PathSep = "\"
#End If
Set adoStream = New ADODB.Stream
Set oPres = ActivePresentation
Set oSlides = oPres.Slides
FileNum = FreeFile
'Open output file
' NOTE: errors here if file hasn't been saved
' Open oPres.Path & PathSep & "AllText.TXT" For Output As FileNum
adoStream.Charset = "Unicode" 'or any string listed in registry HKEY_CLASSES_ROOT\MIME\Database\Charset
'open sream
adoStream.Open
adoStream.Type = adTypeBinary
For Each oSld In oSlides 'Loop thru each slide
' Include the slide number (the number that will appear in slide's
' page number placeholder; you could also use SlideIndex
' for the ordinal number of the slide in the file
bytes = StrConv("Slide:" & vbTab & CStr(oSld.SlideNumber) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
'Print #iFile, "Slide:" & vbTab & CStr(oSld.SlideNumber)
For Each oShp In oSld.Shapes 'Loop thru each shape on slide
'Check to see if shape has a text frame and text
If oShp.HasTextFrame And oShp.TextFrame.HasText Then
If oShp.Type = msoPlaceholder Then
Select Case oShp.PlaceholderFormat.Type
Case Is = ppPlaceholderTitle, ppPlaceholderCenterTitle
bytes = StrConv("Title:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
Case Is = ppPlaceholderBody
bytes = StrConv("Body:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
Case Is = ppPlaceholderSubtitle
bytes = StrConv("SubTitle:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
Case Else
bytes = StrConv("Other Placeholder:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
End Select
Else
bytes = StrConv("NoS:" & vbTab & Strings.Replace(oShp.TextFrame.TextRange, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
End If ' msoPlaceholder
Else ' it doesn't have a textframe - it might be a group that contains text so:
If oShp.Type = msoGroup Then
sTempString = TextFromGroupShape(oShp)
If Len(sTempString) > 0 Then
bytes = StrConv("Group: " & vbTab & Strings.Replace(sTempString, vbCr, vbCrLf) & vbCrLf, vbFromUnicode)
adoStream.Write bytes
End If
End If
End If ' Has text frame/Has text
Next oShp
Next oSld
'Close output file
'Close #iFile
adoStream.SaveToFile oPres.Path & PathSep & "AllUniB.TXT"
adoStream.Close
End Sub
Function TextFromGroupShape(oSh As Shape) As String
' Returns the text from the shapes in a group
' and recursively, text within shapes within groups within groups etc.
Dim oGpSh As Shape
Dim sTempText As String
If oSh.Type = msoGroup Then
For Each oGpSh In oSh.GroupItems
With oGpSh
If .Type = msoGroup Then
sTempText = sTempText & TextFromGroupShape(oGpSh)
Else
If .HasTextFrame Then
If .TextFrame.HasText Then
sTempText = sTempText & "(Gp:) " & .TextFrame.TextRange.Text & vbCrLf
End If
End If
End If
End With
Next
End If
TextFromGroupShape = sTempText
NormalExit:
Exit Function
Errorhandler:
Resume Next
End Function
记得将 ADODB 库添加到 VBA 中的资源中,否则您将在 运行
上收到错误消息