在 Writer 中生成二元组的宏
macro to generate bigrams in Writer
如何使用 Basic 语言生成二元组(bigram)?
我可以在 Python 中像这样...
import nltk, sys
from nltk.tokenize import word_tokenize
# Write the bigrams of every line of mytext.txt to mygram1.txt, one
# space-joined bigram per output line.
#
# Both files are managed by a single `with` statement so they are flushed and
# closed on exit, and print() is handed the output file explicitly instead of
# rebinding sys.stdout (which would leave the handle open and silently
# redirect every later print in the process). The output file is opened
# first, as before, so it is created/truncated even if the input is missing.
with open("mygram1.txt", "w") as out, open("mytext.txt") as f:
    for text in f:
        tokens = nltk.word_tokenize(text)
        # When a line produces no bigrams, print() still emits its
        # terminating newline, so such a line shows up as a blank output line.
        print(*map(' '.join, nltk.bigrams(tokens)), sep='\n', file=out)
但我需要一个可以在 LibreOffice Writer 中运行的宏。我不想使用 Python。
更新:
就像二元组(bigram)一样,nltk 还有三元组方法,我通过 nltk.trigrams 调用;如果我需要四元组或五元组,还有 everygrams!
from nltk import everygrams
import nltk, sys
from nltk.tokenize import word_tokenize
# Write every 4-gram of every line of /home/ubuntu/mytext.txt to
# myfourgram1.txt, one space-joined 4-gram per output line.
#
# Both files are managed by a single `with` statement so they are flushed and
# closed on exit, and print() is handed the output file explicitly instead of
# rebinding sys.stdout (which would leave the handle open and silently
# redirect every later print in the process). The output file is opened
# first, as before, so it is created/truncated even if the input is missing.
with open("myfourgram1.txt", "w") as out, open("/home/ubuntu/mytext.txt") as f:
    for text in f:
        tokens = nltk.word_tokenize(text)
        # everygrams(tokens, 4, 4) is iterated directly; wrapping it in
        # list() only built a throwaway copy.
        for gram in everygrams(tokens, 4, 4):
            print(" ".join(gram), file=out)
在 libreoffice basic 中可以吗?
您可以复用我对您上一个问题的回答中的代码,来复现您的 Python 代码的行为。首先删除所有与拼写检查、生成备选方案和排序相关的内容,从而使其更短,然后更改将结果插入新文档的那一行,使其仅插入成对的单词。您不必将输入文本放在 .txt 文件中,而必须将它们放入 Writer 文档中,结果将出现在一个新的 Writer 文档中。
它应该类似于下面的清单。其中还包括辅助函数 IsWordSeparator()。
Option Explicit
Sub ListBigrams
    ' Lists every pair of consecutive words (bigram) found in the current
    ' Writer document. Each pair is written as "first second" followed by
    ' Chr(13) into a newly created Writer document.
    '
    ' Words are runs of characters delimited by the characters that
    ' IsWordSeparator() accepts. The previous word is carried over from one
    ' paragraph to the next, so the last word of a paragraph is paired with
    ' the first word of the following one.
    Dim oSrcDoc As Object, oReader As Object
    Dim oOutDoc As Object, oOutText As Object, oWriter As Object
    Dim sLine As String, sPrev As String, sWord As String
    Dim nPos As Long, nStart As Long, nCode As Long
    Dim bHavePrev As Boolean

    ' Capture the source document before the new document is loaded.
    oSrcDoc = ThisComponent
    oReader = oSrcDoc.getText.createTextCursor()
    oReader.gotoStart(False)
    oReader.collapseToStart()

    ' Create the empty Writer document that receives the bigrams.
    oOutDoc = StarDesktop.loadComponentFromURL( "private:factory/swriter", "_blank", 0, Array() )
    oOutText = oOutDoc.getText()
    oWriter = oOutText.createTextCursor()

    sPrev = ""
    bHavePrev = False    ' becomes True once the very first word has been seen

    Do
        ' Select the current paragraph and read its text. A trailing space
        ' is appended so that the last word is also terminated by a separator.
        oReader.gotoEndOfParagraph(True)
        sLine = oReader.getString() & " "

        nStart = 1       ' 1-based position where the current word begins
        For nPos = 1 To Len(sLine)
            nCode = ASC(Mid(sLine, nPos, 1))
            If IsWordSeparator(nCode) Then
                ' A separator ends the current word, if it is non-empty.
                If nPos > nStart Then
                    sWord = Mid(sLine, nStart, nPos - nStart)
                    If bHavePrev Then
                        oOutText.insertString(oWriter, sPrev & " " & sWord & Chr(13), False)
                    Else
                        ' The very first word has no predecessor to pair with.
                        bHavePrev = True
                    End If
                    sPrev = sWord
                End If
                ' The next word can start right after this separator.
                nStart = nPos + 1
            End If
        Next nPos
    Loop While oReader.gotoNextParagraph(False)
End Sub
'----------------------------------------------------------------------------
' OOME Listing 360.
Function IsWordSeparator(iChar As Long) As Boolean
    ' Returns True when the character code iChar is one of the recognised
    ' word separators, False otherwise:
    '    9   horizontal tab   \t
    '   10   new line         \n
    '   13   carriage return  \r
    '   32   space
    '  160   non-breaking space
    IsWordSeparator = (iChar = 9) Or (iChar = 10) Or (iChar = 13) Or (iChar = 32) Or (iChar = 160)
End Function
即使在 Python 中更容易做到这一点,正如 Jim K 所建议的那样,BASIC 方法也可以更容易地向用户分发功能,因为他们不必安装 Python 和 NLTK 库(这并不简单)。
如何使用 Basic 语言生成二元组(bigram)?
我可以在 Python 中像这样...
import nltk, sys
from nltk.tokenize import word_tokenize
# Write the bigrams of every line of mytext.txt to mygram1.txt, one
# space-joined bigram per output line.
#
# Both files are managed by a single `with` statement so they are flushed and
# closed on exit, and print() is handed the output file explicitly instead of
# rebinding sys.stdout (which would leave the handle open and silently
# redirect every later print in the process). The output file is opened
# first, as before, so it is created/truncated even if the input is missing.
with open("mygram1.txt", "w") as out, open("mytext.txt") as f:
    for text in f:
        tokens = nltk.word_tokenize(text)
        # When a line produces no bigrams, print() still emits its
        # terminating newline, so such a line shows up as a blank output line.
        print(*map(' '.join, nltk.bigrams(tokens)), sep='\n', file=out)
但我需要一个可以在 LibreOffice Writer 中运行的宏。我不想使用 Python。
更新:
就像二元组(bigram)一样,nltk 还有三元组方法,我通过 nltk.trigrams 调用;如果我需要四元组或五元组,还有 everygrams!
from nltk import everygrams
import nltk, sys
from nltk.tokenize import word_tokenize
# Write every 4-gram of every line of /home/ubuntu/mytext.txt to
# myfourgram1.txt, one space-joined 4-gram per output line.
#
# Both files are managed by a single `with` statement so they are flushed and
# closed on exit, and print() is handed the output file explicitly instead of
# rebinding sys.stdout (which would leave the handle open and silently
# redirect every later print in the process). The output file is opened
# first, as before, so it is created/truncated even if the input is missing.
with open("myfourgram1.txt", "w") as out, open("/home/ubuntu/mytext.txt") as f:
    for text in f:
        tokens = nltk.word_tokenize(text)
        # everygrams(tokens, 4, 4) is iterated directly; wrapping it in
        # list() only built a throwaway copy.
        for gram in everygrams(tokens, 4, 4):
            print(" ".join(gram), file=out)
在 libreoffice basic 中可以吗?
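您可以复用我对您上一个问题的回答中的代码,来复现您的 Python 代码的行为。首先删除所有与拼写检查、生成备选方案和排序相关的内容,从而使其更短,然后更改将结果插入新文档的那一行,使其仅插入成对的单词。您不必将输入文本放在 .txt 文件中,而必须将它们放入 Writer 文档中,结果将出现在一个新的 Writer 文档中。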
它应该类似于下面的清单。其中还包括辅助函数 IsWordSeparator()。
Option Explicit
Sub ListBigrams
    ' Lists every pair of consecutive words (bigram) found in the current
    ' Writer document. Each pair is written as "first second" followed by
    ' Chr(13) into a newly created Writer document.
    '
    ' Words are runs of characters delimited by the characters that
    ' IsWordSeparator() accepts. The previous word is carried over from one
    ' paragraph to the next, so the last word of a paragraph is paired with
    ' the first word of the following one.
    Dim oSrcDoc As Object, oReader As Object
    Dim oOutDoc As Object, oOutText As Object, oWriter As Object
    Dim sLine As String, sPrev As String, sWord As String
    Dim nPos As Long, nStart As Long, nCode As Long
    Dim bHavePrev As Boolean

    ' Capture the source document before the new document is loaded.
    oSrcDoc = ThisComponent
    oReader = oSrcDoc.getText.createTextCursor()
    oReader.gotoStart(False)
    oReader.collapseToStart()

    ' Create the empty Writer document that receives the bigrams.
    oOutDoc = StarDesktop.loadComponentFromURL( "private:factory/swriter", "_blank", 0, Array() )
    oOutText = oOutDoc.getText()
    oWriter = oOutText.createTextCursor()

    sPrev = ""
    bHavePrev = False    ' becomes True once the very first word has been seen

    Do
        ' Select the current paragraph and read its text. A trailing space
        ' is appended so that the last word is also terminated by a separator.
        oReader.gotoEndOfParagraph(True)
        sLine = oReader.getString() & " "

        nStart = 1       ' 1-based position where the current word begins
        For nPos = 1 To Len(sLine)
            nCode = ASC(Mid(sLine, nPos, 1))
            If IsWordSeparator(nCode) Then
                ' A separator ends the current word, if it is non-empty.
                If nPos > nStart Then
                    sWord = Mid(sLine, nStart, nPos - nStart)
                    If bHavePrev Then
                        oOutText.insertString(oWriter, sPrev & " " & sWord & Chr(13), False)
                    Else
                        ' The very first word has no predecessor to pair with.
                        bHavePrev = True
                    End If
                    sPrev = sWord
                End If
                ' The next word can start right after this separator.
                nStart = nPos + 1
            End If
        Next nPos
    Loop While oReader.gotoNextParagraph(False)
End Sub
'----------------------------------------------------------------------------
' OOME Listing 360.
Function IsWordSeparator(iChar As Long) As Boolean
    ' Returns True when the character code iChar is one of the recognised
    ' word separators, False otherwise:
    '    9   horizontal tab   \t
    '   10   new line         \n
    '   13   carriage return  \r
    '   32   space
    '  160   non-breaking space
    IsWordSeparator = (iChar = 9) Or (iChar = 10) Or (iChar = 13) Or (iChar = 32) Or (iChar = 160)
End Function
即使在 Python 中更容易做到这一点,正如 Jim K 所建议的那样,BASIC 方法也可以更容易地向用户分发功能,因为他们不必安装 Python 和 NLTK 库(这并不简单)。