从 PDF 中提取声音注释
Extract sound annotations from a PDF
我有一个用来列出 PDF 文件注释的脚本(参见“Parse annotations from a pdf”):
import popplerqt5
import argparse
def extract(fn):
    """Print and return the text contents of every annotation in a PDF.

    Args:
        fn: Path of the PDF file to load with Poppler.

    Returns:
        A list holding the non-empty ``contents()`` string of each
        annotation, in page order.

    Raises:
        OSError: If Poppler cannot load the document.
    """
    doc = popplerqt5.Poppler.Document.load(fn)
    if doc is None:
        # Poppler reports a failed load with a null document rather than an
        # exception; fail here with a clear message instead of an
        # AttributeError on the first method call.
        raise OSError(f'cannot load PDF document: {fn}')

    annotations = []
    for i in range(doc.numPages()):
        page = doc.page(i)
        for annot in page.annotations():
            contents = annot.contents()
            # Annotations without text (e.g. sound annotations) are skipped.
            if contents:
                annotations.append(contents)
                print(f'page={i + 1} {contents}')

    print(f'{len(annotations)} annotation(s) found')
    return annotations
if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the path
    # of the PDF whose annotations are listed.
    parser = argparse.ArgumentParser()
    parser.add_argument('fn')
    args = parser.parse_args()
    extract(args.fn)
但它只适用于文本注释。Python 有很多 PDF 库,例如 Poppler、PyPDF2、PyMuPDF,我大量查阅了它们的文档和源代码,但据我所知,它们都无法提取声音注释的二进制数据。你知道有哪个库可以做到这一点吗?我需要提取这些声音注释的二进制数据并将其转换为 MP3。
下一个版本的 PyMuPDF 将支持提取音频注释。可以用下面这个基于 PyMuPDF 的脚本从 PDF 中提取音频注释;用法很简单,只需运行脚本并把 PDF 文件作为第一个参数传入:python script.py myfile.pdf
注意:仅适用于 Windows。
import fitz, sys, os, subprocess
# PDF sound encodings that ffmpeg reads with a dedicated raw-audio format
# instead of a linear PCM one.
COMPANDED_FORMATS = {"muLaw": "mulaw", "ALaw": "alaw"}


def raw_audio_format(sound):
    """Return the ffmpeg input format (``-f`` value) describing *sound*.

    Args:
        sound: Sound dictionary of an annotation.  Only the optional
            ``"encoding"`` and ``"bps"`` keys are read; a missing encoding
            is treated as unsigned and a missing ``"bps"`` as 8 bits.

    Returns:
        An ffmpeg raw-audio format name such as ``"u8"`` or ``"s16be"``.
    """
    encoding = sound.get("encoding")
    if encoding in COMPANDED_FORMATS:
        return COMPANDED_FORMATS[encoding]
    sign = "s" if encoding == "Signed" else "u"
    bps = sound.get("bps", 8)
    # Single-byte samples have no byte order: ffmpeg knows "u8"/"s8" but not
    # "u8be", so the big-endian suffix is only added for wider samples.
    suffix = "be" if int(bps) > 8 else ""
    return sign + str(bps) + suffix


def ffmpeg_command(sound, raw_path, mp3_path):
    """Build the ffmpeg argument list that converts *raw_path* to *mp3_path*.

    Args:
        sound: Sound dictionary; ``"rate"`` is required, ``"channels"``
            defaults to 1.
        raw_path: Path of the raw sample dump used as ffmpeg input.
        mp3_path: Path of the MP3 file ffmpeg should write.

    Returns:
        The command as a list of strings, suitable for ``subprocess``.
    """
    return [
        "ffmpeg", "-hide_banner",
        # Overwrite without prompting so that re-running the script on the
        # same PDF does not block on ffmpeg's interactive question.
        "-y",
        # The following three options describe the headerless input and
        # therefore have to precede "-i".
        "-f", raw_audio_format(sound),
        "-ar", str(sound["rate"]),
        "-ac", str(sound.get("channels", 1)),
        "-i", str(raw_path),
        str(mp3_path),
    ]


def output_prefix(ifile):
    """Create the output folder for *ifile* and return the file-name prefix.

    The folder is named after the PDF (without extension) and is created
    next to it, or in the current directory when *ifile* has no directory
    part.  The returned prefix is ``<folder>/<pdf name>``.
    """
    ofolder = os.path.dirname(ifile) or os.getcwd()
    flnm = os.path.splitext(os.path.basename(ifile))[0]
    defolder = os.path.join(ofolder, flnm)
    os.makedirs(defolder, exist_ok=True)
    return os.path.join(defolder, flnm)


def main():
    """Dump each sound annotation of the PDF given on the command line.

    For every sound annotation a ``.raw`` file with the sample data is
    written and then converted to ``.mp3`` by calling ffmpeg.
    """
    if len(sys.argv) != 2:
        sys.exit("need filename as parameter")
    ifile = sys.argv[1]
    doc = fitz.open(ifile)
    prefix = output_prefix(ifile)

    for page in doc:
        print(page)
        annot_number = 1
        for annot in page.annots(types=[fitz.PDF_ANNOT_SOUND]):
            try:
                # Newer PyMuPDF releases name the method get_sound(); older
                # ones only provide the camelCase soundGet().
                get_sound = getattr(annot, "get_sound", None) or annot.soundGet
                sound = get_sound()
            except Exception as e:
                # Best effort: report the problem and go on with the next
                # annotation.
                print(e)
                continue

            for k, v in sound.items():
                print(k, "=", v if k != "stream" else len(v))

            stem = f"{prefix}.page.{page.number}.annot.{annot_number}"
            annot_number += 1

            raw_path = stem + ".raw"
            with open(raw_path, "wb") as fout:
                fout.write(sound["stream"])

            # The argument list is executed directly (no shell), which works
            # the same way on every platform.
            subprocess.call(ffmpeg_command(sound, raw_path, stem + ".mp3"))


if __name__ == "__main__":
    main()
我有一个用来列出 PDF 文件注释的脚本(参见“Parse annotations from a pdf”):
import popplerqt5
import argparse
def extract(fn):
    """Print and return the text contents of every annotation in a PDF.

    Args:
        fn: Path of the PDF file to load with Poppler.

    Returns:
        A list holding the non-empty ``contents()`` string of each
        annotation, in page order.

    Raises:
        OSError: If Poppler cannot load the document.
    """
    doc = popplerqt5.Poppler.Document.load(fn)
    if doc is None:
        # Poppler reports a failed load with a null document rather than an
        # exception; fail here with a clear message instead of an
        # AttributeError on the first method call.
        raise OSError(f'cannot load PDF document: {fn}')

    annotations = []
    for i in range(doc.numPages()):
        page = doc.page(i)
        for annot in page.annotations():
            contents = annot.contents()
            # Annotations without text (e.g. sound annotations) are skipped.
            if contents:
                annotations.append(contents)
                print(f'page={i + 1} {contents}')

    print(f'{len(annotations)} annotation(s) found')
    return annotations
if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the path
    # of the PDF whose annotations are listed.
    parser = argparse.ArgumentParser()
    parser.add_argument('fn')
    args = parser.parse_args()
    extract(args.fn)
但它只适用于文本注释。Python 有很多 PDF 库,例如 Poppler、PyPDF2、PyMuPDF,我大量查阅了它们的文档和源代码,但据我所知,它们都无法提取声音注释的二进制数据。你知道有哪个库可以做到这一点吗?我需要提取这些声音注释的二进制数据并将其转换为 MP3。
下一个版本的 PyMuPDF 将支持提取音频注释。可以用下面这个基于 PyMuPDF 的脚本从 PDF 中提取音频注释;用法很简单,只需运行脚本并把 PDF 文件作为第一个参数传入:python script.py myfile.pdf
注意:仅适用于 Windows。
import fitz, sys, os, subprocess
# PDF sound encodings that ffmpeg reads with a dedicated raw-audio format
# instead of a linear PCM one.
COMPANDED_FORMATS = {"muLaw": "mulaw", "ALaw": "alaw"}


def raw_audio_format(sound):
    """Return the ffmpeg input format (``-f`` value) describing *sound*.

    Args:
        sound: Sound dictionary of an annotation.  Only the optional
            ``"encoding"`` and ``"bps"`` keys are read; a missing encoding
            is treated as unsigned and a missing ``"bps"`` as 8 bits.

    Returns:
        An ffmpeg raw-audio format name such as ``"u8"`` or ``"s16be"``.
    """
    encoding = sound.get("encoding")
    if encoding in COMPANDED_FORMATS:
        return COMPANDED_FORMATS[encoding]
    sign = "s" if encoding == "Signed" else "u"
    bps = sound.get("bps", 8)
    # Single-byte samples have no byte order: ffmpeg knows "u8"/"s8" but not
    # "u8be", so the big-endian suffix is only added for wider samples.
    suffix = "be" if int(bps) > 8 else ""
    return sign + str(bps) + suffix


def ffmpeg_command(sound, raw_path, mp3_path):
    """Build the ffmpeg argument list that converts *raw_path* to *mp3_path*.

    Args:
        sound: Sound dictionary; ``"rate"`` is required, ``"channels"``
            defaults to 1.
        raw_path: Path of the raw sample dump used as ffmpeg input.
        mp3_path: Path of the MP3 file ffmpeg should write.

    Returns:
        The command as a list of strings, suitable for ``subprocess``.
    """
    return [
        "ffmpeg", "-hide_banner",
        # Overwrite without prompting so that re-running the script on the
        # same PDF does not block on ffmpeg's interactive question.
        "-y",
        # The following three options describe the headerless input and
        # therefore have to precede "-i".
        "-f", raw_audio_format(sound),
        "-ar", str(sound["rate"]),
        "-ac", str(sound.get("channels", 1)),
        "-i", str(raw_path),
        str(mp3_path),
    ]


def output_prefix(ifile):
    """Create the output folder for *ifile* and return the file-name prefix.

    The folder is named after the PDF (without extension) and is created
    next to it, or in the current directory when *ifile* has no directory
    part.  The returned prefix is ``<folder>/<pdf name>``.
    """
    ofolder = os.path.dirname(ifile) or os.getcwd()
    flnm = os.path.splitext(os.path.basename(ifile))[0]
    defolder = os.path.join(ofolder, flnm)
    os.makedirs(defolder, exist_ok=True)
    return os.path.join(defolder, flnm)


def main():
    """Dump each sound annotation of the PDF given on the command line.

    For every sound annotation a ``.raw`` file with the sample data is
    written and then converted to ``.mp3`` by calling ffmpeg.
    """
    if len(sys.argv) != 2:
        sys.exit("need filename as parameter")
    ifile = sys.argv[1]
    doc = fitz.open(ifile)
    prefix = output_prefix(ifile)

    for page in doc:
        print(page)
        annot_number = 1
        for annot in page.annots(types=[fitz.PDF_ANNOT_SOUND]):
            try:
                # Newer PyMuPDF releases name the method get_sound(); older
                # ones only provide the camelCase soundGet().
                get_sound = getattr(annot, "get_sound", None) or annot.soundGet
                sound = get_sound()
            except Exception as e:
                # Best effort: report the problem and go on with the next
                # annotation.
                print(e)
                continue

            for k, v in sound.items():
                print(k, "=", v if k != "stream" else len(v))

            stem = f"{prefix}.page.{page.number}.annot.{annot_number}"
            annot_number += 1

            raw_path = stem + ".raw"
            with open(raw_path, "wb") as fout:
                fout.write(sound["stream"])

            # The argument list is executed directly (no shell), which works
            # the same way on every platform.
            subprocess.call(ffmpeg_command(sound, raw_path, stem + ".mp3"))


if __name__ == "__main__":
    main()