从 PDF 中提取声音注释

Extract sound annotations from a PDF

我有一个列出 PDF 文件注释的脚本(参见 "Parse annotations from a pdf"):

import popplerqt5
import argparse


def extract(fn):
    """Collect the non-empty annotation contents of a PDF.

    Loads ``fn`` with poppler-qt5, visits every page in order, and for each
    annotation whose ``contents()`` is truthy prints a line with the 1-based
    page number and the contents. A summary count is printed at the end.

    Returns the list of collected contents, in page order.
    """
    document = popplerqt5.Poppler.Document.load(fn)
    page_count = document.numPages()
    found = []
    # Pages are fetched by 0-based index but reported 1-based.
    for page_no in range(1, page_count + 1):
        for annotation in document.page(page_no - 1).annotations():
            text = annotation.contents()
            if not text:
                continue
            found.append(text)
            print(f'page={page_no} {text}')

    print(f'{len(found)} annotation(s) found')
    return found


if __name__ == '__main__':
    # Command line: a single positional argument, the path of the PDF to scan.
    cli = argparse.ArgumentParser()
    cli.add_argument('fn')
    extract(cli.parse_args().fn)

但它只适用于文本注释。有很多 Python 库,例如 Poppler、PyPDF2、PyMuPDF,我大量查阅了它们的文档和源代码,据我所知,它们都无法提取声音注释的二进制数据。你知道有哪个库可以做到这一点吗?我需要提取这些声音注释的二进制数据并将它们转换为 MP3。

下一个版本的 PyMuPDF 将支持提取音频注释。下面的脚本使用 PyMuPDF 从 PDF 中提取音频注释,用法很简单:调用脚本并将 PDF 文件作为第一个参数传入即可,例如 python script.py myfile.pdf

注意:仅适用于 Windows。

import fitz, sys, os, subprocess
def ffmpeg_input_format(sound):
    """Return the ffmpeg raw-audio format name for a sound dictionary.

    ``sound`` is the dictionary returned for a sound annotation. Only the
    optional keys ``"encoding"`` and ``"bps"`` are read; when absent they
    default to unsigned samples and 8 bits per sample respectively.

    Examples: ``u8``, ``s8``, ``u16be``, ``s16be``, ``mulaw``, ``alaw``.
    """
    encoding = sound.get("encoding", "Raw")
    # Companded encodings have their own ffmpeg raw formats and are not
    # described by a sign/bit-depth name.
    if encoding == "muLaw":
        return "mulaw"
    if encoding == "ALaw":
        return "alaw"
    sign = "s" if encoding == "Signed" else "u"
    bits = int(sound.get("bps", 8))
    # ffmpeg names single-byte formats "u8"/"s8"; an endianness suffix only
    # exists for multi-byte samples ("u8be" is not a valid format).
    if bits <= 8:
        return sign + "8"
    return sign + str(bits) + "be"


def extract_sounds(ifile):
    """Extract every sound annotation of the PDF ``ifile`` and convert to MP3.

    A folder named after the PDF (without extension) is created next to it
    (or in the current directory when ``ifile`` has no directory part). For
    each sound annotation two files are written there:
    ``<name>.page.<page.number>.annot.<n>.raw`` holding the sound stream and
    the matching ``.mp3`` produced by ffmpeg. ``n`` restarts at 1 on every
    page and only counts annotations whose sound could be read.
    """
    doc = fitz.open(ifile)
    ofolder = os.path.dirname(ifile) or os.getcwd()
    flnm = os.path.splitext(os.path.basename(ifile))[0]
    defolder = os.path.join(ofolder, flnm)
    # exist_ok lets the script be re-run on the same PDF.
    os.makedirs(defolder, exist_ok=True)
    prefix = os.path.join(defolder, flnm)
    for page in doc:
        print(page)
        annot_number = 1
        for annot in page.annots(types=[fitz.PDF_ANNOT_SOUND]):
            try:
                # Newer PyMuPDF releases name this get_sound(); older ones
                # only have soundGet(). Support both.
                read_sound = getattr(annot, "get_sound", None) or annot.soundGet
                sound = read_sound()
            except Exception as e:
                # Best effort: report the unreadable annotation and move on.
                print(e)
                continue
            for k, v in sound.items():
                print(k, "=", v if k != "stream" else len(v))
            stem = f"{prefix}.page.{page.number}.annot.{annot_number}"
            annot_number += 1
            ofile = stem + ".raw"
            with open(ofile, "wb") as fout:
                fout.write(sound["stream"])
            command = [
                "ffmpeg", "-hide_banner",
                # -y: overwrite an MP3 left by a previous run instead of
                # blocking on ffmpeg's interactive prompt.
                "-y",
                "-f", ffmpeg_input_format(sound),
                "-ar", str(sound["rate"]),
                "-ac", str(sound.get("channels", 1)),
                "-i", ofile,
                stem + ".mp3",
            ]
            try:
                # No shell: an argument list combined with shell=True only
                # works on Windows, and paths need no quoting this way.
                subprocess.call(command)
            except FileNotFoundError:
                sys.exit("ffmpeg not found on PATH; raw audio was extracted "
                         "but not converted")


def main():
    """Command-line entry point: ``python script.py myfile.pdf``."""
    if len(sys.argv) != 2:
        sys.exit("need filename as parameter")
    extract_sounds(sys.argv[1])


if __name__ == "__main__":
    main()