如何使用 pydicom 创建 JPEG 压缩的 DICOM 数据集?
How to create JPEG compressed DICOM dataset using pydicom?
我正在尝试使用 pydicom 创建 JPEG 压缩的 DICOM 图像。这里可以找到一份关于彩色 DICOM 图像的不错的参考资料,但它主要是理论和 C++。在下面的代码示例中,我在 output-raw.dcm(未压缩)内创建了一个淡蓝色椭圆,看起来像这样:
import io
from PIL import Image, ImageDraw
from pydicom.dataset import Dataset
from pydicom.uid import generate_uid, JPEGExtended
from pydicom._storage_sopclass_uids import SecondaryCaptureImageStorage
# Size, in pixels, of the generated test image; the same values are written to
# the dataset's Columns and Rows attributes further down.
WIDTH = 100
HEIGHT = 100
def ensure_even(stream: bytes) -> bytes:
    """Return *stream* padded with one NUL byte if its length is odd.

    Args:
        stream: The byte string to check, e.g. raw or compressed pixel data.

    Returns:
        ``stream`` unchanged when its length is already even, otherwise a new
        byte string with a single ``b"\\x00"`` appended.
    """
    # Very important for some viewers
    if len(stream) % 2:
        return stream + b"\x00"
    return stream
def bob_ross_magic():
    """Paint the small RGB test picture used by the examples.

    The picture is WIDTH x HEIGHT pixels: a red background, a black rectangle,
    a cyan ellipse inside it and the yellow text "Hello" near the top-left
    corner of the rectangle.

    Returns:
        The drawn ``PIL.Image.Image`` in "RGB" mode.
    """
    image = Image.new("RGB", (WIDTH, HEIGHT), color="red")
    draw = ImageDraw.Draw(image)
    draw.rectangle([10, 10, 90, 90], fill="black")
    draw.ellipse([30, 20, 70, 80], fill="cyan")
    draw.text((11, 11), "Hello", fill=(255, 255, 0))
    return image
# Build a dataset describing the uncompressed test image.
ds = Dataset()
# Encoding flags used when the dataset is written: little endian, implicit VR.
ds.is_little_endian = True
ds.is_implicit_VR = True
ds.SOPClassUID = SecondaryCaptureImageStorage
ds.SOPInstanceUID = generate_uid()
# NOTE(review): presumably creates/fills ds.file_meta from the UIDs set above;
# ds.file_meta is relied on further down - confirm against the pydicom docs.
ds.fix_meta_info()
ds.Modality = "OT"
# Pixel description: 3 samples per pixel, 8 bits allocated and stored each.
ds.SamplesPerPixel = 3
ds.BitsAllocated = 8
ds.BitsStored = 8
ds.HighBit = 7
ds.PixelRepresentation = 0
ds.PhotometricInterpretation = "RGB"
ds.Rows = HEIGHT
ds.Columns = WIDTH
# Raw pixel bytes of the drawn image, padded to an even length.
image = bob_ross_magic()
ds.PixelData = ensure_even(image.tobytes())
# Also keep a PNG copy of the picture next to the DICOM file.
image.save("output.png")
ds.save_as("output-raw.dcm", write_like_original=False) # File is OK
#
# Create compressed image
#
# Encode the same picture as JPEG into an in-memory buffer.
output = io.BytesIO()
image.save(output, format="JPEG")
# The JPEG byte stream is assigned to PixelData as-is (only padded to even
# length, not encapsulated), and ds.is_implicit_VR is still True from above.
ds.PixelData = ensure_even(output.getvalue())
ds.PhotometricInterpretation = "YBR_FULL_422"
ds.file_meta.TransferSyntaxUID = JPEGExtended
ds.save_as("output-jpeg.dcm", write_like_original=False) # File is corrupt
最后我试图创建压缩的 DICOM:我尝试设置各种传输语法,使用 PIL 进行压缩,但没有成功。我相信生成的 DICOM 文件已损坏。如果我要将原始 DICOM 文件转换为使用 gdcm-tools 压缩的 JPEG:
$ gdcmconv -J output-raw.dcm output-jpeg.dcm
通过对这个 转换后的文件 做一个 dcmdump
我们可以看到一个有趣的结构,我不知道如何使用 pydicom 重现它:
$ dcmdump output-jpeg.dcm
# Dicom-File-Format
# Dicom-Meta-Information-Header
# Used TransferSyntax: Little Endian Explicit
(0002,0000) UL 240 # 4, 1 FileMetaInformationGroupLength
(0002,0001) OB 00 # 2, 1 FileMetaInformationVersion
(0002,0002) UI =SecondaryCaptureImageStorage # 26, 1 MediaStorageSOPClassUID
(0002,0003) UI [1.2.826.0.1.3680043.8.498.57577581978474188964358168197934098358] # 64, 1 MediaStorageSOPInstanceUID
(0002,0010) UI =JPEGLossless:Non-hierarchical-1stOrderPrediction # 22, 1 TransferSyntaxUID
(0002,0012) UI [1.2.826.0.1.3680043.2.1143.107.104.103.115.2.8.4] # 48, 1 ImplementationClassUID
(0002,0013) SH [GDCM 2.8.4] # 10, 1 ImplementationVersionName
(0002,0016) AE [gdcmconv] # 8, 1 SourceApplicationEntityTitle
# Dicom-Data-Set
# Used TransferSyntax: JPEG Lossless, Non-hierarchical, 1st Order Prediction
...
... ### How to do the magic below?
...
(7fe0,0010) OB (PixelSequence #=2) # u/l, 1 PixelData
(fffe,e000) pi (no value available) # 0, 1 Item
(fffe,e000) pi ff\d8\ff\ee[=12=][=12=]ef[=12=][=12=][=12=][=12=][=12=][=12=]\ff\c3[=12=]... # 4492, 1 Item
(fffe,e0dd) na (SequenceDelimitationItem) # 0, 0 SequenceDelimitationItem
我尝试使用 pydicom 的 encaps 模块,但我认为它主要用于读取数据,而不是写入。有没有人知道该如何处理这个问题,即如何创建/编码这些 PixelSequence?我很希望在不运行外部工具的情况下,用纯 Python 创建 JPEG 压缩的 DICOM。
DICOM 标准要求压缩后的像素数据必须经过封装(encapsulated,尤其请参阅其中的表格)。获得压缩后的图像数据后,可以使用 encaps.encapsulate() 方法创建适用于 Pixel Data 的 bytes:
from pydicom.encaps import encapsulate
# encapsulate() requires a list of bytes, one item per frame
ds.PixelData = encapsulate([ensure_even(output.getvalue())])
# Need to set this flag to indicate the Pixel Data is compressed
ds['PixelData'].is_undefined_length = True  # Only needed for < v1.4
# Encapsulated (compressed) transfer syntaxes encode the data set as explicit
# VR little endian, so the implicit-VR flag used for the raw file must be
# cleared before saving; otherwise the encoding contradicts the transfer syntax.
ds.is_implicit_VR = False
ds.PhotometricInterpretation = "YBR_FULL_422"
ds.file_meta.TransferSyntaxUID = JPEGExtended
ds.save_as("output-jpeg.dcm", write_like_original=False)
我尝试了 @scaramallion 的解决方案,补充了更多细节后,看起来是可行的:
import numpy as np
from PIL import Image
import io
# set some parameters
num_frames = 4
img_size = 10
# Create a fake RGB dataset
random_image_array = (np.random.random((num_frames, img_size, img_size, 3)) * 255).astype('uint8')
# Convert the multi-frame array into one RGB PIL image per frame
# (frames have to be compressed one at a time)
imlist = [Image.fromarray(frame) for frame in random_image_array]
# Save the multipage tiff with jpeg compression
f = io.BytesIO()
imlist[0].save(f, format='tiff', append_images=imlist[1:], save_all=True, compression='jpeg')
# The BytesIO object cursor is at the end of the object, so I need to tell it to go back to the front
f.seek(0)
img = Image.open(f)
# Re-encode every TIFF page as a standalone JPEG byte stream, padded to an
# even number of bytes
img_byte_list = []
for i in range(num_frames):
    try:
        img.seek(i)
    except EOFError:
        # Not enough frames in img
        break
    with io.BytesIO() as output:
        img.save(output, format='jpeg')
        img_byte_list.append(ensure_even(output.getvalue()))
# One encapsulated item per frame
ds.PixelData = encapsulate(img_byte_list)
ds['PixelData'].is_undefined_length = True
# Compressed transfer syntaxes are written as explicit VR little endian
ds.is_implicit_VR = False
# Keep the image description in step with the frames actually written
ds.Rows = img_size
ds.Columns = img_size
ds.NumberOfFrames = len(img_byte_list)
ds.LossyImageCompression = '01'
ds.LossyImageCompressionRatio = 10  # default jpeg
ds.LossyImageCompressionMethod = 'ISO_10918_1'
ds.file_meta.TransferSyntaxUID = '1.2.840.10008.1.2.4.51'
ds.save_as("output-jpeg.dcm", write_like_original=False)
我正在尝试使用 pydicom 创建 JPEG 压缩的 DICOM 图像。这里可以找到一份关于彩色 DICOM 图像的不错的参考资料,但它主要是理论和 C++。在下面的代码示例中,我在 output-raw.dcm(未压缩)内创建了一个淡蓝色椭圆,看起来像这样:
import io
from PIL import Image, ImageDraw
from pydicom.dataset import Dataset
from pydicom.uid import generate_uid, JPEGExtended
from pydicom._storage_sopclass_uids import SecondaryCaptureImageStorage
# Size, in pixels, of the generated test image; the same values are written to
# the dataset's Columns and Rows attributes further down.
WIDTH = 100
HEIGHT = 100
def ensure_even(stream: bytes) -> bytes:
    """Return *stream* padded with one NUL byte if its length is odd.

    Args:
        stream: The byte string to check, e.g. raw or compressed pixel data.

    Returns:
        ``stream`` unchanged when its length is already even, otherwise a new
        byte string with a single ``b"\\x00"`` appended.
    """
    # Very important for some viewers
    if len(stream) % 2:
        return stream + b"\x00"
    return stream
def bob_ross_magic():
    """Paint the small RGB test picture used by the examples.

    The picture is WIDTH x HEIGHT pixels: a red background, a black rectangle,
    a cyan ellipse inside it and the yellow text "Hello" near the top-left
    corner of the rectangle.

    Returns:
        The drawn ``PIL.Image.Image`` in "RGB" mode.
    """
    image = Image.new("RGB", (WIDTH, HEIGHT), color="red")
    draw = ImageDraw.Draw(image)
    draw.rectangle([10, 10, 90, 90], fill="black")
    draw.ellipse([30, 20, 70, 80], fill="cyan")
    draw.text((11, 11), "Hello", fill=(255, 255, 0))
    return image
# Build a dataset describing the uncompressed test image.
ds = Dataset()
# Encoding flags used when the dataset is written: little endian, implicit VR.
ds.is_little_endian = True
ds.is_implicit_VR = True
ds.SOPClassUID = SecondaryCaptureImageStorage
ds.SOPInstanceUID = generate_uid()
# NOTE(review): presumably creates/fills ds.file_meta from the UIDs set above;
# ds.file_meta is relied on further down - confirm against the pydicom docs.
ds.fix_meta_info()
ds.Modality = "OT"
# Pixel description: 3 samples per pixel, 8 bits allocated and stored each.
ds.SamplesPerPixel = 3
ds.BitsAllocated = 8
ds.BitsStored = 8
ds.HighBit = 7
ds.PixelRepresentation = 0
ds.PhotometricInterpretation = "RGB"
ds.Rows = HEIGHT
ds.Columns = WIDTH
# Raw pixel bytes of the drawn image, padded to an even length.
image = bob_ross_magic()
ds.PixelData = ensure_even(image.tobytes())
# Also keep a PNG copy of the picture next to the DICOM file.
image.save("output.png")
ds.save_as("output-raw.dcm", write_like_original=False) # File is OK
#
# Create compressed image
#
# Encode the same picture as JPEG into an in-memory buffer.
output = io.BytesIO()
image.save(output, format="JPEG")
# The JPEG byte stream is assigned to PixelData as-is (only padded to even
# length, not encapsulated), and ds.is_implicit_VR is still True from above.
ds.PixelData = ensure_even(output.getvalue())
ds.PhotometricInterpretation = "YBR_FULL_422"
ds.file_meta.TransferSyntaxUID = JPEGExtended
ds.save_as("output-jpeg.dcm", write_like_original=False) # File is corrupt
最后我试图创建压缩的 DICOM:我尝试设置各种传输语法,使用 PIL 进行压缩,但没有成功。我相信生成的 DICOM 文件已损坏。如果我要将原始 DICOM 文件转换为使用 gdcm-tools 压缩的 JPEG:
$ gdcmconv -J output-raw.dcm output-jpeg.dcm
通过对这个 转换后的文件 做一个 dcmdump
我们可以看到一个有趣的结构,我不知道如何使用 pydicom 重现它:
$ dcmdump output-jpeg.dcm
# Dicom-File-Format
# Dicom-Meta-Information-Header
# Used TransferSyntax: Little Endian Explicit
(0002,0000) UL 240 # 4, 1 FileMetaInformationGroupLength
(0002,0001) OB 00 # 2, 1 FileMetaInformationVersion
(0002,0002) UI =SecondaryCaptureImageStorage # 26, 1 MediaStorageSOPClassUID
(0002,0003) UI [1.2.826.0.1.3680043.8.498.57577581978474188964358168197934098358] # 64, 1 MediaStorageSOPInstanceUID
(0002,0010) UI =JPEGLossless:Non-hierarchical-1stOrderPrediction # 22, 1 TransferSyntaxUID
(0002,0012) UI [1.2.826.0.1.3680043.2.1143.107.104.103.115.2.8.4] # 48, 1 ImplementationClassUID
(0002,0013) SH [GDCM 2.8.4] # 10, 1 ImplementationVersionName
(0002,0016) AE [gdcmconv] # 8, 1 SourceApplicationEntityTitle
# Dicom-Data-Set
# Used TransferSyntax: JPEG Lossless, Non-hierarchical, 1st Order Prediction
...
... ### How to do the magic below?
...
(7fe0,0010) OB (PixelSequence #=2) # u/l, 1 PixelData
(fffe,e000) pi (no value available) # 0, 1 Item
(fffe,e000) pi ff\d8\ff\ee[=12=][=12=]ef[=12=][=12=][=12=][=12=][=12=][=12=]\ff\c3[=12=]... # 4492, 1 Item
(fffe,e0dd) na (SequenceDelimitationItem) # 0, 0 SequenceDelimitationItem
我尝试使用 pydicom 的 encaps 模块,但我认为它主要用于读取数据,而不是写入。有没有人知道该如何处理这个问题,即如何创建/编码这些 PixelSequence?我很希望在不运行外部工具的情况下,用纯 Python 创建 JPEG 压缩的 DICOM。
DICOM 标准要求压缩后的像素数据必须经过封装(encapsulated,尤其请参阅其中的表格)。获得压缩后的图像数据后,可以使用 encaps.encapsulate() 方法创建适用于 Pixel Data 的 bytes:
from pydicom.encaps import encapsulate
# encapsulate() requires a list of bytes, one item per frame
ds.PixelData = encapsulate([ensure_even(output.getvalue())])
# Need to set this flag to indicate the Pixel Data is compressed
ds['PixelData'].is_undefined_length = True  # Only needed for < v1.4
# Encapsulated (compressed) transfer syntaxes encode the data set as explicit
# VR little endian, so the implicit-VR flag used for the raw file must be
# cleared before saving; otherwise the encoding contradicts the transfer syntax.
ds.is_implicit_VR = False
ds.PhotometricInterpretation = "YBR_FULL_422"
ds.file_meta.TransferSyntaxUID = JPEGExtended
ds.save_as("output-jpeg.dcm", write_like_original=False)
我尝试了 @scaramallion 的解决方案,补充了更多细节后,看起来是可行的:
import numpy as np
from PIL import Image
import io
# set some parameters
num_frames = 4
img_size = 10
# Create a fake RGB dataset
random_image_array = (np.random.random((num_frames, img_size, img_size, 3)) * 255).astype('uint8')
# Convert the multi-frame array into one RGB PIL image per frame
# (frames have to be compressed one at a time)
imlist = [Image.fromarray(frame) for frame in random_image_array]
# Save the multipage tiff with jpeg compression
f = io.BytesIO()
imlist[0].save(f, format='tiff', append_images=imlist[1:], save_all=True, compression='jpeg')
# The BytesIO object cursor is at the end of the object, so I need to tell it to go back to the front
f.seek(0)
img = Image.open(f)
# Re-encode every TIFF page as a standalone JPEG byte stream, padded to an
# even number of bytes
img_byte_list = []
for i in range(num_frames):
    try:
        img.seek(i)
    except EOFError:
        # Not enough frames in img
        break
    with io.BytesIO() as output:
        img.save(output, format='jpeg')
        img_byte_list.append(ensure_even(output.getvalue()))
# One encapsulated item per frame
ds.PixelData = encapsulate(img_byte_list)
ds['PixelData'].is_undefined_length = True
# Compressed transfer syntaxes are written as explicit VR little endian
ds.is_implicit_VR = False
# Keep the image description in step with the frames actually written
ds.Rows = img_size
ds.Columns = img_size
ds.NumberOfFrames = len(img_byte_list)
ds.LossyImageCompression = '01'
ds.LossyImageCompressionRatio = 10  # default jpeg
ds.LossyImageCompressionMethod = 'ISO_10918_1'
ds.file_meta.TransferSyntaxUID = '1.2.840.10008.1.2.4.51'
ds.save_as("output-jpeg.dcm", write_like_original=False)