如何使用一组概率值在全切片图像(WSI,.svs 格式)上生成热图?
How to generate heat map on the Whole Slide Images (.svs format) using some probability values?
我正在尝试使用概率值为全切片图像 (WSI) 生成热图或概率图。我有坐标点(用于确定 WSI 上的区域)以及与之对应的概率值。
WSI 的基本介绍: WSI 的尺寸很大(几乎是 100000 x 100000 像素)。因此,无法使用普通图像查看器打开这些图像。使用 OpenSlide 软件 处理 WSI。
我之前在 Stack Overflow 上看过与热图相关的帖子,但由于 WSI 的处理方式不同,我无法弄清楚如何应用这些解决方案。我参考过的一些示例:1, 2, 3, 4, 5, 6, 7, 8, 9, 10 等等(原帖中的链接未保留)。
要在 WSI 上生成热图,请按照以下说明操作:
首先提取图像块(patch)并保存其坐标。可使用下面的代码提取图像块;代码需要根据具体需求做一些修改。代码复制自:patch extraction code link
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import logging
try:
import Image
except:
from PIL import Image
import math
import numpy as np
import openslide
import os
from time import strftime,gmtime
# Command-line interface: every tunable of the extraction is a flag.
parser = argparse.ArgumentParser(
    description='Extract a series of patches from a whole slide image'
)
parser.add_argument(
    "-i", "--image",
    dest='wsi',
    nargs='+',
    required=True,
    help="path to a whole slide image",
)
parser.add_argument(
    "-p", "--patch_size",
    dest='patch_size',
    default=299,
    type=int,
    help="pixel width and height for patches",
)
parser.add_argument(
    "-b", "--grey_limit",
    dest='grey_limit',
    default=0.8,
    type=float,
    help="greyscale value to determine if there is sufficient tissue present [default: `0.8`]",
)
parser.add_argument(
    "-o", "--output",
    dest='output_name',
    default="output",
    help="Name of the output file directory [default: `output/`]",
)
parser.add_argument(
    "-v", "--verbose",
    dest="logLevel",
    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    default="INFO",
    help="Set the logging level",
)
args = parser.parse_args()

# Configure the root logger with the level named on the command line.
if args.logLevel:
    logging.basicConfig(level=getattr(logging, args.logLevel))

# Module-level globals shared by the functions below.
# "--image" collects one or more tokens (nargs='+'); they are re-joined with
# single spaces and the result is made absolute.
wsi = os.path.abspath(' '.join(args.wsi))
outname = os.path.abspath(args.output_name)
basename = os.path.basename(wsi)
# The grey limit is a fraction; scale it to the 0-255 range that the mean
# grey value of a patch is compared against.
mean_grey_values = args.grey_limit * 255
# Running count of patches that passed the grey threshold.
number_of_useful_regions = 0
# Slide level passed to every read_region call.
level = 0
def main():
    """Walk the slide patch by patch and pass every patch to ``print_pics``.

    The slide and the patch grid size come from ``open_slide``. Each grid
    cell is read with ``img.read_region`` at ``level`` and handed to
    ``print_pics`` together with its top-left pixel coordinates. Afterwards
    the number of saved patches and the share of patches that were *not*
    saved are logged.
    """
    img, num_x_patches, num_y_patches = open_slide()
    logging.debug('img: {}, num_x_patches = {}, num_y_patches: {}'.format(img,num_x_patches,num_y_patches))

    patch = args.patch_size
    for x in range(num_x_patches):
        for y in range(num_y_patches):
            x_top_left = x * patch
            y_top_left = y * patch
            img_data = img.read_region((x_top_left, y_top_left), level, (patch, patch))
            print_pics(x_top_left, y_top_left, img_data, img)

    total_patches = num_x_patches * num_y_patches
    if total_patches:
        # number_of_useful_regions counts the patches that were kept, so the
        # uninformative share is the complement of the useful share.
        pc_useful = 100.0 * number_of_useful_regions / total_patches
        pc_uninformative = round(100 - pc_useful, 2)
    else:
        # Empty grid (slide smaller than one patch): nothing was useful, and
        # dividing by the patch count would raise ZeroDivisionError.
        pc_uninformative = 100.0

    logging.info('Completed patch extraction of {} images.'.format(number_of_useful_regions))
    logging.info('{}% of the image is uninformative\n'.format(pc_uninformative))
def print_pics(x_top_left, y_top_left, img_data, img):
    """Save one patch if it is darker than the grey threshold.

    The patch is converted to a NumPy array and then to greyscale with
    ``rgb2gray``. When the mean grey value is below ``mean_grey_values`` the
    patch is counted in ``number_of_useful_regions`` and written through
    ``save_image`` under the name ``<slide>_<x>_<y>_<patch_size>``, where
    ``<slide>`` is the slide file name up to its first dot.

    Args:
        x_top_left: x pixel coordinate of the patch's top-left corner.
        y_top_left: y pixel coordinate of the patch's top-left corner.
        img_data: the patch as returned by ``img.read_region``.
        img: the slide object; only ``level_dimensions[0][0]`` is read, for
            the progress message.
    """
    global number_of_useful_regions

    # Progress is reported only when both coordinates are multiples of 100
    # and x is non-zero. The original called exit() right after this log
    # line, which ended the whole extraction at the first progress report;
    # that call is removed so the run continues.
    if x_top_left % 100 == 0 and y_top_left % 100 == 0 and x_top_left != 0:
        pc_complete = round(x_top_left / img.level_dimensions[0][0], 2) * 100
        logging.info('{:.2f}% Complete at {}'.format(pc_complete,strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())))

    img_data_np = np.array(img_data)

    # Convert to grayscale; the mean is computed once and reused below.
    mean_grey = np.mean(rgb2gray(img_data_np))
    if mean_grey < mean_grey_values:
        logging.debug('Image grayscale = {} compared to threshold {}'.format(mean_grey, mean_grey_values))
        number_of_useful_regions += 1
        wsi_base = os.path.basename(wsi).split('.')[0]
        img_name = wsi_base + "_" + str(x_top_left) + "_" + str(y_top_left) + "_" + str(args.patch_size)
        logging.debug('Saving {} {} {}'.format(x_top_left, y_top_left, mean_grey))
        save_image(img_data_np, 1, img_name)
def gen_x_and_y(xlist, ylist, img):
    """Lazily produce ``(x, y, patch)`` for each pairing of the two lists.

    ``x`` and ``y`` are yielded unchanged (they are grid indices, not pixel
    coordinates). The patch is read with ``img.read_region`` at pixel
    position ``(x * patch_size, y * patch_size)``, at the module-level
    ``level``, with size ``patch_size`` x ``patch_size``.

    Args:
        xlist: iterable of grid column indices (outer loop).
        ylist: iterable of grid row indices (inner loop, walked once per x).
        img: object providing ``read_region``.
    """
    for col in xlist:
        for row in ylist:
            side = args.patch_size
            origin = (col * side, row * side)
            patch = img.read_region(origin, level, (side, side))
            yield (col, row, patch)
def open_slide():
    """Open the slide at ``wsi`` and count the whole patches that fit in it.

    The first level is always the main image, so its (width, height) tuple
    is used for the patch grid.

    Returns:
        A tuple ``(img, num_x_patches, num_y_patches)``: the opened
        ``openslide.OpenSlide`` object and the number of complete
        ``args.patch_size``-wide patches along x and along y.
    """
    logging.debug('img: {}'.format(wsi))
    img = openslide.OpenSlide(wsi)
    img_dim = img.level_dimensions[0]

    # How many iterations it takes to get through the WSI. Floor division
    # drops any partial patch at the right and bottom edges.
    # The original also computed remainder_x / remainder_y as
    # `dimension % patch_count`. They were never used, divided by the wrong
    # quantity, and raised ZeroDivisionError for a slide smaller than one
    # patch, so they are removed.
    num_x_patches = img_dim[0] // args.patch_size
    num_y_patches = img_dim[1] // args.patch_size

    logging.debug('The WSI shape is {}'.format(img_dim))
    logging.debug('There are {} x-patches and {} y-patches to iterate through'.format(num_x_patches,num_y_patches))
    return img, num_x_patches, num_y_patches
def validate_dir_exists():
    """Create the output directory if needed and check the slide file exists.

    Raises:
        SystemExit: with status 1 when the file at ``wsi`` does not exist.
    """
    if not os.path.isdir(outname):
        # makedirs also creates missing parent directories, which a plain
        # mkdir would fail on.
        os.makedirs(outname)
    logging.debug('Validated {} directory exists'.format(outname))

    if not os.path.exists(wsi):
        # Report at ERROR level so the message shows under the default INFO
        # level, and exit non-zero so callers can detect the failure.
        logging.error('Could not find the file {}'.format(wsi))
        raise SystemExit(1)
    logging.debug('Found the file {}'.format(wsi))
def rgb2gray(rgb):
    """Collapse the first three channels of a 3-D image array to greyscale.

    Channels 0, 1 and 2 of the last axis are weighted by 0.2989, 0.5870 and
    0.1140 respectively and summed; any further channels are ignored.
    """
    red_part = 0.2989 * rgb[:, :, 0]
    green_part = 0.5870 * rgb[:, :, 1]
    blue_part = 0.1140 * rgb[:, :, 2]
    return red_part + green_part + blue_part
def save_image(img, j, img_name):
    """Write an image array to ``<outname>/<img_name>_<j>.png``.

    Args:
        img: array passed to ``Image.fromarray``.
        j: value appended to the file name after an underscore.
        img_name: file name stem, without suffix or extension.

    Raises:
        SystemExit: with status 1 when the image cannot be built or saved.
    """
    tmp = os.path.join(outname, img_name + "_" + str(j) + ".png")
    try:
        Image.fromarray(img).save(tmp)
    except Exception as err:
        # The original bare `except:` also caught KeyboardInterrupt, hid the
        # reason for the failure and exited with status 0. Catch ordinary
        # errors only, report why, and exit non-zero.
        logging.error('Could not print {}: {}'.format(tmp, err))
        raise SystemExit(1)
# Script entry point: make sure the output directory and the input slide
# are in place, then run the extraction.
if __name__ == "__main__":
    validate_dir_exists()
    main()
其次,为每个图像块生成概率值。
最后,将每个坐标所对应区域内的所有像素值替换为相应的概率值,并用颜色映射(colormap)显示结果。
这就是WSI生成热图的基本思路。您可以根据自己的意愿修改代码和概念以获得热图。
我们开发了一个用于处理全切片图像 (WSI) 的 Python 包:
https://github.com/amirakbarnejad/PyDmed
这是获取整个幻灯片图像的热图的教程:
https://amirakbarnejad.github.io/Tutorial/tutorial_section5.html。
这里还有一个示例笔记本,它使用 PyDmed 获取 WSI 的热图:
Link to the sample notebook.
PyDmed 的好处是它采用多进程方式:数据加载器向 GPU 发送图像块流,StreamWriter 在单独的进程中写入磁盘,因此效率很高。运行时间当然取决于机器、WSI 的大小等因素。在配有较好 GPU 的机器上,PyDmed 可以在一天内生成约 120 个 WSI 的热图。
我正在尝试使用概率值为全切片图像 (WSI) 生成热图或概率图。我有坐标点(用于确定 WSI 上的区域)以及与之对应的概率值。
WSI 的基本介绍: WSI 的尺寸很大(几乎是 100000 x 100000 像素)。因此,无法使用普通图像查看器打开这些图像。使用 OpenSlide 软件 处理 WSI。
我之前在 Stack Overflow 上看过与热图相关的帖子,但由于 WSI 的处理方式不同,我无法弄清楚如何应用这些解决方案。我参考过一些示例(原帖此处为链接列表,未保留)。
要在 WSI 上生成热图,请按照以下说明操作:
首先提取图像块(patch)并保存其坐标。可使用下面的代码提取图像块;代码需要根据具体需求做一些修改。代码复制自:patch extraction code link
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import logging
try:
import Image
except:
from PIL import Image
import math
import numpy as np
import openslide
import os
from time import strftime,gmtime
# Command-line interface: every tunable of the extraction is a flag.
parser = argparse.ArgumentParser(
    description='Extract a series of patches from a whole slide image'
)
parser.add_argument(
    "-i", "--image",
    dest='wsi',
    nargs='+',
    required=True,
    help="path to a whole slide image",
)
parser.add_argument(
    "-p", "--patch_size",
    dest='patch_size',
    default=299,
    type=int,
    help="pixel width and height for patches",
)
parser.add_argument(
    "-b", "--grey_limit",
    dest='grey_limit',
    default=0.8,
    type=float,
    help="greyscale value to determine if there is sufficient tissue present [default: `0.8`]",
)
parser.add_argument(
    "-o", "--output",
    dest='output_name',
    default="output",
    help="Name of the output file directory [default: `output/`]",
)
parser.add_argument(
    "-v", "--verbose",
    dest="logLevel",
    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    default="INFO",
    help="Set the logging level",
)
args = parser.parse_args()

# Configure the root logger with the level named on the command line.
if args.logLevel:
    logging.basicConfig(level=getattr(logging, args.logLevel))

# Module-level globals shared by the functions below.
# "--image" collects one or more tokens (nargs='+'); they are re-joined with
# single spaces and the result is made absolute.
wsi = os.path.abspath(' '.join(args.wsi))
outname = os.path.abspath(args.output_name)
basename = os.path.basename(wsi)
# The grey limit is a fraction; scale it to the 0-255 range that the mean
# grey value of a patch is compared against.
mean_grey_values = args.grey_limit * 255
# Running count of patches that passed the grey threshold.
number_of_useful_regions = 0
# Slide level passed to every read_region call.
level = 0
def main():
    """Walk the slide patch by patch and pass every patch to ``print_pics``.

    The slide and the patch grid size come from ``open_slide``. Each grid
    cell is read with ``img.read_region`` at ``level`` and handed to
    ``print_pics`` together with its top-left pixel coordinates. Afterwards
    the number of saved patches and the share of patches that were *not*
    saved are logged.
    """
    img, num_x_patches, num_y_patches = open_slide()
    logging.debug('img: {}, num_x_patches = {}, num_y_patches: {}'.format(img,num_x_patches,num_y_patches))

    patch = args.patch_size
    for x in range(num_x_patches):
        for y in range(num_y_patches):
            x_top_left = x * patch
            y_top_left = y * patch
            img_data = img.read_region((x_top_left, y_top_left), level, (patch, patch))
            print_pics(x_top_left, y_top_left, img_data, img)

    total_patches = num_x_patches * num_y_patches
    if total_patches:
        # number_of_useful_regions counts the patches that were kept, so the
        # uninformative share is the complement of the useful share.
        pc_useful = 100.0 * number_of_useful_regions / total_patches
        pc_uninformative = round(100 - pc_useful, 2)
    else:
        # Empty grid (slide smaller than one patch): nothing was useful, and
        # dividing by the patch count would raise ZeroDivisionError.
        pc_uninformative = 100.0

    logging.info('Completed patch extraction of {} images.'.format(number_of_useful_regions))
    logging.info('{}% of the image is uninformative\n'.format(pc_uninformative))
def print_pics(x_top_left, y_top_left, img_data, img):
    """Save one patch if it is darker than the grey threshold.

    The patch is converted to a NumPy array and then to greyscale with
    ``rgb2gray``. When the mean grey value is below ``mean_grey_values`` the
    patch is counted in ``number_of_useful_regions`` and written through
    ``save_image`` under the name ``<slide>_<x>_<y>_<patch_size>``, where
    ``<slide>`` is the slide file name up to its first dot.

    Args:
        x_top_left: x pixel coordinate of the patch's top-left corner.
        y_top_left: y pixel coordinate of the patch's top-left corner.
        img_data: the patch as returned by ``img.read_region``.
        img: the slide object; only ``level_dimensions[0][0]`` is read, for
            the progress message.
    """
    global number_of_useful_regions

    # Progress is reported only when both coordinates are multiples of 100
    # and x is non-zero. The original called exit() right after this log
    # line, which ended the whole extraction at the first progress report;
    # that call is removed so the run continues.
    if x_top_left % 100 == 0 and y_top_left % 100 == 0 and x_top_left != 0:
        pc_complete = round(x_top_left / img.level_dimensions[0][0], 2) * 100
        logging.info('{:.2f}% Complete at {}'.format(pc_complete,strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())))

    img_data_np = np.array(img_data)

    # Convert to grayscale; the mean is computed once and reused below.
    mean_grey = np.mean(rgb2gray(img_data_np))
    if mean_grey < mean_grey_values:
        logging.debug('Image grayscale = {} compared to threshold {}'.format(mean_grey, mean_grey_values))
        number_of_useful_regions += 1
        wsi_base = os.path.basename(wsi).split('.')[0]
        img_name = wsi_base + "_" + str(x_top_left) + "_" + str(y_top_left) + "_" + str(args.patch_size)
        logging.debug('Saving {} {} {}'.format(x_top_left, y_top_left, mean_grey))
        save_image(img_data_np, 1, img_name)
def gen_x_and_y(xlist, ylist, img):
    """Lazily produce ``(x, y, patch)`` for each pairing of the two lists.

    ``x`` and ``y`` are yielded unchanged (they are grid indices, not pixel
    coordinates). The patch is read with ``img.read_region`` at pixel
    position ``(x * patch_size, y * patch_size)``, at the module-level
    ``level``, with size ``patch_size`` x ``patch_size``.

    Args:
        xlist: iterable of grid column indices (outer loop).
        ylist: iterable of grid row indices (inner loop, walked once per x).
        img: object providing ``read_region``.
    """
    for col in xlist:
        for row in ylist:
            side = args.patch_size
            origin = (col * side, row * side)
            patch = img.read_region(origin, level, (side, side))
            yield (col, row, patch)
def open_slide():
    """Open the slide at ``wsi`` and count the whole patches that fit in it.

    The first level is always the main image, so its (width, height) tuple
    is used for the patch grid.

    Returns:
        A tuple ``(img, num_x_patches, num_y_patches)``: the opened
        ``openslide.OpenSlide`` object and the number of complete
        ``args.patch_size``-wide patches along x and along y.
    """
    logging.debug('img: {}'.format(wsi))
    img = openslide.OpenSlide(wsi)
    img_dim = img.level_dimensions[0]

    # How many iterations it takes to get through the WSI. Floor division
    # drops any partial patch at the right and bottom edges.
    # The original also computed remainder_x / remainder_y as
    # `dimension % patch_count`. They were never used, divided by the wrong
    # quantity, and raised ZeroDivisionError for a slide smaller than one
    # patch, so they are removed.
    num_x_patches = img_dim[0] // args.patch_size
    num_y_patches = img_dim[1] // args.patch_size

    logging.debug('The WSI shape is {}'.format(img_dim))
    logging.debug('There are {} x-patches and {} y-patches to iterate through'.format(num_x_patches,num_y_patches))
    return img, num_x_patches, num_y_patches
def validate_dir_exists():
    """Create the output directory if needed and check the slide file exists.

    Raises:
        SystemExit: with status 1 when the file at ``wsi`` does not exist.
    """
    if not os.path.isdir(outname):
        # makedirs also creates missing parent directories, which a plain
        # mkdir would fail on.
        os.makedirs(outname)
    logging.debug('Validated {} directory exists'.format(outname))

    if not os.path.exists(wsi):
        # Report at ERROR level so the message shows under the default INFO
        # level, and exit non-zero so callers can detect the failure.
        logging.error('Could not find the file {}'.format(wsi))
        raise SystemExit(1)
    logging.debug('Found the file {}'.format(wsi))
def rgb2gray(rgb):
    """Collapse the first three channels of a 3-D image array to greyscale.

    Channels 0, 1 and 2 of the last axis are weighted by 0.2989, 0.5870 and
    0.1140 respectively and summed; any further channels are ignored.
    """
    red_part = 0.2989 * rgb[:, :, 0]
    green_part = 0.5870 * rgb[:, :, 1]
    blue_part = 0.1140 * rgb[:, :, 2]
    return red_part + green_part + blue_part
def save_image(img, j, img_name):
    """Write an image array to ``<outname>/<img_name>_<j>.png``.

    Args:
        img: array passed to ``Image.fromarray``.
        j: value appended to the file name after an underscore.
        img_name: file name stem, without suffix or extension.

    Raises:
        SystemExit: with status 1 when the image cannot be built or saved.
    """
    tmp = os.path.join(outname, img_name + "_" + str(j) + ".png")
    try:
        Image.fromarray(img).save(tmp)
    except Exception as err:
        # The original bare `except:` also caught KeyboardInterrupt, hid the
        # reason for the failure and exited with status 0. Catch ordinary
        # errors only, report why, and exit non-zero.
        logging.error('Could not print {}: {}'.format(tmp, err))
        raise SystemExit(1)
# Script entry point: make sure the output directory and the input slide
# are in place, then run the extraction.
if __name__ == "__main__":
    validate_dir_exists()
    main()
其次,为每个图像块生成概率值。最后,将每个坐标所对应区域内的所有像素值替换为相应的概率值,并用颜色映射(colormap)显示结果。
这就是WSI生成热图的基本思路。您可以根据自己的意愿修改代码和概念以获得热图。
我们开发了一个用于处理全切片图像 (WSI) 的 Python 包: https://github.com/amirakbarnejad/PyDmed 。这是获取全切片图像热图的教程: https://amirakbarnejad.github.io/Tutorial/tutorial_section5.html 。这里还有一个示例笔记本,它使用 PyDmed 获取 WSI 的热图: Link to the sample notebook.
PyDmed 的好处是它采用多进程方式:数据加载器向 GPU 发送图像块流,StreamWriter 在单独的进程中写入磁盘,因此效率很高。运行时间当然取决于机器、WSI 的大小等因素。在配有较好 GPU 的机器上,PyDmed 可以在一天内生成约 120 个 WSI 的热图。