slices = [dicom.read_file(os.path.join(folder_name,filename)) for filename in os.listdir(folder_name)] slices = np.stack([s.pixel_array for s in slices])
# -*- coding:utf-8 -*- ''' this script is used for basic process of lung 2017 in Data Science Bowl '''
import glob import os import pandas as pd import SimpleITK as sitk
import numpy as np # linear algebra import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) import skimage, os from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing from skimage.measure import label,regionprops, perimeter from skimage.morphology import binary_dilation, binary_opening from skimage.filters import roberts, sobel from skimage import measure, feature from skimage.segmentation import clear_border from skimage import data from scipy import ndimage as ndi import matplotlib #matplotlib.use('Agg') import matplotlib.pyplot as plt from mpl_toolkits.mplot3d.art3d import Poly3DCollection import dicom import scipy.misc import numpy as np
def get_pixels_hu(slices): image = np.stack([s.pixel_array for s in slices]) # Convert to int16 (from sometimes int16), # should be possible as values should always be low enough (<32k) image = image.astype(np.int16) # Set outside-of-scan pixels to 0 # The intercept is usually -1024, so air is approximately 0 image[image == -2000] = 0
# Convert to Hounsfield units (HU) for slice_number in range(len(slices)):
''' This funtion segments the lungs from the given 2D slice. ''' if plot == True: f, plots = plt.subplots(8, 1, figsize=(5, 40)) ''' Step 1: Convert into a binary image. ''' binary = im < 604 if plot == True: plots[0].axis('off') plots[0].set_title('binary image') plots[0].imshow(binary, cmap=plt.cm.bone)
''' Step 2: Remove the blobs connected to the border of the image. ''' cleared = clear_border(binary) if plot == True: plots[1].axis('off') plots[1].set_title('after clear border') plots[1].imshow(cleared, cmap=plt.cm.bone)
''' Step 3: Label the image. ''' label_image = label(cleared) if plot == True: plots[2].axis('off') plots[2].set_title('found all connective graph') plots[2].imshow(label_image, cmap=plt.cm.bone) ''' Step 4: Keep the labels with 2 largest areas. ''' areas = [r.area for r in regionprops(label_image)] areas.sort() if len(areas) > 2: for region in regionprops(label_image): if region.area < areas[-2]: for coordinates in region.coords: label_image[coordinates[0], coordinates[1]] = 0 binary = label_image > 0 if plot == True: plots[3].axis('off') plots[3].set_title(' Keep the labels with 2 largest areas') plots[3].imshow(binary, cmap=plt.cm.bone) ''' Step 5: Erosion operation with a disk of radius 2. This operation is seperate the lung nodules attached to the blood vessels. ''' selem = disk(2) binary = binary_erosion(binary, selem) if plot == True: plots[4].axis('off') plots[4].set_title('seperate the lung nodules attached to the blood vessels') plots[4].imshow(binary, cmap=plt.cm.bone) ''' Step 6: Closure operation with a disk of radius 10. This operation is to keep nodules attached to the lung wall. ''' selem = disk(10) binary = binary_closing(binary, selem) if plot == True: plots[5].axis('off') plots[5].set_title('keep nodules attached to the lung wall') plots[5].imshow(binary, cmap=plt.cm.bone) ''' Step 7: Fill in the small holes inside the binary mask of lungs. ''' edges = roberts(binary) binary = ndi.binary_fill_holes(edges) if plot == True: plots[6].axis('off') plots[6].set_title('Fill in the small holes inside the binary mask of lungs') plots[6].imshow(binary, cmap=plt.cm.bone) ''' Step 8: Superimpose the binary mask on the input image. ''' get_high_vals = binary == 0 im[get_high_vals] = 0 if plot == True: plots[7].axis('off') plots[7].set_title('Superimpose the binary mask on the input image') plots[7].imshow(im, cmap=plt.cm.bone)
# not actually binary, but 1 and 2. # 0 is treated as background, which we do not want binary_image = np.array(image > -320, dtype=np.int8)+1 labels = measure.label(binary_image)
# Pick the pixel in the very corner to determine which label is air. # Improvement: Pick multiple background labels from around the patient # More resistant to "trays" on which the patient lays cutting the air # around the person in half background_label = labels[0,0,0]
#Fill the air around the person binary_image[background_label == labels] = 2
# Method of filling the lung structures (that is superior to something like # morphological closing) if fill_lung_structures: # For every slice we determine the largest solid structure for i, axial_slice in enumerate(binary_image): axial_slice = axial_slice - 1 labeling = measure.label(axial_slice) l_max = largest_label_volume(labeling, bg=0)
if l_max is not None: #This slice contains some lung binary_image[i][labeling != l_max] = 1
binary_image -= 1 #Make the image actual binary binary_image = 1-binary_image # Invert it, lungs are now 1
# Remove other air pockets insided body labels = measure.label(binary_image, background=0) l_max = largest_label_volume(labels, bg=0) if l_max is not None: # There are air pockets binary_image[labels != l_max] = 0
import SimpleITK as sitk import numpy as np import csv from glob import glob import pandas as pd file_list=glob(luna_subset_path+"*.mhd") ##################### # # Helper function to get rows in data frame associated # with each file def get_filename(case): global file_list for f in file_list: if case in f: return(f) # # The locations of the nodes df_node = pd.read_csv(luna_path+"annotations.csv") df_node["file"] = df_node["seriesuid"].apply(get_filename) df_node = df_node.dropna() ##### # # Looping over the image files # fcount = 0 for img_file in file_list: print "Getting mask for image file %s" % img_file.replace(luna_subset_path,"") mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file if len(mini_df)>0: # some files may not have a nodule--skipping those biggest_node = np.argsort(mini_df["diameter_mm"].values)[-1] # just using the biggest node node_x = mini_df["coordX"].values[biggest_node] node_y = mini_df["coordY"].values[biggest_node] node_z = mini_df["coordZ"].values[biggest_node] diam = mini_df["diameter_mm"].values[biggest_node]
itk_img = sitk.ReadImage(img_file) img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering) center = np.array([node_x,node_y,node_z]) # nodule center origin = np.array(itk_img.GetOrigin()) # x,y,z Origin in world coordinates (mm) spacing = np.array(itk_img.GetSpacing()) # spacing of voxels in world coor. (mm) v_center =np.rint((center-origin)/spacing) # nodule center in voxel space (still x,y,z ordering)
i = 0 for i_z in range(int(v_center[2])-1,int(v_center[2])+2): mask = make_mask(center,diam,i_z*spacing[2]+origin[2],width,height,spacing,origin) masks[i] = mask imgs[i] = matrix2int16(img_array[i_z]) i+=1 np.save(output_path+"images_%d.npy" % (fcount) ,imgs) np.save(output_path+"masks_%d.npy" % (fcount) ,masks)
3.5 查看结节
以下代码用于查看原始CT和结节mask。其实就是用matplotlib打印上一步存储的npy文件。
1 2 3 4 5 6 7 8 9 10 11 12
import matplotlib.pyplot as plt imgs = np.load(output_path+'images_0.npy') masks = np.load(output_path+'masks_0.npy') for i in range(len(imgs)): print "image %d" % i fig,ax = plt.subplots(2,2,figsize=[8,8]) ax[0,0].imshow(imgs[i],cmap='gray') ax[0,1].imshow(masks[i],cmap='gray') ax[1,0].imshow(imgs[i]*masks[i],cmap='gray') plt.show() raw_input("hit enter to cont : ")
接下来的处理和DICOM格式数据差不多,腐蚀膨胀、连通区域标记等。
参考信息:
灰度值是pixel value经过重重LUT转换得到的用来进行显示的值,而这个转换过程是不可逆的,也就是说,灰度值无法转换为ct值。只能根据窗宽窗位得到一个大概的范围。 pixel value经过modality lut得到Hu,但是怀疑pixelvalue的读取出了问题。dicom文件中存在(0028,0106)(0028,0107)两个tag,分别是最大最小pixel value,可以用来检验你读取的pixel value 矩阵是否正确。
LUT全称look up table,实际上就是一张像素灰度值的映射表,它将实际采样到的像素灰度值经过一定的变换如阈值、反转、二值化、对比度调整、线性变换等,变成了另外一 个与之对应的灰度值,这样可以起到突出图像的有用信息,增强图像的光对比度的作用。