o
    iH                     @  sL  d dl mZ d dlZd dlmZ d dlmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZmZ d dlmZ d dlm Z m!Z!m"Z"m#Z# d dl$m%Z%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+ dd Z,e(dd rej-j./e&e, e(dde'd\Z0Z1ee2dZ3dgZ4G dd dZ5dS )    )annotationsN)path)Anycast)get_context)EnsureSameShaped)
get_logger)SegSummarizer)datafold_read)config_parser)ConfigParser)
DataLoaderDatasetpartition_dataset)no_collation)ComposeEnsureTyped
LoadImagedOrientationd)ImageMetaKeyStrEnummin_versionoptional_import)DataStatsKeysImageStatsKeysc                 C  s   |  d|jS )Nztag:yaml.org,2002:str)represent_scalarvalue)dumperdata r   d/home/dell461/cl/sdc2/last_ska_mid/HISourceFinder-master-l/src/monai/apps/auto3dseg/data_analyzer.pystrenum_representer#   s   r!   yaml   tqdmz4.47.0)module_nameDataAnalyzerc                   @  sb   e Zd ZdZ										
			d8d9d$d%Zed:d+d,Zd;d.d/Z	
	0		-	d<d=d6d7ZdS )>r&   a  
    The DataAnalyzer automatically analyzes given medical image dataset and reports the statistics.
    The module expects file paths to the image data and utilizes the LoadImaged transform to read the
    files, which supports nii, nii.gz, png, jpg, bmp, npz, npy, and dcm formats. Currently, only
    segmentation task is supported, so the user needs to provide paths to the image and label files
    (if have). Also, label data format is preferred to be (1,H,W,D), with the label index in the
    first dimension. If it is in onehot format, it will be converted to the preferred format.

    Args:
        datalist: a Python dictionary storing group, fold, and other information of the medical
            image dataset, or a string to the JSON file storing the dictionary.
        dataroot: user's local directory containing the datasets.
        output_path: path to save the analysis result.
        average: whether to average the statistical value across different image modalities.
        do_ccp: apply the connected component algorithm to process the labels/images
        device: a string specifying hardware (CUDA/CPU) utilized for the operations.
        worker: number of workers to use for loading datasets in each GPU/CPU sub-process.
        image_key: a string that user specify for the image. The DataAnalyzer will look it up in the
            datalist to locate the image files of the dataset.
        label_key: a string that user specify for the label. The DataAnalyzer will look it up in the
            datalist to locate the label files of the dataset. If label_key is NoneType or "None",
            the DataAnalyzer will skip looking for labels and all label-related operations.
        hist_bins: bins to compute histogram for each image channel.
        hist_range: ranges to compute histogram for each image channel.
        fmt: format used to save the analysis results. Currently support ``"json"`` and ``"yaml"``, defaults to "yaml".
        histogram_only: whether to only compute histograms. Defaults to False.
        extra_params: other optional arguments. Currently supported arguments are :
            'allowed_shape_difference' (default 5) can be used to change the default tolerance of
            the allowed shape differences between the image and label items. In case of shape mismatch below
            the tolerance, the label image will be resized to match the image using nearest interpolation.


    Examples:
        .. code-block:: python

            from monai.apps.auto3dseg.data_analyzer import DataAnalyzer

            datalist = {
                "testing": [{"image": "image_003.nii.gz"}],
                "training": [
                    {"fold": 0, "image": "image_001.nii.gz", "label": "label_001.nii.gz"},
                    {"fold": 0, "image": "image_002.nii.gz", "label": "label_002.nii.gz"},
                    {"fold": 1, "image": "image_001.nii.gz", "label": "label_001.nii.gz"},
                    {"fold": 1, "image": "image_004.nii.gz", "label": "label_004.nii.gz"},
                ],
            }

            dataroot = '/datasets' # the directory where you have the image files (nii.gz)
            DataAnalyzer(datalist, dataroot)

    Notes:
        The module can also be called from the command line interface (CLI).

    For example:

    .. code-block:: bash

        python -m monai.apps.auto3dseg \
            DataAnalyzer \
            get_all_case_stats \
            --datalist="my_datalist.json" \
            --dataroot="my_dataroot_dir"

     ./datastats.yamlTFcuda   imagelabelr   Nr"   datalist
str | dictdatarootstroutput_pathaveragebooldo_ccpdevicestr | torch.deviceworkerint	image_key	label_key
str | None	hist_binslist | int | None
hist_rangelist | Nonefmthistogram_onlyextra_paramsr   c                 K  s   t |rtd| d t| d || _|| _|| _|| _	|| _
t|| _|| _|| _|	dkr7d n|	| _|
| _|d u rEddgn|| _|| _|| _|| _d S )NzFile z( already exists and will be overwritten.z' will be overwritten by a new datastat.Noneii  )r   isfilewarningswarnloggerdebugr-   r/   r1   r2   r4   torchr5   r7   r9   r:   r<   r>   r@   rA   rB   )selfr-   r/   r1   r2   r4   r5   r7   r9   r:   r<   r>   r@   rA   rB   r   r   r    __init__r   s"   

zDataAnalyzer.__init__keys	list[str]resultdictreturnc                   sv   t j vst j t j vrdS  fdd| D }|D ]}d|v r8t|d r8td| d|d  d  dS qdS )	ay  
        Check data uniformity since DataAnalyzer provides no support to multi-modal images with different
        affine matrices/spacings due to monai transforms.

        Args:
            keys: a list of string-type keys under image_stats dictionary.

        Returns:
            False if one of the selected key values is not constant across the dataset images.

        Tc                   s    g | ]} t j t j | qS r   )r   SUMMARYIMAGE_STATS).0keyrN   r   r    
<listcomp>   s     z7DataAnalyzer._check_data_uniformity.<locals>.<listcomp>stdevzsummary image_stats z has non-zero stdev .F)r   rQ   rR   npanyrG   rH   )rL   rN   Zconstant_propspropr   rU   r    _check_data_uniformity   s   z#DataAnalyzer._check_data_uniformitytrainingc              	   C  sn  t ji t jg i}t ji t jg i}| jjdkrd}td ntj	 }td| d |dkrt
d}| I}| }g }	t|D ]}
|j| j|
||||fd}|	| qC|	D ]}|  qZ|	D ]}|  qc|D ]}|t j |t j  qlW d   n1 sw   Y  n	| d	dd||}t| j| j| j| j| j| j| jd
}t|t j }|tt|t j |t j< ||t j d< |t j |t j< | t j!g|std | j"r!td| j" d t#j$|| j"| j%ddd | j"&d| j% d| j% }|| j"kr|d| j% 7 }td| d t#j$||| j%ddd | jjdkr-tj'  |t j |t j< |S )a  
        Get all case stats. Caller of the DataAnalyser class. The function initiates multiple GPU or CPU processes of the internal
        _get_all_case_stats functions, which iterates datalist and call SegSummarizer to generate stats for each case.
        After all case stats are generated, SegSummarizer is called to combine results.

        Args:
            key: dataset key
            transform_list: option list of transforms before SegSummarizer

        Returns:
            A data statistics dictionary containing
                "stats_summary" (summary statistics of the entire datasets). Within stats_summary
                there are "image_stats"  (summarizing info of shape, channel, spacing, and etc
                using operations_summary), "image_foreground_stats" (info of the intensity for the
                non-zero labeled voxels), and "label_stats" (info of the labels, pixel percentage,
                image_intensity, and each individual label in a list)
                "stats_by_cases" (List type value. Each element of the list is statistics of
                an image-label info. Within each element, there are: "image" (value is the
                path to an image), "label" (value is the path to the corresponding label), "image_stats"
                (summarizing info of shape, channel, spacing, and etc using operations),
                "image_foreground_stats" (similar to the previous one but one foreground image), and
                "label_stats" (stats of the individual labels )

        Notes:
            Since the backend of the statistics computation are torch/numpy, nan/inf value
            may be generated and carried over in the computation. In such cases, the output
            dictionary will include .nan/.inf in the statistics.

        cpur#   zUsing CPU for data analyzing!zFound z GPUs for data analyzing!
forkserver)targetargsNr   r2   r4   r<   r>   rA   n_caseszVData spacing is not completely uniform. MONAI transforms may provide unexpected resultzWriting data stats to rX   F)r@   default_flow_style	sort_keysz	_by_case.z	.by_case.zWriting by-case data stats to z, this may take a while.r)   )(r   rQ   BY_CASEr5   typerG   inforI   r)   device_countr   ManagerlistrangeProcess_get_all_case_statsappendstartjoinextendr	   r9   r:   r2   r4   r<   r>   rA   len	summarizer   r\   r   SPACINGr1   r   export_config_filer@   replaceempty_cache)rJ   rT   transform_listrN   result_bycasenprocsZtmp_ctxmanagermanager_list	processesrankp_
summarizerrc   Zby_case_pathr   r   r    get_all_case_stats   st   



	

zDataAnalyzer.get_all_case_statsr#   r   
world_sizer}   rT   ry   c                 C  sB  t | j| j| j| j| j| j| jd}tt	d| j| jg}|du rMt
|dddt|dtjdt|ddg}| jdurM| jd	d
}|t| j| j|d t|}	t| j| jd|d\}
}|t|
krlt|
|d| }
n|t|
k r|t|
t|
d| ng }
t|
|	d}t|dd| jt| jjdkd}tj i tj!g i}| jjdkr| jntd|}|jdkrtj"# rtj"$ dkst%&d| d td}t'st()d t'r|dkrt*|n|D ]5}|d }z;|| j +||| j< d}| jdur|| j }|j,d dkrtj-|ddn|d }d}|+||| j< ||}W n t.y } zd|/ v r6|d t0j1 }n	|| j j2t0j1 }t%&d| d| d|  | jjdkrt%&d z:|| j +d|| j< | jdur|| j }|s|j,d dkrtj-|ddn|d }|+d|| j< ||}W n, t.y } zt%&d| d| d|  W Y d}~W Y d}~qd}~ww W Y d}~qW Y d}~nd}~ww tj3|tj3 tj4|tj4 i}| js|tj5 |tj5< | jdkr|tj6 |tj6< | jdur
|7tj8|tj8 tj9|tj9 i |tj! | q|du r|S || dS )a  
        Get all case stats from a partitioned datalist. The function can only be called internally by get_all_case_stats.
        Args:
            rank: GPU process rank, 0 for CPU process
            world_size: total number of GPUs, 1 for CPU process
            manager_list: multiprocessing manager list object, if using multi-GPU.
            key: dataset key
            transform_list: option list of transforms before SegSummarizer
        rb   NT)rL   ensure_channel_first
image_onlytensor)rL   	data_typedtypeRAS)rL   axcodesallowed_shape_difference   )rL   
source_keyr   )r-   basedirfoldrT   )r   num_partitions)r   	transformr#   Fr)   )
batch_sizeshufflenum_workers
collate_fn
pin_memoryr^   r   zdevice=z5 but CUDA device is not available, using CPU instead.z;tqdm is not installed. not displaying the caching progress.)dimZimage_meta_dictzUnable to process data z on z. zSDataAnalyzer `device` set to GPU execution hit an exception. Falling back to `cpu`.):r	   r9   r:   r2   r4   r<   r>   rA   rk   filterr   r   rI   floatr   rB   popro   r   r   r
   r-   r/   rs   r   r   r   r7   r   r5   rg   r   rQ   rf   r)   is_availableri   rG   rh   has_tqdmrE   rF   r$   toshapeargmaxBaseExceptionrL   r   FILENAME_OR_OBJmetaBY_CASE_IMAGE_PATHBY_CASE_LABEL_PATHrR   IMAGE_HISTOGRAMupdateFG_IMAGE_STATSLABEL_STATS)rJ   r   r   r}   rT   ry   r   rL   r   r   filesr   dataset
dataloaderrz   r5   
batch_dataZ_label_argmaxr,   derrfilenamestats_by_casesr   r   r    rn     s   	

$
"


&

&
z DataAnalyzer._get_all_case_stats)r'   r(   TFr)   r*   r+   r,   r   Nr"   F)r-   r.   r/   r0   r1   r0   r2   r3   r4   r3   r5   r6   r7   r8   r9   r0   r:   r;   r<   r=   r>   r?   r@   r0   rA   r3   rB   r   )rL   rM   rN   rO   rP   r3   )r]   N)r   r#   Nr]   N)r   r8   r   r8   r}   r?   rT   r0   ry   r?   rP   r   )	__name__
__module____qualname____doc__rK   staticmethodr\   r   rn   r   r   r   r    r&   0   s0    D$
_)6
__future__r   rE   osr   typingr   r   numpyrY   rI   torch.multiprocessingr   Zmonai.apps.auto3dseg.transformsr   monai.apps.utilsr   monai.auto3dsegr	   monai.auto3dseg.utilsr
   monai.bundler   monai.bundle.config_parserr   
monai.datar   r   r   monai.data.utilsr   monai.transformsr   r   r   r   monai.utilsr   r   r   r   monai.utils.enumsr   r   r!   r"   
SafeDumperadd_multi_representerr$   r   r   rG   __all__r&   r   r   r   r    <module>   s2   
