4  Pydicom DICOM Tags

Code
from pyhere import here
from pydicom import dcmread

4.1 Read

Code
# Resolve the example file relative to the project root, then parse it.
upenn1_dcm1_path = here(
    "data/MRI-DICOM/UPENN-GBM-00001/02-06-2002-NA-BRAINROUTINE-33106/2.000000-t2Flairaxial ProcessedCaPTk-17693/1-01.dcm"
)
upenn1_dcm1 = dcmread(upenn1_dcm1_path)
# A bare expression as the last line of the cell displays the dataset.
upenn1_dcm1
Dataset.file_meta -------------------------------
(0002,0000) File Meta Information Group Length  UL: 204
(0002,0001) File Meta Information Version       OB: b'\x00\x01'
(0002,0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002,0003) Media Storage SOP Instance UID      UI: 1.3.6.1.4.1.14519.5.2.1.224599622610134734824248166322625258212
(0002,0010) Transfer Syntax UID                 UI: Implicit VR Little Endian
(0002,0012) Implementation Class UID            UI: 1.3.6.1.4.1.22213.1.143
(0002,0013) Implementation Version Name         SH: '0.5'
(0002,0016) Source Application Entity Title     AE: 'POSDA'
-------------------------------------------------
(0008,0005) Specific Character Set              CS: 'ISO_IR 100'
(0008,0008) Image Type                          CS: ['DERIVED', 'SECONDARY', 'OTHER']
(0008,0012) Instance Creation Date              DA: '20020206'
(0008,0013) Instance Creation Time              TM: '120146.015000'
(0008,0016) SOP Class UID                       UI: MR Image Storage
(0008,0018) SOP Instance UID                    UI: 1.3.6.1.4.1.14519.5.2.1.224599622610134734824248166322625258212
(0008,0020) Study Date                          DA: '20020206'
(0008,0021) Series Date                         DA: '20020206'
(0008,0022) Acquisition Date                    DA: '20020206'
(0008,0023) Content Date                        DA: '20020206'
(0008,0030) Study Time                          TM: '152432'
(0008,0031) Series Time                         TM: '120146.015000'
(0008,0032) Acquisition Time                    TM: '120029.655000'
(0008,0033) Content Time                        TM: '120146.015000'
(0008,0050) Accession Number                    SH: ''
(0008,0060) Modality                            CS: 'MR'
(0008,0070) Manufacturer                        LO: 'SIEMENS'
(0008,0090) Referring Physician's Name          PN: ''
(0008,1030) Study Description                   LO: 'BRAIN^ROUTINE'
(0008,103E) Series Description                  LO: 't2_Flair_axial: Processed_CaPTk'
(0008,1090) Manufacturer's Model Name           LO: 'TrioTim'
(0010,0010) Patient's Name                      PN: 'UPENN-GBM-00001'
(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'
(0010,0030) Patient's Birth Date                DA: ''
(0010,0040) Patient's Sex                       CS: 'F'
(0010,1010) Patient's Age                       AS: '052Y'
(0010,1020) Patient's Size                      DS: '1.676403355'
(0010,1030) Patient's Weight                    DS: '68.038864155'
(0010,21C0) Pregnancy Status                    US: 4
(0012,0050) Clinical Trial Time Point ID        LO: '0'
(0012,0051) Clinical Trial Time Point Descripti ST: 'Days offset from diagnosis'
(0012,0062) Patient Identity Removed            CS: 'YES'
(0012,0063) De-identification Method            LO: 'Per DICOM PS 3.15 AnnexE. Details in 0012,0064'
(0012,0064)  De-identification Method Code Sequence  8 item(s) ---- 
   (0008,0100) Code Value                          SH: '113100'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Basic Application Confidentiality Profile'
   ---------
   (0008,0100) Code Value                          SH: '113101'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Clean Pixel Data Option'
   ---------
   (0008,0100) Code Value                          SH: '113104'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Clean Structured Content Option'
   ---------
   (0008,0100) Code Value                          SH: '113105'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Clean Descriptors Option'
   ---------
   (0008,0100) Code Value                          SH: '113107'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Retain Longitudinal Temporal Information Modified Dates Option'
   ---------
   (0008,0100) Code Value                          SH: '113108'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Retain Patient Characteristics Option'
   ---------
   (0008,0100) Code Value                          SH: '113109'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Retain Device Identity Option'
   ---------
   (0008,0100) Code Value                          SH: '113111'
   (0008,0102) Coding Scheme Designator            SH: 'DCM'
   (0008,0104) Code Meaning                        LO: 'Retain Safe Private Option'
   ---------
(0013,0010) Private Creator                     LO: 'CTP'
(0013,1010) Private tag data                    UN: b'UPENN-GBM '
(0013,1013) Private tag data                    UN: b'35191158'
(0018,0010) Contrast/Bolus Agent                LO: 'OMNISCAN 3.0 + 12.0'
(0018,0015) Body Part Examined                  CS: 'BRAIN'
(0018,0020) Scanning Sequence                   CS: ['SE', 'IR']
(0018,0021) Sequence Variant                    CS: ['SK', 'SP', 'MP', 'OSP']
(0018,0022) Scan Options                        CS: 'IR'
(0018,0023) MR Acquisition Type                 CS: '2D'
(0018,0024) Sequence Name                       SH: '*tir2d1rs13'
(0018,0025) Angio Flag                          CS: 'N'
(0018,0050) Slice Thickness                     DS: '3'
(0018,0080) Repetition Time                     DS: '9420'
(0018,0081) Echo Time                           DS: '140'
(0018,0082) Inversion Time                      DS: '2500'
(0018,0083) Number of Averages                  DS: '1'
(0018,0084) Imaging Frequency                   DS: '123.256693'
(0018,0085) Imaged Nucleus                      SH: '1H'
(0018,0086) Echo Number(s)                      IS: '1'
(0018,0087) Magnetic Field Strength             DS: '3'
(0018,0088) Spacing Between Slices              DS: '1'
(0018,0089) Number of Phase Encoding Steps      IS: '207'
(0018,0091) Echo Train Length                   IS: '13'
(0018,0093) Percent Sampling                    DS: '100'
(0018,0094) Percent Phase Field of View         DS: '75'
(0018,0095) Pixel Bandwidth                     DS: '285'
(0018,1000) Device Serial Number                LO: ''
(0018,1020) Software Versions                   LO: 'syngo MR B13 4VB13A'
(0018,1030) Protocol Name                       LO: 't2_Flair_axial'
(0018,1041) Contrast/Bolus Volume               DS: '3'
(0018,1044) Contrast/Bolus Total Dose           DS: '0'
(0018,1048) Contrast/Bolus Ingredient           CS: ''
(0018,1049) Contrast/Bolus Ingredient Concentra DS: '0'
(0018,1251) Transmit Coil Name                  SH: 'Body'
(0018,1310) Acquisition Matrix                  US: [0, 256, 192, 0]
(0018,1312) In-plane Phase Encoding Direction   CS: 'ROW'
(0018,1314) Flip Angle                          DS: '170'
(0018,1315) Variable Flip Angle Flag            CS: 'N'
(0018,1316) SAR                                 DS: '0.70089428846153'
(0018,1318) dB/dt                               DS: '0'
(0018,5100) Patient Position                    CS: 'HFS'
(0020,000D) Study Instance UID                  UI: 1.3.6.1.4.1.14519.5.2.1.325722981077189157104874710559665333106
(0020,000E) Series Instance UID                 UI: 1.3.6.1.4.1.14519.5.2.1.72111832425535404540752357374191117693
(0020,0010) Study ID                            SH: ''
(0020,0011) Series Number                       IS: '2'
(0020,0012) Acquisition Number                  IS: '1'
(0020,0013) Instance Number                     IS: '1'
(0020,0032) Image Position (Patient)            DS: [-87.807015311909, -144.35366297552, -67.675415039063]
(0020,0037) Image Orientation (Patient)         DS: [0.9999984769134, -0.0017453283007, 0, 0.00174532830068, 0.9999984769134, 0]
(0020,0052) Frame of Reference UID              UI: 1.3.6.1.4.1.14519.5.2.1.74866969049680606431110099032071163024
(0020,1040) Position Reference Indicator        LO: ''
(0020,1041) Slice Location                      DS: '-67.675415039063'
(0028,0002) Samples per Pixel                   US: 1
(0028,0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028,0010) Rows                                US: 256
(0028,0011) Columns                             US: 192
(0028,0030) Pixel Spacing                       DS: [0.9375, 0.9375]
(0028,0100) Bits Allocated                      US: 16
(0028,0101) Bits Stored                         US: 16
(0028,0102) High Bit                            US: 15
(0028,0103) Pixel Representation                US: 0
(0028,0106) Smallest Image Pixel Value          US: 0
(0028,0107) Largest Image Pixel Value           US: 1580
(0028,0303) Longitudinal Temporal Information M CS: 'MODIFIED'
(0028,1050) Window Center                       DS: '554'
(0028,1051) Window Width                        DS: '1187'
(0028,1052) Rescale Intercept                   DS: '0'
(0028,1053) Rescale Slope                       DS: '1'
(0028,1054) Rescale Type                        LO: 'US'
(0028,1055) Window Center & Width Explanation   LO: 'Algo1'
(0032,1060) Requested Procedure Description     LO: 'MRSPC'
(0040,0244) Performed Procedure Step Start Date DA: '20020206'
(0040,0245) Performed Procedure Step Start Time TM: '115758.968000'
(0040,0254) Performed Procedure Step Descriptio LO: 'MRSPC'
(7FE0,0010) Pixel Data                          OW: Array of 98304 elements

4.2 Get DICOM Tags (Single .dcm)

4.2.1 Use DICOM Attribute Names

Code
upenn1_dcm1.PatientID
'UPENN-GBM-00001'

4.2.2 Use DICOM Tags

(0010,0020) is the DICOM tag (group, element) for Patient ID.

Code
# Look up a data element by its (group, element) tag, written as a tuple.
# Both forms below print the whole data element (tag, name, VR and value),
# not just the bare value.
print(upenn1_dcm1[0x10, 0x20])

print(upenn1_dcm1.get((0x0010, 0x0020)))
(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'
(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'

Safe Access:

Code
# get() with a fallback: the tag is present here, so its data element is returned
# and the "Unknown" default is not used.
upenn1_dcm1.get((0x0010, 0x0020), "Unknown")
(0010,0020) Patient ID                          LO: 'UPENN-GBM-00001'
Code
# (0008,1010): Station Name
# This tag is not in the file, so get() returns the supplied default
# instead of raising.
upenn1_dcm1.get((0x0008, 0x1010), "Unknown")
'Unknown'

4.2.3 Loop over tags

Code
# Scan the data elements yielded by the dataset and stop at Patient ID
for elem in upenn1_dcm1:
    if elem.tag != (0x0010, 0x0020):  # Not (0010,0020): keep looking
        continue
    print("Patient ID:", elem.value)
    break
Patient ID: UPENN-GBM-00001

4.3 Class: DcmMetaExtractor

Code
import pydicom
import pandas as pd
import warnings

class DcmMetaExtractor:
    """
    Extract a configurable set of DICOM header tags from a single file.

    The tags to read are held in ``tags_to_extract``, a mapping from a
    ``(group, element)`` tag to the label used for that value in the result.
    """

    # Acquisition-related tags used when the caller supplies no mapping.
    DEFAULT_TAGS = {
        (0x0008, 0x0070): "Manufacturer",
        (0x0008, 0x1090): "Manufacturer's Model Name",
        (0x0018, 0x0087): "Magnetic Field Strength",
        (0x0018, 0x0050): "Slice Thickness",
        (0x0018, 0x0088): "Spacing Between Slices",
        (0x0018, 0x0080): "Repetition Time",
        (0x0018, 0x0081): "Echo Time",
        (0x0018, 0x1314): "Flip Angle",
    }

    def __init__(self, tags_to_extract=None):
        """
        Initialize the DcmMetaExtractor with the tags to extract.

        Parameters:
            tags_to_extract (dict, optional): Mapping of ``(group, element)``
                tag tuples to output labels. When omitted (or empty), a copy
                of ``DEFAULT_TAGS`` is used.
        """
        # Copy the defaults so mutating one instance's mapping cannot leak
        # into the class-level constant or into other instances.
        self.tags_to_extract = tags_to_extract or dict(self.DEFAULT_TAGS)

    def extract_tags(self, fp):
        """
        Extract specified tags from the DICOM file and return as a pandas Series.

        This is a best-effort reader: any failure while reading or looking up
        tags is reported as a warning rather than raised, so that one bad file
        does not abort a batch run.

        Parameters:
            fp (str): Path to the DICOM file.

        Returns:
            pd.Series: A pandas Series indexed by the configured labels. Tags
            absent from the file are pd.NA; if the file cannot be processed at
            all, every entry is pd.NA.
        """
        try:
            # Only the requested elements are needed, so ask pydicom to skip
            # everything else (notably the potentially large Pixel Data).
            ds = pydicom.dcmread(fp, specific_tags=list(self.tags_to_extract))

            # Initialize a dictionary to store the results
            results = {}

            # Extract values for the specified tags
            for tag, name in self.tags_to_extract.items():
                element = ds.get(tag)  # DataElement for the tag, or None if absent
                results[name] = element.value if element is not None else pd.NA

        except Exception as e:
            # Deliberately broad: warn and fall back to an all-missing row.
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(
                f"Failed to read DICOM file '{fp}'. Returning missing values. Reason: {e}",
                stacklevel=2,
            )
            results = {name: pd.NA for name in self.tags_to_extract.values()}

        # Convert the results dictionary to a pandas Series
        return pd.Series(results)
Code
# Build an extractor with the default tag set.
dcm_extractor = DcmMetaExtractor()

# Pull those tags from the example file; the bare expression on the last
# line displays the resulting Series.
example_dcm_path = here(
    "data/MRI-DICOM/UPENN-GBM-00001/02-06-2002-NA-BRAINROUTINE-33106/2.000000-t2Flairaxial ProcessedCaPTk-17693/1-01.dcm"
)
dcm_extractor.extract_tags(example_dcm_path)
Manufacturer                 SIEMENS
Manufacturer's Model Name    TrioTim
Magnetic Field Strength          3.0
Slice Thickness                  3.0
Spacing Between Slices           1.0
Repetition Time               9420.0
Echo Time                      140.0
Flip Angle                     170.0
dtype: object