#!/bin/bash
#
# Author: Francisco Maria Calisto
# Maintainer: Francisco Maria Calisto
# Email: francisco.calisto@tecnico.ulisboa.pt
# License: ACADEMIC & COMMERCIAL
# Created Date: 2024-09-28
# Revised Date: 2024-09-28
# Version: 1.3
# Status: Development
# Usage: ./check_missing_patients.sh
# Description: This script checks whether each anonymized Patient ID from the CSV file exists in any DICOM file within the "unexplored" folder.

# Exit immediately if any command fails.
# NOTE: `-o pipefail` is deliberately NOT enabled. The DICOM lookup pipes
# `find` into `grep -q`, which exits on the first match and can make `find`
# die from SIGPIPE; under pipefail a hit would then be reported as a miss.
# `-u` is also left off because empty arrays are expanded with "${arr[@]}",
# which older bash releases treat as an unbound variable.
set -e

# Configuration: key directories and file paths.
# No padding inside the quotes: a stray space becomes part of the path.
home="$HOME"         # User's home directory
root_dir="$home/Git" # Root directory where the project is located
# unchecked_dir="$root_dir/dataset-multimodal-breast/data/curation/unexplored" # Directory with unprocessed DICOM files
unchecked_dir="$root_dir/dataset-multimodal-breast/data/curation/checking" # TO DELETE
csv_file="$root_dir/dataset-multimodal-breast/data/birads/anonymized_patients_birads_curation.csv" # CSV file with anonymized patient IDs
LOG_DIR="$root_dir/dataset-multimodal-breast/data/logs" # Directory for log files
LOG_FILE="$LOG_DIR/check_missing_patients_$(date +'%Y%m%d_%H%M%S').log" # Unique log file with timestamp

# Ensure the log directory exists (create it, including parents, if needed)
mkdir -p -- "$LOG_DIR"

#######################################
# Write a timestamped message to stdout and append it to the log file.
# Globals:   LOG_FILE (read) - file the message is appended to
# Arguments: $1 - message text
# Outputs:   "YYYY-MM-DD HH:MM:SS - <message>" on stdout and in LOG_FILE
# Returns:   exit status of tee (non-zero if the log file cannot be written)
#######################################
log_message() {
  # printf keeps the message literal regardless of its content.
  printf '%s - %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$1" | tee -a -- "$LOG_FILE"
}

#######################################
# Abort the script when a required path (directory or file) is missing.
# Arguments: $1 - path to validate
#            $2 - friendly name used in the error message
# Outputs:   an error line via log_message when the path does not exist
# Returns:   0 if the path exists; otherwise exits the script with status 1
#######################################
validate_path() {
  # The path is tested exactly as given; no padding may surround "$1".
  if [[ ! -e "$1" ]]; then
    log_message "Error: $2 ($1) does not exist. Exiting."
    exit 1 # Terminate script if the path is missing
  fi
}

# Ensure the DICOM directory and the CSV file exist before continuing;
# validate_path exits the script with status 1 if either is missing.
validate_path "$unchecked_dir" "Unchecked folder (DICOM directory)"
validate_path "$csv_file" "CSV file (Patient data)"

# Result accumulators filled while the CSV is processed
unique_patients=()    # Unique Patient IDs seen in the CSV, in first-seen order
not_found_patients=() # Patient IDs with no matching DICOM file

#######################################
# Test whether a value is present in a list of values.
# Arguments: $1   - element to search for
#            $2.. - the values to search in (pass an array as "${arr[@]}")
# Returns:   0 if some value equals $1 exactly, 1 otherwise (also when the
#            list is empty)
#######################################
element_in_array() {
  local element="$1"
  local item # keep the loop variable out of the global scope
  shift
  for item in "$@"; do
    # Right-hand side is quoted, so this is a literal comparison, not a glob.
    if [[ "$item" == "$element" ]]; then
      return 0 # Element found
    fi
  done
  return 1 # Element not found
}

#######################################
# Check whether a Patient ID appears in the path of any *.dcm file below
# the unchecked directory, and record it as missing when it does not.
# Globals:   unchecked_dir (read), not_found_patients (appended to)
# Arguments: $1 - the anonymized Patient ID to look for
# Outputs:   one "found" / "NOT found" line via log_message
# Returns:   status of the final log_message call
#######################################
check_patient_in_dicom_files() {
  local patient_id="$1"

  # The search runs relative to unchecked_dir so that the configured root
  # path itself can never produce a match; only sub-directories and file
  # names below it are considered. The ID is matched as a fixed substring
  # (-F), not as a regular expression, and `--` protects IDs that start
  # with a dash.
  if (cd -- "$unchecked_dir" && find . -type f -name '*.dcm') \
    | grep -qF -- "$patient_id"; then
    log_message "Patient ID: $patient_id found in DICOM files."
  else
    log_message "Patient ID: $patient_id NOT found in any DICOM file."
    not_found_patients+=("$patient_id") # Add to the list of missing Patient IDs
  fi
}

#######################################
# Read the CSV file and check every distinct Patient ID against the DICOM
# files. The Patient ID is taken from the second comma-separated column.
# Globals:   csv_file (read), unique_patients (appended to)
# Arguments: none
# Outputs:   progress lines via log_message
# Returns:   0 on success
#######################################
process_csv() {
  local patient_id

  log_message "Starting to process the CSV file: $csv_file"

  # IFS is a comma only, so spaces inside a field are preserved. The
  # `|| [[ -n ... ]]` clause keeps a final line that has no trailing newline.
  while IFS=',' read -r _ patient_id _ || [[ -n "$patient_id" ]]; do
    # Drop a trailing carriage return (CRLF file with only two columns).
    patient_id=${patient_id%$'\r'}

    # Skip rows whose Patient ID column is empty.
    [[ -n "$patient_id" ]] || continue

    # Only process a Patient ID the first time it is seen.
    if ! element_in_array "$patient_id" "${unique_patients[@]}"; then
      unique_patients+=("$patient_id") # Add to unique list
      log_message "Checking Patient ID: $patient_id from CSV"
      check_patient_in_dicom_files "$patient_id"
    else
      log_message "Skipping duplicate Patient ID: $patient_id"
    fi
  done < "$csv_file"
}

# Check every Patient ID listed in the CSV
process_csv

# Report the Patient IDs that were not found, if any
if (( ${#not_found_patients[@]} > 0 )); then
  log_message "Summary: The following Patient IDs were NOT found in any DICOM files:"
  for missing_id in "${not_found_patients[@]}"; do
    log_message "$missing_id"
  done
else
  log_message "All Patient IDs from the CSV were found in the DICOM files."
fi

# Log the total number of unique Patient IDs processed
log_message "Total number of unique Patient IDs processed: ${#unique_patients[@]}"

log_message "Patient ID check completed successfully."

# End of script