Skip to content

Commit b0e693a

Browse files
committed
[CREATE] Final Source Code
1 parent b400982 commit b0e693a

File tree

2 files changed

+122
-2
lines changed

2 files changed

+122
-2
lines changed

scripts/check_missing_patients.sh

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/bin/bash
2+
#
3+
# Author: Francisco Maria Calisto
4+
# Maintainer: Francisco Maria Calisto
5+
# Email: francisco.calisto@tecnico.ulisboa.pt
6+
# License: ACADEMIC & COMMERCIAL
7+
# Created Date: 2024-09-28
8+
# Revised Date: 2024-09-28
9+
# Version: 1.3
10+
# Status: Development
11+
# Usage: ./check_missing_patients.sh
12+
# Description: This script checks whether each anonymized Patient ID from the CSV file appears in the path of any DICOM (.dcm) file under the configured DICOM folder (see "unchecked_dir" below).
13+
14+
# Abort as soon as any command exits with a non-zero status.
set -e

# Configuration: every location below is derived from the user's home directory.
home="${HOME}"                                            # User's home directory
root_dir="${home}/Git"                                    # Root directory where the project is located
dataset_dir="${root_dir}/dataset-multimodal-breast/data"  # Shared prefix of all dataset paths

# DICOM folder that is searched for Patient IDs.
# NOTE(review): the active value was marked "TO DELETE" by the author and the
# commented-out line points at the "unexplored" folder named in the file
# header - confirm which one is intended before relying on the results.
# unchecked_dir="${dataset_dir}/curation/unexplored"
unchecked_dir="${dataset_dir}/curation/checking"  # TO DELETE

# CSV file with anonymized patient IDs.
csv_file="${dataset_dir}/birads/anonymized_patients_birads_curation.csv"

# Logging: one directory, and a log file whose name carries the start timestamp.
LOG_DIR="${dataset_dir}/logs"
LOG_FILE="${LOG_DIR}/check_missing_patients_$(date +'%Y%m%d_%H%M%S').log"

# Create the log directory (and any missing parents) before the first log write.
mkdir -p "${LOG_DIR}"
28+
29+
#######################################
# Write one timestamped line to the console and to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text
# Outputs:   "YYYY-MM-DD HH:MM:SS - <message>" on stdout, and the same line
#            appended to LOG_FILE
#######################################
log_message() {
  local message="$1"

  # tee -a duplicates the line: once to stdout, once appended to the log file.
  printf '%s - %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$message" \
    | tee -a "$LOG_FILE"
}
33+
34+
#######################################
# Stop the script unless a required path exists.
# Arguments: $1 - path to check (file or directory)
#            $2 - friendly name used in the error message
# Outputs:   an error line via log_message when the path is missing
# Returns:   0 when the path exists; otherwise the script exits with status 1
#######################################
validate_path() {
  local target="$1"
  local label="$2"

  # Happy path first: nothing to report when the path is there.
  if [[ -e "$target" ]]; then
    return 0
  fi

  log_message "Error: $label ($target) does not exist. Exiting."
  exit 1 # Terminate the whole script, not just this function
}
44+
45+
# Pre-flight: stop early unless both the DICOM folder and the patient CSV exist.
validate_path "$unchecked_dir" "Unchecked folder (DICOM directory)"
validate_path "$csv_file" "CSV file (Patient data)"

# Script-wide state, filled in while the CSV is processed.
declare -a unique_patients=()    # Each distinct Patient ID read from the CSV
declare -a not_found_patients=() # Patient IDs with no matching DICOM file
52+
53+
#######################################
# Test whether a value is one of the remaining arguments.
# The comparison is an exact, literal string match (no glob or regex).
# Arguments: $1    - value to look for
#            $2... - candidate values, typically "${some_array[@]}"
# Returns:   0 if the value equals one of the candidates, 1 otherwise
#            (including when no candidates are given)
#######################################
element_in_array() {
  # Without any argument there is nothing to compare; also avoids a failing
  # 'shift' when the function is called outside a conditional under 'set -e'.
  if [[ $# -eq 0 ]]; then
    return 1
  fi

  local needle="$1"
  shift

  # 'candidate' is local so the loop never clobbers a caller's variable.
  local candidate
  for candidate in "$@"; do
    if [[ "$candidate" == "$needle" ]]; then
      return 0 # Element found
    fi
  done

  return 1 # Element not found
}
67+
68+
#######################################
# Check whether a Patient ID appears in the path of any .dcm file below the
# DICOM folder, and record it as missing when it does not.
#
# The ID is matched as a literal substring (grep -F) of each file's path
# RELATIVE to unchecked_dir, so:
#   - characters such as '.' in an ID are not treated as regex wildcards;
#   - an ID starting with '-' is not mistaken for a grep option;
#   - text in the folder's own absolute path (e.g. the user name in $HOME)
#     can no longer produce a false "found".
# Sub-folder names below unchecked_dir still take part in the match.
# NOTE(review): this is still a substring test, so a short ID also matches a
# longer one that contains it - tighten once the file naming scheme is known.
#
# Globals:   unchecked_dir (read), not_found_patients (appended to)
# Arguments: $1 - the anonymized Patient ID to search for
# Outputs:   one "found" / "NOT found" line via log_message
#######################################
check_patient_in_dicom_files() {
  local patient_id="$1"

  # The subshell keeps the 'cd' from affecting the rest of the script; its
  # stdout is silenced so only find's output reaches grep.
  if (cd -- "$unchecked_dir" > /dev/null && find . -type f -name '*.dcm') \
    | grep -qF -- "$patient_id"; then
    log_message "Patient ID: $patient_id found in DICOM files."
  else
    log_message "Patient ID: $patient_id NOT found in any DICOM file."
    not_found_patients+=("$patient_id") # Add to the list of missing Patient IDs
  fi
}
82+
83+
#######################################
# Read the CSV and check every distinct Patient ID against the DICOM files.
# The Patient ID is taken from the second comma-separated column of each line.
# Lines whose second column is empty are ignored; an ID that was already seen
# is logged as a duplicate and not checked again.
# NOTE(review): fields are split on every ',' with no CSV quoting support, and
# a header row (if the file has one) is treated like any other line - confirm
# against the actual file.
#
# Globals:   csv_file (read), unique_patients (appended to)
# Arguments: none
# Outputs:   progress lines via log_message
#######################################
process_csv() {
  # Local so the loop does not overwrite a caller's patient_id.
  local patient_id

  log_message "Starting to process the CSV file: $csv_file"

  # '|| [[ -n ... ]]' keeps a final line that has no trailing newline: read
  # returns non-zero at EOF but has still filled the variable.
  while IFS=',' read -r _ patient_id _ || [[ -n "$patient_id" ]]; do
    # A CRLF file leaves '\r' on the ID when it is the last column; drop it.
    patient_id="${patient_id%$'\r'}"

    # Ensure the Patient ID is not empty
    [[ -n "$patient_id" ]] || continue

    if element_in_array "$patient_id" "${unique_patients[@]}"; then
      log_message "Skipping duplicate Patient ID: $patient_id"
    else
      unique_patients+=("$patient_id") # Add to unique list
      log_message "Checking Patient ID: $patient_id from CSV"
      check_patient_in_dicom_files "$patient_id"
    fi
  done < "$csv_file"
}
101+
102+
# Run the check for every Patient ID listed in the CSV.
process_csv

# Summary: either confirm that nothing is missing, or list each missing ID on
# its own log line.
if ((${#not_found_patients[@]} == 0)); then
  log_message "All Patient IDs from the CSV were found in the DICOM files."
else
  log_message "Summary: The following Patient IDs were NOT found in any DICOM files:"
  for patient_id in "${not_found_patients[@]}"; do
    log_message "$patient_id"
  done
fi

# Report how many distinct Patient IDs were looked at in total.
log_message "Total number of unique Patient IDs processed: ${#unique_patients[@]}"

log_message "Patient ID check completed successfully."
119+
120+
# End of script

scripts/explorer.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Email: francisco.calisto@tecnico.ulisboa.pt
66
# License: ACADEMIC & COMMERCIAL
77
# Created Date: 2024-09-22
8-
# Revised Date: 2024-09-29 # Improved logging, error handling, and optimized Patient ID processing
8+
# Revised Date: 2024-09-28 # Improved logging, error handling, and optimized Patient ID processing
99
# Version: 2.28
1010
# Status: Development
1111
# Usage: ./explorer.sh
@@ -16,7 +16,7 @@
1616
set -e
1717

1818
# Configuration: Set the maximum number of DICOM files to process in one run
19-
FILE_LIMIT=1 # You can adjust this for testing or set higher for production
19+
FILE_LIMIT=50000 # You can adjust this for testing or set higher for production
2020

2121
# Define key directories and file paths for processing
2222
home="$HOME" # User's home directory

0 commit comments

Comments
 (0)