Skip to content

Commit

Permalink
Metadata categorical (#133)
Browse files Browse the repository at this point in the history
* fixing items

* version increment

* change ordering and provide metadata creation

* adding more logging information

* automatically upload ndjson to info sibling folder

* add categorical config to metadata

* package nudge
  • Loading branch information
BryonLewis authored Apr 24, 2024
1 parent 21e2418 commit 70e13cd
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 14 deletions.
2 changes: 1 addition & 1 deletion client/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "dive-dsa",
"version": "1.10.5",
"version": "1.10.6",
"author": {
"name": "Kitware, Inc.",
"email": "Bryon.Lewis@kitware.com"
Expand Down
2 changes: 2 additions & 0 deletions client/platform/web-girder/api/divemetadata.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ export interface MetadataFilterItem {
/**
 * Display configuration for the metadata filter UI.
 *
 * NOTE(review): the exact meaning of `display` and `hide` is not visible in
 * this hunk — presumably lists of metadata keys to show/hide; confirm against
 * the consuming components.
 */
export interface FilterDisplayConfig {
display: string[];
hide: string[];
/**
 * Threshold that the filter item component compares against a categorical
 * key's `unique` count to choose between a select list and a free-text
 * search field.
 */
categoricalLimit: number;
}

export interface MetadataFilterKeysItem {
category: 'search' | 'categorical' | 'numerical' | 'boolean';
count: number;
unique: number;
set?: string[] | number[];
range?: {
min: number,
Expand Down
7 changes: 7 additions & 0 deletions client/platform/web-girder/views/DIVEMetadataFilter.vue
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ export default defineComponent({
}
return undefined;
};
const categoricalLimit = ref(props.displayConfig.categoricalLimit);
watch(() => props.displayConfig, () => {
categoricalLimit.value = props.displayConfig.categoricalLimit;
});
return {
Expand All @@ -198,6 +202,7 @@ export default defineComponent({
search,
currentFilter,
defaultEnabledKeys,
categoricalLimit,
changePage,
updateFilter,
clearFilter,
Expand Down Expand Up @@ -272,6 +277,7 @@ export default defineComponent({
:default-value="getDefaultValue(key)"
:filter-item="filterItem"
:default-enabled="defaultEnabledKeys.includes(key)"
:categorical-limit="categoricalLimit"
@update-value="updateFilter(key, $event)"
@clear-filter="clearFilter(key)"
/>
Expand All @@ -290,6 +296,7 @@ export default defineComponent({
:default-value="getDefaultValue(key)"
:filter-item="filterItem"
:default-enabled="defaultEnabledKeys.includes(key)"
:categorical-limit="categoricalLimit"
@update-value="updateFilter(key, $event)"
@clear-filter="clearFilter(key)"
/>
Expand Down
12 changes: 7 additions & 5 deletions client/platform/web-girder/views/DIVEMetadataFilterItem.vue
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@ export default defineComponent({
type: Boolean,
default: false,
},
categoricalLimit: {
type: Number,
default: 50,
},
},
setup(props, { emit }) {
const set = ref(props.filterItem.set);
const value: Ref<undefined | boolean | number | string | string[] | number[]> = ref(props.defaultValue);
const rangeFilterEnabled = ref(false);
const categoryLimit = ref(20);
const enabled = ref(props.defaultEnabled); // numerical enabled filter
watch([value, enabled], () => {
if (enabled.value) {
Expand All @@ -41,7 +44,7 @@ export default defineComponent({
value: value.value,
category: props.filterItem.category,
};
if (props.filterItem.category === 'categorical' && props.filterItem.count > categoryLimit.value) {
if (props.filterItem.category === 'categorical' && props.filterItem.unique > props.categoricalLimit) {
update.category = 'search';
}
if (props.filterItem.category === 'numerical' && !enabled.value) {
Expand All @@ -64,7 +67,6 @@ export default defineComponent({
set,
value,
rangeFilterEnabled,
categoryLimit,
enabled,
};
},
Expand All @@ -73,7 +75,7 @@ export default defineComponent({

<template>
<div class="mx-2">
<div v-if="filterItem.category === 'categorical' && filterItem.count < categoryLimit && set">
<div v-if="filterItem.category === 'categorical' && filterItem.unique < categoricalLimit && set">
<v-select
v-model="value"
:items="set"
Expand All @@ -84,7 +86,7 @@ export default defineComponent({
:label="label"
/>
</div>
<div v-else-if="filterItem.category === 'search' || (filterItem.category === 'categorical' && filterItem.count >= categoryLimit)">
<div v-else-if="filterItem.category === 'search' || (filterItem.category === 'categorical' && filterItem.unique >= categoricalLimit)">
<v-text-field v-model="value" :label="label" />
</div>
<div v-else-if="filterItem.category === 'boolean'">
Expand Down
2 changes: 1 addition & 1 deletion client/platform/web-girder/views/DIVEMetadataSearch.vue
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export default defineComponent({
},
setup(props) {
const folderList: Ref<MetadataResultItem[]> = ref([]);
const displayConfig: Ref<FilterDisplayConfig> = ref({ display: [], hide: [] });
const displayConfig: Ref<FilterDisplayConfig> = ref({ display: [], hide: [], categoricalLimit: 50 });
const totalPages = ref(0);
const currentPage = ref(0);
const count = ref(0);
Expand Down
140 changes: 140 additions & 0 deletions scripts/JSONLAddMetadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import json
import random
import click
from datetime import datetime
import re

# Sample anatomy labels; one is picked at random for each record's 'Anatomy' value.
anatomies = [
    'colon',
    'lower intestine',
    'upper intestine',
]

# Sample disorder names; one is picked at random for each record's 'Disorder' value.
gastrointestinal_illnesses = [
    "Gastroesophageal reflux disease (GERD)",
    "Peptic ulcer disease",
    "Barrett's esophagus",
    "Esophageal cancer",
    "Gastritis",
    "Gastric ulcer",
    "Gastric cancer",
    "Helicobacter pylori infection",
    "Celiac disease",
    "Inflammatory bowel disease (Crohn's disease, ulcerative colitis)",
    "Colon polyps",
    "Colon cancer",
    "Diverticulosis",
    "Diverticulitis",
    "Hemorrhoids",
    "Anal fissures",
    "Malabsorption syndromes",
    "Small bowel tumors",
    "Small bowel obstruction",
    "Gastrointestinal bleeding",
    "Gastrointestinal motility disorders",
    "Gastroparesis",
    "Eosinophilic esophagitis",
    "Achalasia",
    "Zenker's diverticulum",
]

def extract_date(filename):
    """Return the first ``YYYY-MM-DD`` date embedded in ``filename``.

    Args:
        filename: Text to search (typically a file name).

    Returns:
        The matched date as a ``YYYY-MM-DD`` string, or ``None`` when no such
        pattern is present or the matched digits are not a real calendar date
        (for example ``2024-13-45``).
    """
    # Regular expression pattern to match date in YYYY-MM-DD format
    match = re.search(r'\d{4}-\d{2}-\d{2}', filename)
    if match is None:
        return None

    date_str = match.group(0)
    try:
        # strptime is used purely as a validator: it raises ValueError for
        # impossible dates, which previously escaped and aborted the script.
        datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return None

    # The matched text is already in YYYY-MM-DD form, so return it directly.
    return date_str

def extract_date_alternate(filename):
    """Return the first ``D-Mon-YYYY`` date in ``filename`` as ``YYYY-MM-DD``.

    The month must be a capitalised three-letter English abbreviation
    (``Jan`` .. ``Dec``); the day may be one or two digits.

    Args:
        filename: Text to search (a file name or path-like key).

    Returns:
        The date formatted as zero-padded ``YYYY-MM-DD``, or ``None`` when no
        such pattern is present or the match is not a real calendar date
        (for example ``31-Feb-2024``).
    """
    # Regular expression pattern to match date in DD-MMM-YYYY format
    date_pattern = r'\b(\d{1,2})-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d{4})\b'
    match = re.search(date_pattern, filename)
    if match is None:
        return None

    day, month, year = match.groups()

    # Convert month abbreviation to month number
    month_num = {
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
        'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8,
        'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }[month]

    try:
        # Building a datetime validates the day/month/year combination.
        parsed = datetime(int(year), month_num, int(day))
    except ValueError:
        return None

    # Zero-pad every component so single-digit days yield e.g. 2024-04-05,
    # matching the format produced by extract_date.
    return f'{parsed.year:04d}-{parsed.month:02d}-{parsed.day:02d}'

def generate_severity():
    """Draw a random integer severity score from 1 to 5, both ends included."""
    lowest, highest = 1, 5
    return random.randint(lowest, highest)

@click.command()
@click.argument('input_file', type=click.File('r'))
@click.argument('output_file', type=click.Path(), default='')
def process_jsonl(input_file, output_file):
    """Add Date, Severity, Anatomy, Disorder and PatientID to each JSONL record.

    Reads INPUT_FILE line by line and writes the augmented records to
    OUTPUT_FILE. When OUTPUT_FILE is omitted the result is written next to the
    input as '<input name>_modified.ndjson'.
    """
    if not output_file:
        # If output file is not specified, generate default output file name
        output_file = input_file.name.rsplit('.', 1)[0] + '_modified.ndjson'

    # A separate handle name keeps the output path from being shadowed.
    with open(output_file, 'w') as out_handle:
        for line in input_file:
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # letting json.loads raise on them.
            if not line.strip():
                continue

            # Parse the JSON object from the line
            data = json.loads(line)

            # Missing or null fields are treated as empty strings so a single
            # incomplete record does not abort the whole run.
            filename = data.get('Filename') or ''
            key = data.get('Key') or ''

            # Prefer a YYYY-MM-DD date in the filename; otherwise fall back to
            # a D-Mon-YYYY date in the key. 'Date' is only set when one is found.
            date = extract_date(filename) or extract_date_alternate(key)
            if date:
                data['Date'] = date

            # Random sample values; the draw order (severity, anatomy,
            # disorder) is kept so seeded runs stay reproducible.
            data['Severity'] = generate_severity()
            data['Anatomy'] = random.choice(anatomies)
            data['Disorder'] = random.choice(gastrointestinal_illnesses)

            # The patient ID is the third path segment from the end of 'Key';
            # skip it when the key is too short instead of raising IndexError.
            segments = key.split('/')
            if len(segments) >= 3:
                data['PatientID'] = segments[-3]

            # Write the updated JSON object to the output file
            out_handle.write(json.dumps(data) + '\n')

    # input_file is a click.File handle; click closes it when the command
    # context tears down, so no explicit close is needed here.


if __name__ == '__main__':
    process_jsonl()
9 changes: 2 additions & 7 deletions server/dive_server/views_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,7 @@ def process_metadata(
modified_key_paths = [
{"root": root_name, "modified_path": base_modified_key_path}
]
print(f" Lenth of child folders: {len(childFolders)}")
print(childFolders)
for childFolder in childFolders:
print(f"Child Item: {childFolder['name']} path: {key_path}")
modified_key_paths.append(
{
"root": childFolder["name"],
Expand Down Expand Up @@ -297,7 +294,7 @@ def process_metadata(
item = metadataKeys[key]
metadataKeys[key]["unique"] = len(item["set"])
if item["type"] in ['string', 'array'] and (
item["count"] < categoricalLimit
item["unique"] < categoricalLimit
or (item["count"] <= len(item["set"]) and len(item["set"]) < categoricalLimit)
):
metadataKeys[key]["category"] = "categorical"
Expand All @@ -313,6 +310,7 @@ def process_metadata(
DIVE_MetadataKeys().createMetadataKeys(folder, user, metadataKeys)
# add metadata to root folder for
folder['meta'][DIVEMetadataMarker] = True
displayConfig['categoricalLimit'] = categoricalLimit
folder['meta'][DIVEMetadataFilter] = displayConfig
Folder().save(folder)

Expand Down Expand Up @@ -432,7 +430,6 @@ def clone_filter(
)
if metadata_items is not None:
for item in list(metadata_items):
print(item)
item_folder = Folder().load(item['DIVEDataset'], level=AccessType.READ, user=user)
crud_dataset.createSoftClone(
self.getCurrentUser(),
Expand Down Expand Up @@ -478,7 +475,6 @@ def get_filter_query(self, folder, user, filters):
query["$and"].append(
{f'metadata.{key}': {'$regex': re.escape(filter['value'])}}
)
print(query)
return query

@access.user
Expand Down Expand Up @@ -514,7 +510,6 @@ def get_metadata_filter(self, folder, keys=None):
for item in metadata_items:
if 'metadata' in item.keys():
for key in item['metadata'].keys():
print(item['metadata'][key])
if keys is None and key not in results.keys():
results[key] = set()
if item['metadata'].get(key, None) is not None and not isinstance(
Expand Down

0 comments on commit 70e13cd

Please sign in to comment.