Skip to content

Commit

Permalink
yearly stat JSON changes
Browse files Browse the repository at this point in the history
  • Loading branch information
sureshhewabi committed Feb 19, 2025
1 parent 66033d1 commit f890547
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
11 changes: 9 additions & 2 deletions filedownloadstat/parquet_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
+ import pandas as pd
import dask.dataframe as dd
from scipy.stats import rankdata

Expand Down Expand Up @@ -69,8 +70,14 @@ def persist_project_level_yearly_download_counts(self, ddf, project_level_yearly
# Compute the result
result = file_counts.compute()

- # Save to JSON
- result.to_json(project_level_yearly_download_counts, orient="records", lines=False)
+ # Convert to the desired nested JSON structure
+ grouped = result.groupby("accession").apply(lambda x: {
+ "accession": x["accession"].iloc[0],
+ "yearlyDownloads": x[["year", "count"]].to_dict(orient="records")
+ }).tolist()
+
+ # Save to JSON file without indentation
+ pd.DataFrame(grouped).to_json(project_level_yearly_download_counts, orient="records", lines=False)

print(f"{project_level_yearly_download_counts} file saved successfully!")

Expand Down
2 changes: 1 addition & 1 deletion scripts/.run_stat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ nextflow -log $LOG_FILE run ${PIPELINE_BASE_DIR}main.nf \
--log_file $LOG_FILE \
--api_endpoint_file_downloads_per_project $API_ENDPOINT_FILE_DOWNLOADS_PER_PROJECT \
--api_endpoint_file_downloads_per_file $API_ENDPOINT_FILE_DOWNLOADS_PER_FILE \
- --api_endpoint_header $API_ENDPOINT_HEADER -resume -with-tower
+ --api_endpoint_header "$API_ENDPOINT_HEADER" -resume -with-tower

0 comments on commit f890547

Please sign in to comment.