feat: add pdac_nac project

aadimator · aadimator · commit 3860a17da798 · 2024-07-30T09:46:30.000-04:00
diff --git a/notebooks/00-pdac_nac.ipynb b/notebooks/00-pdac_nac.ipynb
@@ -0,0 +1,238 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using CSV\n",
+    "using Muon\n",
+    "using RData\n",
+    "using Revise\n",
+    "using ISCHIA\n",
+    "using DataFrames\n",
+    "using Combinatorics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AnnData object 216180 ✕ 17764"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the spatial AnnData object used by every later cell.\n",
+    "# NOTE(review): the \"rmv_unk\" suffix and the disabled filtering code below suggest this file is\n",
+    "# the 05 dataset with \"Unknown\" neoadjuvant_chemo spots already removed - confirm upstream.\n",
+    "spatial_object = readh5ad(\"../data/06-pdac_nac-clusters-rmv_unk.h5ad\")\n",
+    "# adata = readh5ad(\"../data/05-pdac_nac-clusters.h5ad\")\n",
+    "# Network table read from an .rds file; later cells use its `from` and `to` columns.\n",
+    "lr_network = load(\"../data/lr_network.rds\")\n",
+    "\n",
+    "# Disabled alternative: derive the filtered object from the 05 file at load time.\n",
+    "# # Remove spots where neoadjuvant_chemo is unknown\n",
+    "# mask = .!(adata.obs.neoadjuvant_chemo .== \"Unknown\")\n",
+    "# spatial_object = @view adata[mask, :]\n",
+    "# # spatial_object = adata[mask, :]\n",
+    "\n",
+    "# Last expression of the cell: show the object summary as the cell output.\n",
+    "spatial_object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element Vector{String}:\n",
+       " \"No\"\n",
+       " \"Yes\""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Show the distinct values of the neoadjuvant_chemo column in obs.\n",
+    "display(unique(spatial_object.obs.neoadjuvant_chemo))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep the variable names as a plain vector and also store them as a `name` column in var.\n",
+    "gene_names = collect(spatial_object.var_names)\n",
+    "spatial_object.var.name = gene_names\n",
+    "\n",
+    "# Label every network row as \"<from>_<to>\" and keep only the three columns used downstream.\n",
+    "lr_network[!, :LR_Pairs] = string.(lr_network.from, \"_\", lr_network.to)\n",
+    "lr_network = lr_network[:, [:from, :to, :LR_Pairs]]\n",
+    "\n",
+    "# Keep only rows whose `from` and `to` genes are both present in the dataset.\n",
+    "# Membership is tested against a Set: each lookup is a hash probe rather than a linear scan\n",
+    "# over all gene names, and a missing entry yields `false` instead of `missing`, so the\n",
+    "# combined mask is always a valid Bool index.\n",
+    "gene_name_set = Set(gene_names)\n",
+    "from_filter = in.(lr_network.from, Ref(gene_name_set))\n",
+    "to_filter = in.(lr_network.to, Ref(gene_name_set))\n",
+    "all_LR_network = lr_network[from_filter .& to_filter, :];"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Every gene appearing in either column of the filtered network, deduplicated,\n",
+    "# then restricted to the genes listed in gene_names.\n",
+    "all_LR_genes = unique([all_LR_network.from; all_LR_network.to])\n",
+    "all_LR_genes_comm = intersect(all_LR_genes, gene_names);\n",
+    "\n",
+    "# Pair labels of the filtered network, plus an \"a_b\" label for every 2-gene combination.\n",
+    "LR_pairs = all_LR_network[:, :LR_Pairs]\n",
+    "all_combos = map(gene_pair -> join(gene_pair, \"_\"), combinations(all_LR_genes_comm, 2));"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"neoadjuvant_chemo\""
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Arguments for the find_enriched_LR_pairs call in the next cell.\n",
+    "# spatial_object = adata\n",
+    "LR_list = all_LR_genes_comm\n",
+    "# No-op self-assignment: LR_pairs keeps the value assigned in the previous cell.\n",
+    "LR_pairs = LR_pairs\n",
+    "# Presumably the expression and correlation thresholds; their exact meaning is defined by\n",
+    "# find_enriched_LR_pairs - TODO confirm against the ISCHIA documentation.\n",
+    "exp_th = 1\n",
+    "corr_th = 0.2;\n",
+    "\n",
+    "# Cluster column name and the cluster labels to analyse (presumably an obs column - verify).\n",
+    "cc_column = \"CC_k10\"\n",
+    "cc_list = [\"CC10\"]\n",
+    "# Condition = unique(spatial_object.obs[!, \"orig.ident\"])\n",
+    "# Condition values to analyse, and the obs column they are read from.\n",
+    "condition_list = [\"Yes\", \"No\"]\n",
+    "condition_column = \"neoadjuvant_chemo\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running for CC10\n",
+      "Running for Yes\n",
+      "Preparing L-R presence/absence matrix\n",
+      "Calculating L-R pairs correlation\n",
+      "Preparing for cooccurrence\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cooccurrence calculation starts...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32mCalculate Incidence 100%|████████████████████████████████| Time: 0:02:17\u001b[39mm\n",
+      "\u001b[32mCalculate Co-occurrences 100%|███████████████████████████| Time: 0:02:15\u001b[39m\n",
+      "\u001b[32mMain Comp 100%|██████████████████████████████████████████| Time: 0:00:43\u001b[39m\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cooccurrence calculation ended\n",
+      "\n",
+      "Summary of cooccurrence results:\n",
+      "Of 824970 species pair combinations, 573708 pairs (69.54%) were removed from the analysis because expected co-occurrence was < 1 and\n",
+      "251262 pairs were analyzed\n",
+      "\n",
+      "Cooccurrence Summary:\n",
+      "\n",
+      "Species => 1285\n",
+      "Non-random (%) => 66.3\n",
+      "Sites => 4494\n",
+      "Negative => 2665\n",
+      "Random => 84781\n",
+      "Positive => 163816\n",
+      "Unclassifiable => 0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32mCalculate Significantly occurring pairs   2%|█           |  ETA: 1 days, 7:24:31\u001b[39mm\r"
+     ]
+    }
+   ],
+   "source": [
+    "# Create the output directory before the long-running analysis starts, so a missing folder\n",
+    "# cannot make CSV.write fail only after the computation has finished.\n",
+    "# NOTE(review): this path is relative to the working directory, whereas the inputs are read\n",
+    "# from \"../data\" - confirm the location is intended.\n",
+    "output_dir = \"outputs/pdac_nac\"\n",
+    "mkpath(output_dir)\n",
+    "\n",
+    "# Run the analysis once per (cluster, condition) combination and save both result tables.\n",
+    "for cc in cc_list\n",
+    "    println(\"Running for $cc\")\n",
+    "    for condition in condition_list\n",
+    "        println(\"Running for $condition\")\n",
+    "        # One cluster and one condition per call, each wrapped in a single-element vector.\n",
+    "        lr_result = find_enriched_LR_pairs(\n",
+    "            spatial_object,\n",
+    "            [cc],\n",
+    "            [condition],\n",
+    "            LR_list,\n",
+    "            LR_pairs,\n",
+    "            exp_th,\n",
+    "            corr_th,\n",
+    "            cc_column=cc_column,\n",
+    "            condition_column=condition_column\n",
+    "        )\n",
+    "\n",
+    "        # CSV must be loaded with `using CSV` in the imports cell.\n",
+    "        CSV.write(joinpath(output_dir, \"$(cc)_lr_enrichment_$(condition).csv\"), lr_result[\"enriched_LRs\"])\n",
+    "        CSV.write(joinpath(output_dir, \"$(cc)_cooccurr_mat_$(condition).csv\"), lr_result[\"cooccurrence_table\"].results)\n",
+    "    end\n",
+    "end"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.9.3",
+   "language": "julia",
+   "name": "julia-1.9"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.10.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}