Parameter evaluation refactoring and API improvements (#201)

(See change notes in PR.)
NAU-CCL · Dec 4, 2024 · 547900e · 547900e
1 parent f552f94
commit 547900e
Show file tree

Hide file tree

Showing 56 changed files with 5,079 additions and 3,605 deletions.
diff --git a/USAGE.ipynb b/USAGE.ipynb
@@ -112,16 +112,16 @@
      "output_type": "stream",
      "text": [
       "Loading epymorph.adrio.commuting_flows.Commuters:\n",
-      "  |####################| 100%  (7.974s)\n",
+      "  |####################| 100%  (8.142s)\n",
       "Loading epymorph.adrio.acs5.Population:\n",
-      "  |####################| 100%  (1.201s)\n",
+      "  |####################| 100%  (1.263s)\n",
       "Loading epymorph.adrio.us_tiger.PostalCode:\n",
-      "  |####################| 100%  (0.131s)\n",
+      "  |####################| 100%  (0.136s)\n",
       "Running simulation (BasicSimulator):\n",
       "• 2015-01-01 to 2015-05-31 (150 days)\n",
       "• 6 geo nodes\n",
-      "  |####################| 100% \n",
-      "Runtime: 0.152s\n"
+      "  |####################| 100%                     \n",
+      "Runtime: 0.148s\n"
      ]
     }
    ],
@@ -158,10 +158,10 @@
      "output_type": "stream",
      "text": [
       "Compartments is an array of shape: (300, 6, 3)\n",
-      "That's (T,N,C) -- simulation time steps, number of geo nodes, and number of IPM compartments.\n",
+      "That's (S,N,C) -- simulation time steps, number of geo nodes, and number of IPM compartments.\n",
       "\n",
       "Events is an array of shape: (300, 6, 3)\n",
-      "That's (T,N,E) -- simulation time steps, number of geo nodes, and number of IPM events.\n",
+      "That's (S,N,E) -- simulation time steps, number of geo nodes, and number of IPM events.\n",
       "\n",
       "Here are the initial conditions (SIR) for all six geo nodes:\n",
       "[[19635772    10000        0]\n",
@@ -188,15 +188,15 @@
    "source": [
     "print(f\"Compartments is an array of shape: {out.compartments.shape}\")\n",
     "print(\n",
-    "    \"That's (T,N,C) -- simulation time steps, number of geo nodes, \"\n",
+    "    \"That's (S,N,C) -- simulation time steps, number of geo nodes, \"\n",
     "    \"and number of IPM compartments.\"\n",
     ")\n",
     "\n",
     "print()\n",
     "\n",
     "print(f\"Events is an array of shape: {out.events.shape}\")\n",
     "print(\n",
-    "    \"That's (T,N,E) -- simulation time steps, number of geo nodes, \"\n",
+    "    \"That's (S,N,E) -- simulation time steps, number of geo nodes, \"\n",
     "    \"and number of IPM events.\"\n",
     ")\n",
     "\n",
@@ -219,9 +219,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The prevalence data is a 3-dimensional numpy array of integers of shape `(T,N,C)`, where `T` is the number of time steps in the simulation, `N` is the number of geo nodes, and `C` is the number of compartments in the IPM.\n",
+    "The prevalence data is a 3-dimensional numpy array of integers of shape `(S,N,C)`, where `S` is the number of time steps in the simulation, `N` is the number of geo nodes, and `C` is the number of compartments in the IPM.\n",
     "\n",
-    "The incidence data is similar, but of shape `(T,N,E)`, where `E` is the number of transition events (directed edges from one IPM compartment to another, associated with some sort of transition rate).\n",
+    "The incidence data is similar, but of shape `(S,N,E)`, where `E` is the number of transition events (directed edges from one IPM compartment to another, associated with some sort of transition rate).\n",
     "\n",
     "You may notice that our time frame says to run the simulation for 150 days. So where did 300 ticks come from? Without diving too deep, epymorph breaks up each day into parts called *tau steps* (1 or more) and runs its computations one tau step at a time. The movement model is what dictates how many tau steps there are in a day and how long each one is. In this example, there are 2 tau steps per day, and so $150 \\times 2 = 300$ simulation ticks.\n",
     "\n",

diff --git a/doc/demo/01-SIRH-IPM.ipynb b/doc/demo/01-SIRH-IPM.ipynb
diff --git a/doc/demo/02-states-GEO.ipynb b/doc/demo/02-states-GEO.ipynb
diff --git a/doc/demo/03-counties-GEO.ipynb b/doc/demo/03-counties-GEO.ipynb
diff --git a/doc/demo/04-time-varying-beta.ipynb b/doc/demo/04-time-varying-beta.ipynb
diff --git a/doc/demo/05-visualizing-mm.ipynb b/doc/demo/05-visualizing-mm.ipynb
diff --git a/doc/devlog/2023-06-30.ipynb b/doc/devlog/2023-06-30.ipynb
diff --git a/doc/devlog/2024-05-03.ipynb b/doc/devlog/2024-05-03.ipynb
@@ -23,6 +23,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import numpy as np\n",
     "import epymorph.geography.us_census as c\n",
     "import epymorph.geography.us_tiger as t\n",
     "\n",
@@ -33,14 +34,14 @@
     "\n",
     "def test_year(year: int) -> None:\n",
     "    # 1. test that we have 52 states\n",
-    "    states = c.get_us_states(year).geoid\n",
+    "    states = t.get_states(year).geoid\n",
     "\n",
     "    if len(states) != 52:\n",
     "        raise Fail(\"There weren't 52 states!\")\n",
     "\n",
     "    # 2. test that each state contains at least one county\n",
-    "    counties = c.get_us_counties(year).geoid\n",
-    "    counties_by_state = c.STATE.grouped(counties)\n",
+    "    counties = t.get_counties(year).geoid\n",
+    "    counties_by_state = c.STATE.grouped(np.array(counties))\n",
     "\n",
     "    exs = [\n",
     "        Fail(f\"State {x} does not have at least one county.\")\n",
@@ -51,8 +52,8 @@
     "        raise ExceptionGroup(\"Failed checking counties.\", exs)\n",
     "\n",
     "    # 3. test that each county contains at least one tract\n",
-    "    tracts = c.get_us_tracts(year).geoid\n",
-    "    tracts_by_county = c.COUNTY.grouped(tracts)\n",
+    "    tracts = t.get_tracts(year).geoid\n",
+    "    tracts_by_county = c.COUNTY.grouped(np.array(tracts))\n",
     "\n",
     "    exs = []\n",
     "    for x in counties:\n",
@@ -62,8 +63,8 @@
     "        raise ExceptionGroup(\"Failed checking tracts.\", exs)\n",
     "\n",
     "    # 4. test that each tract contains at least one block group\n",
-    "    cbgs = c.get_us_block_groups(year).geoid\n",
-    "    cbgs_by_tract = c.TRACT.grouped(cbgs)\n",
+    "    cbgs = t.get_block_groups(year).geoid\n",
+    "    cbgs_by_tract = c.TRACT.grouped(np.array(cbgs))\n",
     "\n",
     "    exs = []\n",
     "    for x in tracts:\n",
@@ -125,7 +126,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,

diff --git a/doc/devlog/2024-06-03.ipynb b/doc/devlog/2024-06-03.ipynb
@@ -17,17 +17,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import numpy as np\n",
+    "\n",
     "from epymorph import *\n",
     "from epymorph.adrio import acs5, adrio, commuting_flows, us_tiger\n",
     "from epymorph.data_shape import DataShapeMatcher\n",
+    "from epymorph.data_type import dtype_as_np\n",
     "from epymorph.geography.us_census import (\n",
     "    BlockGroupScope,\n",
     "    CountyScope,\n",
     "    StateScope,\n",
     "    TractScope,\n",
     ")\n",
-    "from epymorph.params import ParamValue\n",
-    "from epymorph.simulator.data import evaluate_param\n",
+    "from epymorph.data.ipm.no import No as NoIpm\n",
+    "from epymorph.data.mm.no import No as NoMm\n",
     "from epymorph.util import NumpyTypeError, check_ndarray, match\n",
     "\n",
     "# This is the expected type and shape for every attribute we're going to test.\n",
@@ -46,7 +49,7 @@
     "]\n",
     "\n",
     "# And here are the ADRIOs for each of those attributes.\n",
-    "params: dict[str, ParamValue] = {\n",
+    "params: dict[str, adrio.Adrio] = {\n",
     "    \"label\": us_tiger.Name(),\n",
     "    \"population\": acs5.Population(),\n",
     "    \"population_by_age_table\": acs5.PopulationByAgeTable(),\n",
@@ -64,12 +67,21 @@
     "\n",
     "def run_test(rume: Rume, skip: tuple[str, ...] = ()):\n",
     "    for attr in (a for a in expected if a.name not in skip):\n",
-    "        actual = evaluate_param(rume, attr.name)\n",
     "        try:\n",
+    "            actual = (\n",
+    "                params[attr.name]\n",
+    "                .with_context(\n",
+    "                    dim=rume.dim,\n",
+    "                    scope=rume.scope,\n",
+    "                    params=params,\n",
+    "                    rng=np.random.default_rng(),\n",
+    "                )\n",
+    "                .evaluate()\n",
+    "            )\n",
     "            check_ndarray(\n",
     "                actual,\n",
-    "                dtype=match.dtype(attr.dtype),\n",
-    "                shape=DataShapeMatcher(attr.shape, rume.dim, True),\n",
+    "                dtype=match.dtype(dtype_as_np(attr.type)),\n",
+    "                shape=DataShapeMatcher(attr.shape, rume.dim),\n",
     "            )\n",
     "            print(f\"{attr.name}: good\")\n",
     "        except NumpyTypeError as e:\n",
@@ -79,8 +91,8 @@
     "\n",
     "def placeholder_rume(scope, time_frame):\n",
     "    return SingleStrataRume.build(\n",
-    "        ipm=ipm_library[\"no\"](),\n",
-    "        mm=mm_library[\"no\"](),\n",
+    "        ipm=NoIpm(),\n",
+    "        mm=NoMm(),\n",
     "        init=init.NoInfection(),\n",
     "        scope=scope,\n",
     "        time_frame=time_frame,\n",
@@ -113,7 +125,7 @@
    ],
    "source": [
     "rume = placeholder_rume(\n",
-    "    scope=StateScope.all(year=2020),\n",
+    "    scope=StateScope.in_states([\"NY\", \"NJ\", \"MD\", \"VA\"], year=2020),\n",
     "    time_frame=TimeFrame.year(2020),\n",
     ")\n",
     "\n",
@@ -275,7 +287,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,

diff --git a/doc/devlog/2024-06-05.ipynb b/doc/devlog/2024-06-05.ipynb
@@ -57,32 +57,41 @@
      "output_type": "stream",
      "text": [
       "ADRIO data usage estimation:\n",
-      "- epymorph.adrio.lodes.Commuters will download 32.8 MB and write 32.8 MB to disk\n",
+      "- epymorph.adrio.acs5.Population (no estimate available)\n",
+      "- epymorph.adrio.lodes.Commuters will be pulled from cache\n",
       "In total we will:\n",
-      "- Download 32.8 MB, taking 32 seconds (assuming 1.0 MB/s)\n",
-      "- Write 32.8 MB to disk cache (you have 249.2 GB free space)\n"
+      "- Download no additional data\n",
+      "- Write no new data to disk cache\n"
      ]
     }
    ],
    "source": [
+    "import numpy as np\n",
+    "\n",
     "from epymorph.geography.us_census import StateScope\n",
     "from epymorph import *\n",
     "from epymorph.adrio import acs5\n",
-    "from epymorph.simulator.data import evaluate_param\n",
     "from epymorph.adrio import lodes\n",
+    "from epymorph.data.ipm.no import No as NoIpm\n",
+    "from epymorph.data.mm.no import No as NoMm\n",
     "\n",
     "\n",
-    "state_scope = StateScope.in_states_by_code([\"AZ\", \"CO\", \"NV\", \"NM\"])\n",
+    "state_scope = StateScope.in_states([\"AZ\", \"CO\", \"NV\", \"NM\"], 2020)\n",
     "time_period = 2015\n",
-    "geoids = state_scope.get_node_ids()\n",
+    "geoids = state_scope.node_ids\n",
+    "\n",
+    "commuters_adrio = lodes.Commuters(time_period)\n",
     "\n",
     "rume = SingleStrataRume.build(\n",
-    "    ipm_library[\"no\"](),\n",
-    "    mm_library[\"no\"](),\n",
-    "    init.NoInfection(),\n",
+    "    ipm=NoIpm(),\n",
+    "    mm=NoMm(),\n",
+    "    init=init.NoInfection(),\n",
     "    scope=state_scope,\n",
     "    time_frame=TimeFrame.year(time_period),\n",
-    "    params={\"population\": acs5.Population(), \"commuters\": lodes.Commuters(time_period)},\n",
+    "    params={\n",
+    "        \"population\": acs5.Population(),\n",
+    "        \"commuters\": commuters_adrio,\n",
+    "    },\n",
     ")\n",
     "\n",
     "rume.estimate_data()"
@@ -98,18 +107,18 @@
      "output_type": "stream",
      "text": [
       "Loading epymorph.adrio.lodes.Commuters:\n",
-      "  |####################| 100%  (87.504s)\n"
+      "  |####################| 100%  (7.113s)\n"
      ]
     }
    ],
    "source": [
     "with sim_messaging():\n",
-    "    commuters = evaluate_param(rume, \"commuters\")"
+    "    commuters = commuters_adrio.with_context(scope=state_scope).evaluate()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -209,9 +218,10 @@
      "output_type": "stream",
      "text": [
       "ADRIO data usage estimation:\n",
-      "- epymorph.adrio.lodes.CommutersByAge will download 0 Bytes and write 0 Bytes to disk\n",
-      "- epymorph.adrio.lodes.CommutersByAge will not require any additional data\n",
-      "- epymorph.adrio.lodes.CommutersByAge will not require any additional data\n",
+      "- epymorph.adrio.acs5.Population (no estimate available)\n",
+      "- epymorph.adrio.lodes.CommutersByAge will be pulled from cache\n",
+      "- epymorph.adrio.lodes.CommutersByAge will be pulled from cache\n",
+      "- epymorph.adrio.lodes.CommutersByAge will be pulled from cache\n",
       "In total we will:\n",
       "- Download no additional data\n",
       "- Write no new data to disk cache\n"
@@ -222,24 +232,27 @@
     "from epymorph.geography.us_census import CountyScope\n",
     "from epymorph import *\n",
     "from epymorph.adrio import acs5\n",
-    "from epymorph.simulator.data import evaluate_param\n",
     "from epymorph.adrio import lodes\n",
     "\n",
     "time_period = 2015\n",
-    "county_scope = CountyScope.in_counties([\"04013\", \"08041\", \"32003\", \"35001\"])\n",
-    "geoids = county_scope.get_node_ids()\n",
+    "county_scope = CountyScope.in_counties([\"04013\", \"08041\", \"32003\", \"35001\"], year=2020)\n",
+    "geoids = county_scope.node_ids\n",
+    "\n",
+    "commuters_29_under_adrio = lodes.CommutersByAge(time_period, \"29 and Under\")\n",
+    "commuters_30_54_adrio = lodes.CommutersByAge(time_period, \"30_54\")\n",
+    "commuters_55_over_adrio = lodes.CommutersByAge(time_period, \"55 and Over\")\n",
     "\n",
     "rume = SingleStrataRume.build(\n",
-    "    ipm_library[\"no\"](),\n",
-    "    mm_library[\"no\"](),\n",
-    "    init.NoInfection(),\n",
+    "    ipm=NoIpm(),\n",
+    "    mm=NoMm(),\n",
+    "    init=init.NoInfection(),\n",
     "    scope=county_scope,\n",
     "    time_frame=TimeFrame.year(time_period),\n",
     "    params={\n",
     "        \"population\": acs5.Population(),\n",
-    "        \"commuters_29_under\": lodes.CommutersByAge(time_period, \"29 and Under\"),\n",
-    "        \"commuters_30_54\": lodes.CommutersByAge(time_period, \"30_54\"),\n",
-    "        \"commuters_55_over\": lodes.CommutersByAge(time_period, \"55 and Over\"),\n",
+    "        \"commuters_29_under\": commuters_29_under_adrio,\n",
+    "        \"commuters_30_54\": commuters_30_54_adrio,\n",
+    "        \"commuters_55_over\": commuters_55_over_adrio,\n",
     "    },\n",
     ")\n",
     "\n",
@@ -256,19 +269,23 @@
      "output_type": "stream",
      "text": [
       "Loading epymorph.adrio.lodes.CommutersByAge:\n",
-      "  |####################| 100%  (11.563s)\n",
+      "  |####################| 100%  (5.839s)\n",
       "Loading epymorph.adrio.lodes.CommutersByAge:\n",
-      "  |####################| 100%  (18.080s)\n",
+      "  |####################| 100%  (5.998s)\n",
       "Loading epymorph.adrio.lodes.CommutersByAge:\n",
-      "  |####################| 100%  (14.555s)\n"
+      "  |####################| 100%  (5.848s)\n"
      ]
     }
    ],
    "source": [
     "with sim_messaging():\n",
-    "    commuters_29_under = evaluate_param(rume, \"commuters_29_under\")\n",
-    "    commuters_30_54 = evaluate_param(rume, \"commuters_30_54\")\n",
-    "    commuters_55_over = evaluate_param(rume, \"commuters_55_over\")"
+    "    commuters_29_under = commuters_29_under_adrio.with_context(\n",
+    "        scope=county_scope\n",
+    "    ).evaluate()\n",
+    "    commuters_30_54 = commuters_30_54_adrio.with_context(scope=county_scope).evaluate()\n",
+    "    commuters_55_over = commuters_55_over_adrio.with_context(\n",
+    "        scope=county_scope\n",
+    "    ).evaluate()"
    ]
   },
   {
@@ -287,16 +304,16 @@
       " [   119     26     41  55536]]\n",
       "\n",
       "Commuters between ages 30 and 54:\n",
-      " [[922520     20    817    354]\n",
-      " [    19 104126     27     37]\n",
-      " [   656      7 493523     72]\n",
-      " [   230     38    120 135000]]\n",
+      " [[369848     11    287    195]\n",
+      " [    12  43020      9     29]\n",
+      " [   245     11 186798     48]\n",
+      " [   119     26     41  55536]]\n",
       "\n",
       "Commuters ages 55 and over:\n",
-      " [[337141     10    324    151]\n",
-      " [    18  42355      9     16]\n",
-      " [   325      3 182295     53]\n",
-      " [    59     13     56  56352]]\n",
+      " [[369848     11    287    195]\n",
+      " [    12  43020      9     29]\n",
+      " [   245     11 186798     48]\n",
+      " [   119     26     41  55536]]\n",
       "\n"
      ]
     }
@@ -324,7 +341,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.0rc1"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,