Add import_rows to Query API (#78)
labkey-alan authored Dec 3, 2024
1 parent c2f2cda commit f2037cd
Showing 6 changed files with 187 additions and 5 deletions.
7 changes: 7 additions & 0 deletions CHANGE.txt
@@ -2,6 +2,13 @@
LabKey Python Client API News
+++++++++++

What's New in the LabKey 3.3.0 package
==============================

*Release date: 12/3/2024*
- Add import_rows API to query module
- Accessible via API wrappers, e.g. api.query.import_rows

What's New in the LabKey 3.2.0 package
==============================

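The wrapper entry point named in the changelog takes only a few lines to exercise. A minimal sketch, assuming a reachable server at labkey.example.com, a MyProject container, and an existing lists table named my_list (all hypothetical):

from labkey.api_wrapper import APIWrapper

api = APIWrapper("labkey.example.com", "MyProject", use_ssl=True)
with open("rows.tsv") as data_file:  # header row must match the target table's columns
    resp = api.query.import_rows("lists", "my_list", data_file=data_file)
print(resp["rowCount"])  # number of rows imported, per the integration test below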
2 changes: 1 addition & 1 deletion labkey/__init__.py
@@ -14,6 +14,6 @@
# limitations under the License.
#
__title__ = "labkey"
__version__ = "3.2.0"
__version__ = "3.3.0"
__author__ = "LabKey"
__license__ = "Apache License 2.0"
4 changes: 2 additions & 2 deletions labkey/domain.py
@@ -14,7 +14,7 @@
# limitations under the License.
#
import functools
-from typing import Dict, List, Union, Tuple
+from typing import Dict, List, Union, Tuple, TextIO

from .server_context import ServerContext
from labkey.query import QueryFilter
@@ -483,7 +483,7 @@ def get_domain_details(


def infer_fields(
-    server_context: ServerContext, data_file: any, container_path: str = None
+    server_context: ServerContext, data_file: TextIO, container_path: str = None
) -> List[PropertyDescriptor]:
"""
Infer fields for a domain from a file
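The domain.py change is typing-only: data_file is now declared as TextIO instead of any, with no behavior change. A sketch of the annotated call, with a hypothetical columns.tsv whose header row names the fields to infer:

from labkey.api_wrapper import APIWrapper
from labkey.domain import infer_fields

api = APIWrapper("labkey.example.com", "MyProject")
with open("columns.tsv") as f:
    fields = infer_fields(api.server_context, f)  # returns List[PropertyDescriptor]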
74 changes: 73 additions & 1 deletion labkey/query.py
@@ -41,7 +41,7 @@
############################################################################
"""
import functools
-from typing import List
+from typing import List, TextIO

from .server_context import ServerContext
from .utils import waf_encode
@@ -357,6 +357,56 @@ def insert_rows(
)


def import_rows(
server_context: ServerContext,
schema_name: str,
query_name: str,
data_file: TextIO,
container_path: str = None,
insert_option: str = None,
audit_behavior: str = None,
import_lookup_by_alternate_key: bool = False,
):
"""
Import row(s) into a table
:param server_context: A LabKey server context. See utils.create_server_context.
:param schema_name: schema of the table
:param query_name: name of the table to import into
:param data_file: the file containing the rows to import. The column names in the file must match the column names
from the LabKey server.
:param container_path: LabKey container path, if not already set in the server context
:param insert_option: Whether the import action should be done as an insert, creating new rows for each provided row
of the data file, or a merge. When merging during import, any data you provide for rows representing records
that already exist will replace the previous values. Note that when updating an existing record, you only need to
provide the columns you wish to update; existing data for other columns will be left as is. Available options are
"INSERT" and "MERGE". Defaults to "INSERT".
:param audit_behavior: Set the level of auditing details for this import action. Available options are "SUMMARY" and
"DETAILED". SUMMARY - Audit log reflects that a change was made, but does not mention the nature of the change.
DETAILED - Provides full details on what change was made, including values before and after the change. Defaults to
the setting as specified by the LabKey query.
:param import_lookup_by_alternate_key: Allows lookup target rows to be resolved by values rather than the target's
primary key. This option is only available for lookups that are configured with unique column information.
:return: the parsed JSON response from the server
"""
url = server_context.build_url("query", "import.api", container_path=container_path)
file_payload = {"file": data_file}
payload = {
"schemaName": schema_name,
"queryName": query_name,
}

if insert_option is not None:
payload["insertOption"] = insert_option

if audit_behavior is not None:
payload["auditBehavior"] = audit_behavior

if import_lookup_by_alternate_key is not None:
payload["importLookupByAlternateKey"] = import_lookup_by_alternate_key

return server_context.make_request(url, payload, method="POST", file_payload=file_payload)


def select_rows(
server_context: ServerContext,
schema_name: str,
@@ -654,6 +704,28 @@ def insert_rows(
timeout,
)

@functools.wraps(import_rows)
def import_rows(
self,
schema_name: str,
query_name: str,
data_file: TextIO,
container_path: str = None,
insert_option: str = None,
audit_behavior: str = None,
import_lookup_by_alternate_key: bool = False,
):
return import_rows(
self.server_context,
schema_name,
query_name,
data_file,
container_path,
insert_option,
audit_behavior,
import_lookup_by_alternate_key,
)

@functools.wraps(select_rows)
def select_rows(
self,
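Beyond the default insert shown in the changelog sketch, the docstring above documents merge imports, audit levels, and alternate-key lookup resolution. A hedged sketch combining them (server, container, table, and samples.tsv are all hypothetical):

from labkey.api_wrapper import APIWrapper

api = APIWrapper("labkey.example.com", "MyProject")
with open("samples.tsv") as data_file:
    resp = api.query.import_rows(
        "lists",
        "my_list",
        data_file=data_file,
        insert_option="MERGE",  # update matching rows, insert the rest
        audit_behavior="DETAILED",  # audit log records values before and after the change
        import_lookup_by_alternate_key=True,  # resolve lookups by unique value, not primary key
    )
assert resp["success"]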
3 changes: 2 additions & 1 deletion labkey/server_context.py
@@ -1,3 +1,4 @@
from typing import Dict, TextIO
from labkey.utils import json_dumps
from . import __version__
import requests
@@ -176,7 +177,7 @@ def make_request(
timeout: int = 300,
method: str = "POST",
non_json_response: bool = False,
-    file_payload: any = None,
+    file_payload: Dict[str, TextIO] = None,
json: dict = None,
allow_redirects=False,
) -> any:
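The tightened file_payload annotation describes the dict that import_rows builds above. For reference, a minimal sketch of the same request made through make_request directly, mirroring the import_rows body (server and container hypothetical):

from labkey.api_wrapper import APIWrapper

api = APIWrapper("labkey.example.com", "MyProject")
url = api.server_context.build_url("query", "import.api", container_path="MyProject")
with open("rows.tsv") as f:
    resp = api.server_context.make_request(
        url,
        {"schemaName": "lists", "queryName": "my_list"},
        method="POST",
        file_payload={"file": f},  # Dict[str, TextIO], matching the new annotation
    )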
102 changes: 102 additions & 0 deletions test/integration/test_query.py
@@ -157,3 +157,105 @@ def test_cannot_delete_qc_state_in_use(api: APIWrapper, qc_states, study, datase
# now clean up/stop using it
dataset_row_to_remove = [{"lsid": inserted_lsid}]
api.query.delete_rows(SCHEMA_NAME, QUERY_NAME, dataset_row_to_remove)

LISTS_SCHEMA = "lists"
PARENT_LIST_NAME = "parent_list"
PARENT_LIST_DEFINITION = {
"kind": "IntList",
"domainDesign": {
"name": PARENT_LIST_NAME,
"fields": [
{"name": "rowId", "rangeURI": "int"},
{
"name": "name",
"rangeURI": "string",
"required": True,
},
],
},
"indices": {
"columnNames": ["name"],
"unique": True,
},
"options": {"keyName": "rowId", "keyType": "AutoIncrementInteger"},
}
CHILD_LIST_NAME = "child_list"
CHILD_LIST_DEFINITION = {
"kind": "IntList",
"domainDesign": {
"name": CHILD_LIST_NAME,
"fields": [
{"name": "rowId", "rangeURI": "int"},
{
"name": "name",
"rangeURI": "string",
"required": True,
},
{
"name": "parent",
"lookupQuery": "parent_list",
"lookupSchema": "lists",
"rangeURI": "int",
},
],
},
"options": {"keyName": "rowId", "keyType": "AutoIncrementInteger"},
}

parent_data = """name
parent_one
parent_two
parent_three
"""

child_data = """name,parent
child_one,parent_one
child_two,parent_two
child_three,parent_three
"""

@pytest.fixture
def parent_list_fixture(api: APIWrapper):
api.domain.create(PARENT_LIST_DEFINITION)
created_list = api.domain.get(LISTS_SCHEMA, PARENT_LIST_NAME)
yield created_list
# clean up
api.domain.drop(LISTS_SCHEMA, PARENT_LIST_NAME)


@pytest.fixture
def child_list_fixture(api: APIWrapper):
api.domain.create(CHILD_LIST_DEFINITION)
created_list = api.domain.get(LISTS_SCHEMA, CHILD_LIST_NAME)
yield created_list
# clean up
api.domain.drop(LISTS_SCHEMA, CHILD_LIST_NAME)


def test_import_rows(api: APIWrapper, parent_list_fixture, child_list_fixture, tmpdir):
parent_data_path = tmpdir.join("parent_data.csv")
parent_data_path.write(parent_data)
child_data_path = tmpdir.join("child_data.csv")
child_data_path.write(child_data)

# Should succeed
parent_file = parent_data_path.open()
resp = api.query.import_rows("lists", PARENT_LIST_NAME, data_file=parent_file)
parent_file.close()
assert resp["success"] == True
assert resp["rowCount"] == 3

# Should fail, because data doesn't use rowIds and import_lookup_by_alternate_key defaults to False
child_file = child_data_path.open()
resp = api.query.import_rows("lists", CHILD_LIST_NAME, data_file=child_file)
child_file.close()
assert resp["success"] == False
assert resp["errorCount"] == 1
assert resp["errors"][0]["exception"] == "Could not convert value 'parent_one' (String) for Integer field 'parent'"

# Should pass, because import_lookup_by_alternate_key is True
child_file = child_data_path.open()
resp = api.query.import_rows("lists", CHILD_LIST_NAME, data_file=child_file, import_lookup_by_alternate_key=True)
child_file.close()
assert resp["success"] == True
assert resp["rowCount"] == 3
