HumanSignal
diff --git a/.github/workflows/apply-linters.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/apply-linters.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/docker-build-ontop.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/docker-build-ontop.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/docker-build.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/docker-build.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/docker-release-promote.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/docker-release-promote.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/tests.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/tests.yml
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/guide/install_enterprise_docker.md
Lines changed: 3 additions & 0 deletions b/docs/source/guide/install_enterprise_docker.md
Lines changed: 3 additions & 0 deletions
diff --git a/docs/source/guide/install_enterprise_k8s.md
Lines changed: 3 additions & 0 deletions b/docs/source/guide/install_enterprise_k8s.md
Lines changed: 3 additions & 0 deletions
diff --git a/docs/source/tags/pdf.md
Lines changed: 49 additions & 0 deletions b/docs/source/tags/pdf.md
Lines changed: 49 additions & 0 deletions
diff --git a/docs/source/templates/pdf_classification.md
Lines changed: 5 additions & 5 deletions b/docs/source/templates/pdf_classification.md
Lines changed: 5 additions & 5 deletions
diff --git a/label_studio/annotation_templates/structured-data-parsing/pdf-classification/config.xml
Lines changed: 1 addition & 1 deletion b/label_studio/annotation_templates/structured-data-parsing/pdf-classification/config.xml
Lines changed: 1 addition & 1 deletion
diff --git a/label_studio/annotation_templates/structured-data-parsing/pdf-classification/config.yml
Lines changed: 2 additions & 2 deletions b/label_studio/annotation_templates/structured-data-parsing/pdf-classification/config.yml
Lines changed: 2 additions & 2 deletions
diff --git a/label_studio/core/settings/base.py
Lines changed: 1 addition & 0 deletions b/label_studio/core/settings/base.py
Lines changed: 1 addition & 0 deletions
diff --git a/label_studio/data_manager/actions/remove_duplicates.py
Lines changed: 2 additions & 0 deletions b/label_studio/data_manager/actions/remove_duplicates.py
Lines changed: 2 additions & 0 deletions
diff --git a/label_studio/feature_flags.json
Lines changed: 27 additions & 0 deletions b/label_studio/feature_flags.json
Lines changed: 27 additions & 0 deletions
diff --git a/label_studio/io_storages/azure_blob/models.py
Lines changed: 3 additions & 3 deletions b/label_studio/io_storages/azure_blob/models.py
Lines changed: 3 additions & 3 deletions
diff --git a/label_studio/io_storages/base_models.py
Lines changed: 24 additions & 15 deletions b/label_studio/io_storages/base_models.py
Lines changed: 24 additions & 15 deletions
diff --git a/label_studio/io_storages/gcs/models.py
Lines changed: 3 additions & 3 deletions b/label_studio/io_storages/gcs/models.py
Lines changed: 3 additions & 3 deletions
diff --git a/label_studio/io_storages/localfiles/models.py
Lines changed: 5 additions & 5 deletions b/label_studio/io_storages/localfiles/models.py
Lines changed: 5 additions & 5 deletions
diff --git a/label_studio/io_storages/redis/models.py
Lines changed: 3 additions & 2 deletions b/label_studio/io_storages/redis/models.py
Lines changed: 3 additions & 2 deletions
@@ -24,7 +24,7 @@ jobs:
           ref: ${{ inputs.branch_name }}
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.12'
 
 
@@ -110,7 +110,7 @@ jobs:
             ${{ steps.calculate-docker-tags.outputs.docker-tags }}
 
       - name: Push Docker image
-        uses: docker/build-push-action@v6.16.0
+        uses: docker/build-push-action@v6.17.0
         id: docker_build_and_push
         with:
           context: .
 
@@ -130,7 +130,7 @@ jobs:
             type=raw,value=${{ steps.version.outputs.build_version }}
 
       - name: Push Docker image
-        uses: docker/build-push-action@v6.16.0
+        uses: docker/build-push-action@v6.17.0
         id: docker_build_and_push
         with:
           context: .
 
@@ -195,7 +195,7 @@ jobs:
             ${{ steps.generate-tags.outputs.ubuntu-tags }}
 
       - name: Build and Push Release Ubuntu Docker image
-        uses: docker/build-push-action@v6.16.0
+        uses: docker/build-push-action@v6.17.0
         id: docker_build
         with:
           context: ${{ steps.release_dockerfile.outputs.release_dir }}
 
@@ -140,7 +140,7 @@ jobs:
 
       - name: Upload coverage to Codecov
         if: ${{ github.event.pull_request.head.repo.fork == false && github.event.pull_request.user.login != 'dependabot[bot]' }}
-        uses: codecov/codecov-action@v5.4.2
+        uses: codecov/codecov-action@v5.4.3
         with:
           name: codecov-python-${{ matrix.python-version }}
           flags: pytests
 
@@ -19,6 +19,9 @@ See [Secure Label Studio](security.html) for more details about security and har
 
 To install Label Studio Community Edition, see [Install Label Studio](https://labelstud.io/guide/install). This page is specific to the Enterprise version of Label Studio.
 
+!!! note
+    On-prem deployments of Label Studio Enterprise are not supported for Academic licenses.
+
 {% insertmd includes/deploy.md %}
 
 ## Install Label Studio Enterprise using Docker
 
@@ -23,6 +23,9 @@ Your Kubernetes cluster can be self-hosted or installed somewhere such as Amazon
 
 </div>
 
+!!! note
+    On-prem deployments of Label Studio Enterprise are not supported for Academic licenses.
+
 This high-level architecture diagram outlines the main components of a Label Studio Enterprise deployment.
 
 <img src="/images/LSE_k8s_scheme.png"/>
 
@@ -0,0 +1,49 @@
+---
+title: PDF
+type: tags
+order: 302
+meta_title: PDF Tag for loading PDF documents
+meta_description: Label Studio PDF Tag for loading PDF documents for machine learning and data science projects.
+---
+
+The `Pdf` tag displays a PDF document for labeling. Use for performing document-level annotations, transcription, and summarization.
+
+Use with the following data types: PDF.
+
+### Parameters
+
+| Param | Type | Default | Description |
+| --- | --- | --- | --- |
+| name | <code>string</code> |  | Name of the element |
+| value | <code>string</code> |  | Value of the element - field name to retrieve the PDF URL from |
+
+### Supported Control tags
+Document-level annotations are supported with the `Pdf` tag, for example:
+
+- Document classification with [Choices](/tags/choices.html)
+- Document rating with [Rating](/tags/rating.html)
+- Transcription and summarization with [TextArea](/tags/textarea.html)
+
+### Example
+
+Labeling configuration to label PDF documents:
+
+```html
+<View>
+  <Pdf name="pdf" value="$pdf" />
+  <Choices name="choices" toName="pdf">
+    <Choice value="Legal" />
+    <Choice value="Financial" />
+    <Choice value="Technical" />
+  </Choices>
+</View>
+```
+
+**Example Input data:**
+
+```json
+{
+  "pdf": "https://app.humansignal.com/static/samples/sample.pdf"
+}
+```
+
@@ -26,11 +26,11 @@ If you want to perform PDF classification, use this template. This template prom
         <Choice value="Important article"/>
         <Choice value="Yellow press"/>
     </Choices>
-    <HyperText name="pdf" value="$pdf" inline="true"/>
+    <Pdf name="pdf" value="$pdf"/>
 </View>
 
 <!-- {
-    "pdf": "<embed src='https://app.heartex.ai/static/samples/sample.pdf' width='100%' height='600px'/>"
+    "pdf": "/static/samples/sample.pdf"
 } -->
 ```
 
@@ -56,9 +56,9 @@ Use the [Choices](/tags/choices.html) control tag to present classification opti
   </Choices>
 ```
 
-Use the [HyperText](/tags/hypertext.html) tag to render an inline version of the PDF data:
+Use the [Pdf](/tags/pdf.html) tag to render an inline version of the PDF data:
 ```xml
-<HyperText name="pdf" value="$pdf" inline="true"/>
+<Pdf name="pdf" value="$pdf"/>
 ```
 
 ### Input data
@@ -74,4 +74,4 @@ Label Studio does not support labeling PDF-formatted files directly. You should
 ## Related tags
 - [Rating](/tags/rating.html)
 - [Choices](/tags/choices.html)
-- [HyperText](/tags/hypertext.html)
+- [Pdf](/tags/pdf.html)
@@ -6,7 +6,7 @@
     <Choice value="Important article"/>
     <Choice value="Yellow press"/>
   </Choices>
-  <HyperText name="pdf" value="$pdf" inline="true"/>
+  <Pdf name="pdf" value="$pdf"/>
 </View>
 
 
 
@@ -12,13 +12,13 @@ config: |
       <Choice value="Important article"/>
       <Choice value="Yellow press"/>
     </Choices>
-    <HyperText name="pdf" value="$pdf" inline="true"/>
+    <Pdf name="pdf" value="$pdf"/>
   </View>
 
 
   <!-- {
     "data": {
-      "pdf": "<embed src='/static/samples/sample.pdf' width='100%' height='600px'/>"
+      "pdf": "/static/samples/sample.pdf"
     }
   } -->
 
@@ -494,6 +494,7 @@
         '.mp4',
         '.webm',
         '.webp',
+        '.pdf',
     ]
 )
 
 
@@ -172,6 +172,8 @@ def restore_storage_links_for_duplicated_tasks(duplicates) -> None:
                     link = storage_link_class(
                         task_id=task['id'],
                         key=link_instance.key,
+                        row_index=link_instance.row_index,
+                        row_group=link_instance.row_group,
                         storage=link_instance.storage,
                     )
                     link.save()
 
@@ -3120,6 +3120,33 @@
       "version": 2,
       "deleted": false
     },
+    "fflag_feat_root_11_support_jsonl_cloud_storage": {
+      "key": "fflag_feat_root_11_support_jsonl_cloud_storage",
+      "on": false,
+      "prerequisites": [],
+      "targets": [],
+      "contextTargets": [],
+      "rules": [],
+      "fallthrough": {
+        "variation": 0
+      },
+      "offVariation": 1,
+      "variations": [
+        true,
+        false
+      ],
+      "clientSideAvailability": {
+        "usingMobileKey": false,
+        "usingEnvironmentId": false
+      },
+      "clientSide": false,
+      "salt": "85e018dcd2e64c689a61ee7ed3c5edb2",
+      "trackEvents": false,
+      "trackEventsFallthrough": false,
+      "debugEventsUntilDate": null,
+      "version": 2,
+      "deleted": false
+    },
     "fflag_feature_all_optic_1421_cold_start_v2": {
       "key": "fflag_feature_all_optic_1421_cold_start_v2",
       "on": false,
 
@@ -209,17 +209,17 @@ def iterkeys(self):
                 continue
             yield file.name
 
-    def get_data(self, key) -> list[dict]:
+    def get_data(self, key) -> Union[dict, list[dict]]:
         if self.use_blob_urls:
             data_key = settings.DATA_UNDEFINED_NAME
-            return [{data_key: f'{self.url_scheme}://{self.container}/{key}'}]
+            return {data_key: f'{self.url_scheme}://{self.container}/{key}'}
 
         container = self.get_container()
         blob = container.download_blob(key)
         blob_str = blob.content_as_text()
         value = json.loads(blob_str)
         if isinstance(value, dict):
-            return [value]
+            return value
         elif isinstance(value, list):
             for idx, item in enumerate(value):
                 if not isinstance(item, dict):
 
@@ -341,7 +341,7 @@ def _scan_and_create_links_v2(self):
         raise NotImplementedError
 
     @classmethod
-    def add_task(cls, data, project, maximum_annotations, max_inner_id, storage, key, link_class):
+    def add_task(cls, data, project, maximum_annotations, max_inner_id, storage, key, row_index, link_class):
         # predictions
         predictions = data.get('predictions', [])
         if predictions:
@@ -375,8 +375,8 @@ def add_task(cls, data, project, maximum_annotations, max_inner_id, storage, key
                 inner_id=max_inner_id,
             )
 
-            link_class.create(task, key, storage)
-            logger.debug(f'Create {storage.__class__.__name__} link with key={key} for task={task}')
+            link_class.create(task, key, storage, row_index=row_index)
+            logger.debug(f'Create {storage.__class__.__name__} link with {key=} and {row_index=} for {task=}')
 
             raise_exception = not flag_set(
                 'ff_fix_back_dev_3342_storage_scan_with_invalid_annotations', user=AnonymousUser()
@@ -423,10 +423,10 @@ def _scan_and_create_links(self, link_class):
             logger.debug('Scanning key %s', key)
             self.info_update_progress(last_sync_count=tasks_created, tasks_existed=tasks_existed)
 
-            # skip if task already exists
-            if link_class.exists(key, self):
+            # skip if key has already been synced
+            if n_tasks_linked := link_class.n_tasks_linked(key, self):
                 logger.debug('%s link %s already exists', self.__class__.__name__, key)
-                tasks_existed += 1  # update progress counter
+                tasks_existed += n_tasks_linked  # update progress counter
                 continue
 
             logger.debug('%s: found new key %s', self.__class__.__name__, key)
@@ -441,13 +441,20 @@ def _scan_and_create_links(self, link_class):
                 )
                 continue
 
-            if not flag_set('fflag_feat_dia_2092_multitasks_per_storage_link'):
-                tasks_data = tasks_data[:1]
+            if isinstance(tasks_data, dict):
+                tasks_data = [tasks_data]
+                row_indices = [None]
+            else:
+                if not flag_set('fflag_feat_dia_2092_multitasks_per_storage_link'):
+                    tasks_data = tasks_data[:1]
+                row_indices = range(len(tasks_data))
 
-            for task_data in tasks_data:
+            for row_index, task_data in zip(row_indices, tasks_data):
                 # TODO: batch this loop body with add_task -> add_tasks in a single bulk write.
-                # Also have to handle any mismatch between len(tasks_data) and settings.WEBHOOK_BATCH_SIZE
-                task = self.add_task(task_data, self.project, maximum_annotations, max_inner_id, self, key, link_class)
+                # See DIA-2062 for prerequisites
+                task = self.add_task(
+                    task_data, self.project, maximum_annotations, max_inner_id, self, key, row_index, link_class
+                )
                 max_inner_id += 1
 
                 # update progress counters for storage info
@@ -702,12 +709,14 @@ class ImportStorageLink(models.Model):
     row_index = models.IntegerField(null=True, blank=True, help_text='Parquet row index, or JSON[L] object index')
 
     @classmethod
-    def exists(cls, key, storage):
-        return cls.objects.filter(key=key, storage=storage.id).exists()
+    def n_tasks_linked(cls, key, storage):
+        return cls.objects.filter(key=key, storage=storage.id).count()
 
     @classmethod
-    def create(cls, task, key, storage):
-        link, created = cls.objects.get_or_create(task_id=task.id, key=key, storage=storage, object_exists=True)
+    def create(cls, task, key, storage, row_index=None, row_group=None):
+        link, created = cls.objects.get_or_create(
+            task_id=task.id, key=key, row_index=row_index, row_group=row_group, storage=storage, object_exists=True
+        )
         return link
 
     class Meta:
 
@@ -180,17 +180,17 @@ def iterkeys(self):
             return_key=True,
         )
 
-    def get_data(self, key) -> list[dict]:
+    def get_data(self, key) -> Union[dict, list[dict]]:
         if self.use_blob_urls:
-            return [{settings.DATA_UNDEFINED_NAME: GCS.get_uri(self.bucket, key)}]
+            return {settings.DATA_UNDEFINED_NAME: GCS.get_uri(self.bucket, key)}
         data = GCS.read_file(
             client=self.get_client(),
             bucket_name=self.bucket,
             key=key,
             convert_to=GCS.ConvertBlobTo.JSON,
         )
         if isinstance(data, dict):
-            return [data]
+            return data
         elif isinstance(data, list):
             for idx, item in enumerate(data):
                 if not isinstance(item, dict):
 
@@ -78,16 +78,16 @@ def iterkeys(self):
                     continue
                 yield str(file)
 
-    def get_data(self, key) -> list[dict]:
+    def get_data(self, key) -> dict | list[dict]:
         path = Path(key)
         if self.use_blob_urls:
             # include self-hosted links pointed to local resources via
             # {settings.HOSTNAME}/data/local-files?d=<path/to/local/dir>
             document_root = Path(settings.LOCAL_FILES_DOCUMENT_ROOT)
             relative_path = str(path.relative_to(document_root))
-            return [
-                {settings.DATA_UNDEFINED_NAME: f'{settings.HOSTNAME}/data/local-files/?d={quote(str(relative_path))}'}
-            ]
+            return {
+                settings.DATA_UNDEFINED_NAME: f'{settings.HOSTNAME}/data/local-files/?d={quote(str(relative_path))}'
+            }
 
         try:
             with open(path, encoding='utf8') as f:
@@ -99,7 +99,7 @@ def get_data(self, key) -> list[dict]:
             )
 
         if isinstance(value, dict):
-            return [value]
+            return value
         elif isinstance(value, list):
             for idx, item in enumerate(value):
                 if not isinstance(item, dict):
 
@@ -3,6 +3,7 @@
 
 import json
 import logging
+from typing import Union
 
 import redis
 from django.db import models
@@ -89,7 +90,7 @@ def iterkeys(self):
         for key in client.keys(path + '*'):
             yield key
 
-    def get_data(self, key) -> list[dict]:
+    def get_data(self, key) -> Union[dict, list[dict]]:
         client = self.get_client()
         value_str = client.get(key)
         if not value_str:
@@ -98,7 +99,7 @@ def get_data(self, key) -> list[dict]:
             value = json.loads(value_str)
             # NOTE: this validation did not previously exist, we were accepting any JSON values
             if isinstance(value, dict):
-                return [value]
+                return value
             elif isinstance(value, list):
                 for idx, item in enumerate(value):
                     if not isinstance(item, dict):
Original file line number	Diff line number	Diff line change
`@@ -494,6 +494,7 @@`
`494`	`494`	`'.mp4',`
`495`	`495`	`'.webm',`
`496`	`496`	`'.webp',`
	`497`	`+ '.pdf',`
`497`	`498`	`]`
`498`	`499`	`)`
`499`	`500`
Original file line number	Diff line number	Diff line change
`@@ -172,6 +172,8 @@ def restore_storage_links_for_duplicated_tasks(duplicates) -> None:`
`172`	`172`	`link = storage_link_class(`
`173`	`173`	`task_id=task['id'],`
`174`	`174`	`key=link_instance.key,`
	`175`	`+ row_index=link_instance.row_index,`
	`176`	`+ row_group=link_instance.row_group,`
`175`	`177`	`storage=link_instance.storage,`
`176`	`178`	`)`
`177`	`179`	`link.save()`