Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Second pass #5

Merged
merged 5 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ Before you proceed with the installation of the subnet, note the following:
- `neurons/miner.py`: Use `pytesseract` for OCR, and use `OCRSynapse` to communicate with validator

### Remaining changes to be done
In addition to the above files, we would also update the following files:
In addition to the above files, we have also updated the following files:
- `README.md`: This file contains the documentation for your project. Update this file to reflect your project's documentation.
- `CONTRIBUTING.md`: This file contains the instructions for contributing to your project. Update this file to reflect your project's contribution guidelines.
- `template/__init__.py`: This file contains the version of your project.
Expand Down
23 changes: 14 additions & 9 deletions neurons/miner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# TODO(developer): Set your name
# Copyright © 2023 <your name>

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
Expand All @@ -22,18 +20,16 @@
import bittensor as bt
import pytesseract

# Bittensor Miner Template:
# Bittensor OCR Miner
import ocr_subnet

from ocr_subnet.utils.serialize import deserialize_image

# import base miner class which takes care of most of the boilerplate
from ocr_subnet.base.miner import BaseMinerNeuron


class Miner(BaseMinerNeuron):
"""
Your miner neuron class. You should use this class to define your miner's behavior. In particular, you should replace the forward function with your own logic. You may also want to override the blacklist and priority functions according to your needs.
OCR miner neuron class. You may also want to override the blacklist and priority functions according to your needs.

This class inherits from the BaseMinerNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior.

Expand All @@ -45,6 +41,7 @@ def __init__(self, config=None):

# TODO(developer): Anything specific to your use case you can do here


async def forward(
self, synapse: ocr_subnet.protocol.OCRSynapse
) -> ocr_subnet.protocol.OCRSynapse:
Expand All @@ -58,27 +55,35 @@ async def forward(
ocr_subnet.protocol.OCRSynapse: The synapse object with the 'response' field set to the extracted data.

"""
# Get image data
image = ocr_subnet.utils.image.deserialize(base64_string=synapse.base64_image)

image = deserialize_image(base64_string=synapse.base64_image)
# Use pytesseract to get the data
data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

response = []
# Loop over each item in the 'text' part of the data
for i in range(len(data['text'])):
if data['text'][i].strip() != '': # This filters out empty text results
x1, y1, width, height = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
if width * height < 10: # This filters out small boxes (likely noise)
continue

x2, y2 = x1 + width, y1 + height

# Here we don't have font information, so we'll omit that.
# Pytesseract does not extract font family or size information.
entry = {
'index': i,
'position': [x1, y1, x2, y2],
'text': data['text'][i]
}
response.append(entry)

# Merge together words into sections, which are on the same line (same y value) and are close together (small distance in x)
response = ocr_subnet.utils.process.group_and_merge_boxes(response)

# Sort sections by y, then sort by x so that they read left to right and top to bottom
response = sorted(response, key=lambda item: (item['position'][1], item['position'][0]))

# Attach response to synapse and return it.
synapse.response = response

Expand Down
66 changes: 50 additions & 16 deletions neurons/validator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# TODO(developer): Set your name
# Copyright © 2023 <your name>

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
Expand All @@ -17,21 +15,20 @@
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.


import os
import time

# Bittensor
import hashlib
import bittensor as bt

from ocr_subnet.validator import forward
import ocr_subnet

# import base validator class which takes care of most of the boilerplate
from ocr_subnet.base.validator import BaseValidatorNeuron


class Validator(BaseValidatorNeuron):
"""
Your validator neuron class. You should use this class to define your validator's behavior. In particular, you should replace the forward function with your own logic.
OCR validator neuron class.

This class inherits from the BaseValidatorNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior.

Expand All @@ -44,19 +41,56 @@ def __init__(self, config=None):
bt.logging.info("load_state()")
self.load_state()

# TODO(developer): Anything specific to your use case you can do here
self.image_dir = './data/images/'
if not os.path.exists(self.image_dir):
os.makedirs(self.image_dir)


async def forward(self):
    """
    The forward function is called by the validator every time step.

    It consists of 3 important steps:
    - Generate a challenge for the miners (in this case it creates a synthetic invoice image)
    - Query the miners with the challenge
    - Score the responses from the miners

    Args:
        self (:obj:`bittensor.neuron.Neuron`): The neuron object which contains all the necessary state for the validator.

    """
    # get_random_uids is an example method, but you can replace it with your own.
    miner_uids = ocr_subnet.utils.uids.get_random_uids(self, k=self.config.neuron.sample_size)

    # Make a unique filename by hashing the current timestamp so repeated
    # challenges do not overwrite each other on disk.
    filename = hashlib.md5(str(time.time()).encode()).hexdigest()

    # Create a random (optionally corrupted) invoice image together with its
    # ground-truth labels, and save the pdf under the hashed filename.
    # NOTE(review): the scraped diff showed a garbled literal here; the unused
    # `filename` variable above strongly implies it belongs in this path.
    image_data = ocr_subnet.validator.generate.invoice(
        path=os.path.join(self.image_dir, f"{filename}.pdf"), corrupt=True
    )

    # Create synapse object to send to the miner and attach the image.
    synapse = ocr_subnet.protocol.OCRSynapse(base64_image=image_data['base64_image'])

    # The dendrite client queries the network.
    responses = self.dendrite.query(
        # Send the query to selected miner axons in the network.
        axons=[self.metagraph.axons[uid] for uid in miner_uids],
        # Pass the synapse to the miner.
        synapse=synapse,
        # Do not deserialize the response so that we have access to the raw response.
        deserialize=False,
    )

    # Log the results for monitoring purposes.
    bt.logging.info(f"Received responses: {responses}")

    # Score the raw responses against the known labels of the generated invoice.
    rewards = ocr_subnet.validator.reward.get_rewards(self, labels=image_data['labels'], responses=responses)

    bt.logging.info(f"Scored responses: {rewards}")

    # Update the scores based on the rewards. You may want to define your own update_scores function for custom behavior.
    self.update_scores(rewards, miner_uids)


# The main function parses the configuration and runs the validator.
Expand Down
3 changes: 1 addition & 2 deletions ocr_subnet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# TODO(developer): Set your name
# Copyright © 2023 <your name>

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
Expand Down Expand Up @@ -29,3 +27,4 @@
from . import protocol
from . import base
from . import validator
from . import utils
2 changes: 0 additions & 2 deletions ocr_subnet/base/validator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# TODO(developer): Set your name
# Copyright © 2023 <your name>

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
Expand Down
17 changes: 7 additions & 10 deletions ocr_subnet/protocol.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# TODO(developer): Set your name
# Copyright © 2023 <your name>

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
Expand All @@ -17,32 +15,31 @@
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import typing
import bittensor as bt

import bittensor as bt
from typing import Optional, List

class OCRSynapse(bt.Synapse):
    """
    A simple OCR synapse protocol representation which uses bt.Synapse as its base.
    This protocol enables communication between the miner and the validator.

    Attributes:
    - base64_image: Base64 encoding of pdf image to be processed by the miner.
    - response: List[dict] containing data extracted from the image.
    """

    # Required request input, filled by sending dendrite caller. It is a base64 encoded string.
    base64_image: str

    # Optional request output, filled by receiving axon.
    response: Optional[List[dict]] = None

    def deserialize(self) -> List[dict]:
        """
        Deserialize the miner response.

        Returns:
        - List[dict]: The deserialized response, which is a list of dictionaries containing the extracted data.
        """
        return self.response
1 change: 1 addition & 0 deletions ocr_subnet/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import config
from . import misc
from . import uids
from . import process
49 changes: 49 additions & 0 deletions ocr_subnet/utils/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import io
import fitz
import base64

from typing import List
from PIL import Image, ImageDraw


def serialize(image: "Image", format: str="JPEG") -> str:
    """Converts PIL image to base64 string.

    Args:
    - image: PIL image to encode.
    - format: Intermediate encoding format passed to ``Image.save`` (default JPEG).

    Returns:
    - str: Base64 encoding of the image bytes, suitable for `deserialize`.
    """

    buffer = io.BytesIO()
    image.save(buffer, format=format)
    # Bug fix: previously the raw JPEG bytes were returned without base64
    # encoding, so `deserialize` (which calls base64.b64decode) could not
    # round-trip the result and the annotated `str` return type was wrong.
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def deserialize(base64_string: str) -> "Image":
    """Decode a base64 string back into a PIL image.

    Args:
    - base64_string: Base64 encoding of the image bytes.

    Returns:
    - PIL image reconstructed from the decoded bytes.
    """

    raw_bytes = base64.b64decode(base64_string)
    return Image.open(io.BytesIO(raw_bytes))


def load(pdf_path: str, page: int=0, zoom_x: float=1.0, zoom_y: float=1.0) -> "Image":
    """Loads pdf image and converts to PIL image.

    Args:
    - pdf_path: Path of the pdf file to open.
    - page: Zero-based index of the page to render.
    - zoom_x: Horizontal zoom factor (1.0 means 100%).
    - zoom_y: Vertical zoom factor (1.0 means 100%).

    Returns:
    - PIL image of the rendered pdf page.
    """

    # Read the pdf into memory
    pdf = fitz.open(pdf_path)
    try:
        # Set zoom factors for x and y axis (1.0 means 100%)
        mat = fitz.Matrix(zoom_x, zoom_y)
        # Avoid shadowing the `page` parameter with the page object.
        pix = pdf[page].get_pixmap(matrix=mat)
        img_data = io.BytesIO(pix.tobytes('png'))
    finally:
        # Bug fix: the document was never closed, leaking the file handle.
        pdf.close()

    # convert to PIL image
    return Image.open(img_data)

def draw_boxes(image: "Image", response: List[dict], color='red'):
    """Draw a rectangle outline around each text box on the image.

    Args:
    - image: PIL image to annotate (modified in place).
    - response: List of dicts, each with a 'position' box [x1, y1, x2, y2].
    - color: Outline color for the rectangles.

    Returns:
    - The annotated PIL image.
    """

    canvas = ImageDraw.Draw(image)
    for entry in response:
        canvas.rectangle(entry['position'], outline=color)
    return image
64 changes: 64 additions & 0 deletions ocr_subnet/utils/process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from typing import List

def group_and_merge_boxes(data: List[dict], xtol: int=25, ytol: int=5) -> List[dict]:
    """
    Combines boxes that are close together into a single box so that the text is grouped into sections.

    Args:
    - data (list): List of dictionaries, each containing a 'position' box [x1, y1, x2, y2] and 'text'
    - xtol (int): Maximum distance between boxes in the x direction to be considered part of the same section
    - ytol (int): Maximum distance between boxes in the y direction to be considered part of the same section

    Returns:
    - list: List of dictionaries containing the position and text of each merged section
    """
    # Discard malformed entries (None, or missing a 'position' key).
    boxes = [item for item in data if item is not None and 'position' in item]

    # Step 1: bucket boxes into lines; a box joins the first line whose
    # leading box has a y1 within ytol of its own.
    lines: List[List[dict]] = []
    for box in boxes:
        target = None
        for candidate in lines:
            if candidate and abs(candidate[0]['position'][1] - box['position'][1]) <= ytol:
                target = candidate
                break
        if target is None:
            lines.append([box])
        else:
            target.append(box)

    # Step 2: within each line, sort left-to-right and fuse neighbours
    # whose horizontal gap is within xtol.
    merged: List[dict] = []
    for line in lines:
        line.sort(key=lambda entry: entry['position'][0])  # sort by x1
        idx = 0
        while idx + 1 < len(line):
            left = line[idx]['position']
            right = line[idx + 1]['position']
            if abs(left[2] - right[0]) <= xtol:  # horizontally adjacent
                # Replace the pair with their bounding box and joined text.
                line[idx] = {
                    'position': [min(left[0], right[0]), min(left[1], right[1]),
                                 max(left[2], right[2]), max(left[3], right[3])],
                    'text': line[idx]['text'] + ' ' + line[idx + 1]['text'],
                }
                del line[idx + 1]
            else:
                idx += 1
        merged.extend(line)

    return merged
19 changes: 0 additions & 19 deletions ocr_subnet/utils/serialize.py

This file was deleted.

Loading