Rename image_data to labels, fix get_rewards import error #7

Merged
merged 1 commit on Dec 21, 2023
3 changes: 2 additions & 1 deletion neurons/miner.py
@@ -39,7 +39,8 @@ class Miner(BaseMinerNeuron):
def __init__(self, config=None):
super(Miner, self).__init__(config=config)

# TODO(developer): Anything specific to your use case you can do here
bt.logging.info(f'Miner running tesseract: {pytesseract.get_tesseract_version()} and pytesseract version: {pytesseract.get_version()}')



async def forward(
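
The only substantive change to the miner is the startup log reporting the installed tesseract and pytesseract versions. For orientation, here is a minimal sketch of how a miner built on pytesseract might turn the validator's base64 image into the per-section predictions scored further down; the 'position' and 'text' field names are assumptions, since the exact synapse fields are truncated in this diff.

import base64
import io
from typing import List

import pytesseract
from PIL import Image


def ocr_sections(base64_image: str) -> List[dict]:
    """Decode a base64-encoded image and return one dict per detected word."""
    image = Image.open(io.BytesIO(base64.b64decode(base64_image)))
    data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

    sections = []
    for i, text in enumerate(data["text"]):
        if not text.strip():
            continue  # tesseract emits empty entries for blocks and whitespace
        left, top = data["left"][i], data["top"][i]
        width, height = data["width"][i], data["height"][i]
        # 'position' as [x1, y1, x2, y2] and 'text' are assumed field names
        sections.append({"position": [left, top, left + width, top + height], "text": text})
    return sections
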
2 changes: 1 addition & 1 deletion neurons/validator.py
@@ -61,7 +61,7 @@ async def forward(self):
"""

# get_random_uids is an example method, but you can replace it with your own.
miner_uids = ocr_subnet.utils.uids.get_random_uids(self, k=self.config.neuron.sample_size)
miner_uids = ocr_subnet.utils.uids.get_random_uids(self, k=min(self.config.neuron.sample_size, self.metagraph.n.item()))

# make a hash from the timestamp
filename = hashlib.md5(str(time.time()).encode()).hexdigest()
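
The validator fix clamps the sample size to the number of UIDs registered in the metagraph, so get_random_uids is never asked for more miners than exist. A minimal sketch of the clamping idea, assuming a torch-backed metagraph where n.item() gives the UID count (the real get_random_uids also filters for availability):

import random
from typing import List

import torch


def sample_uids(metagraph_n: torch.Tensor, sample_size: int) -> List[int]:
    """Sample k distinct UIDs; clamping k avoids an error when sample_size
    exceeds the number of registered UIDs."""
    k = min(sample_size, metagraph_n.item())
    return random.sample(range(metagraph_n.item()), k)


# With 5 registered UIDs and sample_size=10, k becomes 5 instead of raising a ValueError.
print(sample_uids(torch.tensor(5), 10))
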
9 changes: 6 additions & 3 deletions ocr_subnet/utils/image.py
@@ -12,14 +12,17 @@ def serialize(image: Image, format: str="JPEG") -> str:

buffer = io.BytesIO()
image.save(buffer, format=format)
return buffer.getvalue()
byte_string = buffer.getvalue()
base64_string = base64.b64encode(byte_string).decode()
return base64_string


def deserialize(base64_string: str) -> Image:
"""Converts base64 string to PIL image.
"""

return Image.open(io.BytesIO(base64.b64decode(base64_string)))
decoded_string = base64.b64decode(base64_string)
buffer = io.BytesIO(decoded_string)
return Image.open(buffer)


def load(pdf_path: str, page: int=0, zoom_x: float=1.0, zoom_y: float=1.0) -> Image:
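
With this fix the image helpers are symmetric: serialize now returns a base64 string (instead of raw bytes), which is exactly what deserialize expects. A quick round-trip sketch, assuming the package is importable as ocr_subnet.utils.image:

from PIL import Image

from ocr_subnet.utils.image import deserialize, serialize

# PIL image -> base64 str (JSON/synapse friendly) -> PIL image
original = Image.new("RGB", (64, 64), color="white")
encoded = serialize(original, format="JPEG")
assert isinstance(encoded, str)

restored = deserialize(encoded)
assert restored.size == original.size  # JPEG is lossy, so compare size rather than pixels
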
2 changes: 1 addition & 1 deletion ocr_subnet/validator/__init__.py
@@ -1,3 +1,3 @@
# from .forward import forward
from .reward import reward
from .reward import get_rewards
from .generate import invoice
40 changes: 13 additions & 27 deletions ocr_subnet/validator/reward.py
@@ -114,54 +114,40 @@ def section_reward(label: dict, pred: dict, alpha_p=1.0, alpha_f=1.0, alpha_t=1.

return reward

def sort_predictions(image_data: List[dict], predictions: List[dict], draw=False) -> List[dict]:
def sort_predictions(labels: List[dict], predictions: List[dict], draw=False) -> List[dict]:
"""
Sort the predictions to match the order of the ground truth data using the Hungarian algorithm.

Args:
- image_data (list): The ground truth data for the image.
- labels (list): The ground truth data for the image.
- predictions (list): The predicted data for the image.

Returns:
- list: The sorted predictions.
"""

# First, make sure that the predictions is at least as long as the image data
predictions += [{}] * (len(image_data) - len(predictions))
r = torch.zeros((len(image_data), len(predictions)))
predictions += [{}] * (len(labels) - len(predictions))
r = torch.zeros((len(labels), len(predictions)))
for i in range(r.shape[0]):
for j in range(r.shape[1]):
r[i,j] = section_reward(image_data[i], predictions[j])['total']
r[i,j] = section_reward(labels[i], predictions[j])['total']

# Use the Hungarian algorithm to find the best assignment
row_indices, col_indices = linear_sum_assignment(r, maximize=True)

if draw:
fig = px.imshow(r.detach().numpy(),
color_continuous_scale='Blues',
title=f'Optimal Assignment (Avg. Reward: {r[row_indices, col_indices].mean():.3f})',
width=600, height=600
)
fig.update_layout(coloraxis_showscale=False)
fig.update_yaxes(title_text='Ground Truth')
fig.update_xaxes(title_text='Predictions')

for i, j in zip(row_indices, col_indices):
fig.add_annotation(x=j, y=i, text='+', showarrow=False, font=dict(color='red', size=16))
fig.show()

sorted_predictions = [predictions[i] for i in col_indices]

return sorted_predictions


def reward(self, image_data: List[dict], response: OCRSynapse) -> float:
def reward(self, labels: List[dict], response: OCRSynapse) -> float:
"""
Reward the miner response to the OCR request. This method returns a reward
value for the miner, which is used to update the miner's score.

Args:
- image (List[dict]): The true data underlying the image sent to the miner.
- labels (List[dict]): The true data underlying the image sent to the miner.
- response (OCRSynapse): Response from the miner.

The expected fields in each section of the response are:
@@ -177,8 +163,8 @@ def reward(self, image_data: List[dict], response: OCRSynapse) -> float:
return 0.0

# Sort the predictions to match the order of the ground truth data as best as possible
predictions = sort_predictions(image_data, predictions)
predictions = sort_predictions(labels, predictions)

alpha_p = self.config.neuron.alpha_position
alpha_t = self.config.neuron.alpha_text
alpha_f = self.config.neuron.alpha_font
@@ -187,8 +173,8 @@ def reward(self, image_data: List[dict], response: OCRSynapse) -> float:

# Take mean score over all sections in document (note that we don't penalize extra sections)
section_rewards = [
section_reward(label, pred, verbose=True, alpha_f=alpha_f, alpha_p=alpha_p, alpha_t=alpha_t)
for label, pred in zip(image_data, predictions)
section_reward(label, pred, verbose=True, alpha_f=alpha_f, alpha_p=alpha_p, alpha_t=alpha_t)
for label, pred in zip(labels, predictions)
]
prediction_reward = torch.mean(torch.FloatTensor([reward['total'] for reward in section_rewards]))

@@ -200,7 +186,7 @@ def reward(self, image_data: List[dict], response: OCRSynapse) -> float:

def get_rewards(
self,
image_data: List[dict],
labels: List[dict],
responses: List[OCRSynapse],
) -> torch.FloatTensor:
"""
@@ -215,5 +201,5 @@
"""
# Get all the reward results by iteratively calling your reward() function.
return torch.FloatTensor(
[reward(self, image_data, response) for response in responses]
[reward(self, labels, response) for response in responses]
).to(self.device)
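
Most of reward.py is the image_data to labels rename, but the heart of sort_predictions is worth spelling out: it builds a labels-by-predictions reward matrix and lets the Hungarian algorithm (scipy's linear_sum_assignment) pick the pairing that maximizes total reward before scoring. A standalone sketch with a toy similarity function standing in for section_reward()['total']:

from typing import List

import torch
from scipy.optimize import linear_sum_assignment


def toy_similarity(label: dict, pred: dict) -> float:
    # Stand-in for section_reward: 1.0 on an exact text match, else 0.0
    return 1.0 if label.get("text") == pred.get("text") else 0.0


def sort_predictions_sketch(labels: List[dict], predictions: List[dict]) -> List[dict]:
    # Pad so every label has a candidate prediction, as the real code does
    predictions = predictions + [{}] * (len(labels) - len(predictions))
    r = torch.zeros((len(labels), len(predictions)))
    for i, label in enumerate(labels):
        for j, pred in enumerate(predictions):
            r[i, j] = toy_similarity(label, pred)
    # Hungarian algorithm: one prediction per label, maximizing total reward
    _, col_indices = linear_sum_assignment(r, maximize=True)
    return [predictions[j] for j in col_indices]


labels = [{"text": "Invoice"}, {"text": "Total: $10"}]
preds = [{"text": "Total: $10"}, {"text": "Invoice"}]
print(sort_predictions_sketch(labels, preds))  # reordered to match the label order
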
2 changes: 1 addition & 1 deletion scripts/ocr_concept.ipynb

Large diffs are not rendered by default.

Binary file modified scripts/sample_invoice.pdf
Binary file not shown.