Skip to content

Commit 75d6b8b

Browse files
authored
Merge pull request #7 from steffencruz/second-pass
Rename image_data to labels, fix get_rewards import error
2 parents 7e62819 + 9aab32e commit 75d6b8b

File tree

7 files changed

+24
-34
lines changed

7 files changed

+24
-34
lines changed

neurons/miner.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class Miner(BaseMinerNeuron):
3939
def __init__(self, config=None):
4040
super(Miner, self).__init__(config=config)
4141

42-
# TODO(developer): Anything specific to your use case you can do here
42+
bt.logging.info(f'Miner running tesseract: {pytesseract.get_tesseract_version()} and pytesseract version: {pytesseract.get_version()}')
43+
4344

4445

4546
async def forward(

neurons/validator.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ async def forward(self):
6161
"""
6262

6363
# get_random_uids is an example method, but you can replace it with your own.
64-
miner_uids = ocr_subnet.utils.uids.get_random_uids(self, k=self.config.neuron.sample_size)
64+
miner_uids = ocr_subnet.utils.uids.get_random_uids(self, k=min(self.config.neuron.sample_size, self.metagraph.n.item()))
6565

6666
# make a hash from the timestamp
6767
filename = hashlib.md5(str(time.time()).encode()).hexdigest()

ocr_subnet/utils/image.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,17 @@ def serialize(image: Image, format: str="JPEG") -> str:
1212

1313
buffer = io.BytesIO()
1414
image.save(buffer, format=format)
15-
return buffer.getvalue()
15+
byte_string = buffer.getvalue()
16+
base64_string = base64.b64encode(byte_string).decode()
17+
return base64_string
1618

1719

1820
def deserialize(base64_string: str) -> Image:
1921
"""Converts base64 string to PIL image.
2022
"""
21-
22-
return Image.open(io.BytesIO(base64.b64decode(base64_string)))
23+
decoded_string = base64.b64decode(base64_string)
24+
buffer = io.BytesIO(decoded_string)
25+
return Image.open(buffer)
2326

2427

2528
def load(pdf_path: str, page: int=0, zoom_x: float=1.0, zoom_y: float=1.0) -> Image:

ocr_subnet/validator/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# from .forward import forward
2-
from .reward import reward
2+
from .reward import get_rewards
33
from .generate import invoice

ocr_subnet/validator/reward.py

+13-27
Original file line numberDiff line numberDiff line change
@@ -114,54 +114,40 @@ def section_reward(label: dict, pred: dict, alpha_p=1.0, alpha_f=1.0, alpha_t=1.
114114

115115
return reward
116116

117-
def sort_predictions(image_data: List[dict], predictions: List[dict], draw=False) -> List[dict]:
117+
def sort_predictions(labels: List[dict], predictions: List[dict], draw=False) -> List[dict]:
118118
"""
119119
Sort the predictions to match the order of the ground truth data using the Hungarian algorithm.
120120
121121
Args:
122-
- image_data (list): The ground truth data for the image.
122+
- labels (list): The ground truth data for the image.
123123
- predictions (list): The predicted data for the image.
124124
125125
Returns:
126126
- list: The sorted predictions.
127127
"""
128128

129129
# First, make sure that the predictions is at least as long as the image data
130-
predictions += [{}] * (len(image_data) - len(predictions))
131-
r = torch.zeros((len(image_data), len(predictions)))
130+
predictions += [{}] * (len(labels) - len(predictions))
131+
r = torch.zeros((len(labels), len(predictions)))
132132
for i in range(r.shape[0]):
133133
for j in range(r.shape[1]):
134-
r[i,j] = section_reward(image_data[i], predictions[j])['total']
134+
r[i,j] = section_reward(labels[i], predictions[j])['total']
135135

136136
# Use the Hungarian algorithm to find the best assignment
137137
row_indices, col_indices = linear_sum_assignment(r, maximize=True)
138138

139-
if draw:
140-
fig = px.imshow(r.detach().numpy(),
141-
color_continuous_scale='Blues',
142-
title=f'Optimal Assignment (Avg. Reward: {r[row_indices, col_indices].mean():.3f})',
143-
width=600, height=600
144-
)
145-
fig.update_layout(coloraxis_showscale=False)
146-
fig.update_yaxes(title_text='Ground Truth')
147-
fig.update_xaxes(title_text='Predictions')
148-
149-
for i, j in zip(row_indices, col_indices):
150-
fig.add_annotation(x=j, y=i, text='+', showarrow=False, font=dict(color='red', size=16))
151-
fig.show()
152-
153139
sorted_predictions = [predictions[i] for i in col_indices]
154140

155141
return sorted_predictions
156142

157143

158-
def reward(self, image_data: List[dict], response: OCRSynapse) -> float:
144+
def reward(self, labels: List[dict], response: OCRSynapse) -> float:
159145
"""
160146
Reward the miner response to the OCR request. This method returns a reward
161147
value for the miner, which is used to update the miner's score.
162148
163149
Args:
164-
- image (List[dict]): The true data underlying the image sent to the miner.
150+
- labels (List[dict]): The true data underlying the image sent to the miner.
165151
- response (OCRSynapse): Response from the miner.
166152
167153
The expected fields in each section of the response are:
@@ -177,8 +163,8 @@ def reward(self, image_data: List[dict], response: OCRSynapse) -> float:
177163
return 0.0
178164

179165
# Sort the predictions to match the order of the ground truth data as best as possible
180-
predictions = sort_predictions(image_data, predictions)
181-
166+
predictions = sort_predictions(labels, predictions)
167+
182168
alpha_p = self.config.neuron.alpha_position
183169
alpha_t = self.config.neuron.alpha_text
184170
alpha_f = self.config.neuron.alpha_font
@@ -187,8 +173,8 @@ def reward(self, image_data: List[dict], response: OCRSynapse) -> float:
187173

188174
# Take mean score over all sections in document (note that we don't penalize extra sections)
189175
section_rewards = [
190-
section_reward(label, pred, verbose=True, alpha_f=alpha_f, alpha_p=alpha_p, alpha_t=alpha_t)
191-
for label, pred in zip(image_data, predictions)
176+
section_reward(label, pred, verbose=True, alpha_f=alpha_f, alpha_p=alpha_p, alpha_t=alpha_t)
177+
for label, pred in zip(labels, predictions)
192178
]
193179
prediction_reward = torch.mean(torch.FloatTensor([reward['total'] for reward in section_rewards]))
194180

@@ -200,7 +186,7 @@ def reward(self, image_data: List[dict], response: OCRSynapse) -> float:
200186

201187
def get_rewards(
202188
self,
203-
image_data: List[dict],
189+
labels: List[dict],
204190
responses: List[OCRSynapse],
205191
) -> torch.FloatTensor:
206192
"""
@@ -215,5 +201,5 @@ def get_rewards(
215201
"""
216202
# Get all the reward results by iteratively calling your reward() function.
217203
return torch.FloatTensor(
218-
[reward(self, image_data, response) for response in responses]
204+
[reward(self, labels, response) for response in responses]
219205
).to(self.device)

scripts/ocr_concept.ipynb

+1-1
Large diffs are not rendered by default.

scripts/sample_invoice.pdf

0 Bytes
Binary file not shown.

0 commit comments

Comments (0)