Commit 0e161a3 (1 parent: 7670d35). Showing 34 changed files with 1,803 additions and 136 deletions.
@@ -0,0 +1,7 @@
{
    "python.testing.pytestArgs": [
        "tests"
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true
}
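This file is the VS Code Python testing configuration: unittest discovery is disabled and pytest is pointed at the tests directory. As a quick sanity check of that setup, a minimal sketch of a test pytest would discover there (the tests/test_sample.py path and test name are hypothetical, not part of this commit):

# tests/test_sample.py (hypothetical file, only to illustrate what the
# configuration above makes pytest discover)
def test_sanity():
    assert 1 + 1 == 2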
@@ -1,57 +1,57 @@
###[BUILT-IN MODULES]###
from typing import Tuple
###[EXTERNAL MODULES]###
from PIL import Image, ImageDraw
###[PERSONAL MODULES]###
from lib.translate import Translate
from lib.vision import Vision
from lib.textfit import TextFit


if __name__ == '__main__':

    img_in = './images/sample4.jpg'

    fit_tool = TextFit()
    vision = Vision(service_account=r'')  # pass here the path of your service account
    translate = Translate()
    translations = []

    # detect_text returns the opened image plus the detected text blocks
    response: Tuple[Image.Image, list] = vision.detect_text(img_in)  # image to translate
    for block in response[1]:
        translations.append({
            "font_color": block.get('font_color'),
            "translated": translate.translate(text=block.get('text', ''), target_language='pt-br'),
            "text_area": block.get('area'),
            "text_box_size": block.get('text_box_size')
        })

    # # added this so we don't need to send requests to Google when testing
    # translations = [
    #     {'font_color': 255, 'translated': 'SE PAPI E SUU VIREM BRINCAR COMIGO ...', 'text_area': ((1368, 294), (1582, 499)), 'text_box_size': (214, 205)},
    #     {'font_color': 255, 'translated': 'ENTÃO QUERO FICAR AQUI.', 'text_area': ((182, 648), (395, 757)), 'text_box_size': (213, 109)},
    #     {'font_color': 255, 'translated': 'MAS VOCÊ NÃO TEM QUE VIR.', 'text_area': ((1005, 1014), (1131, 1243)), 'text_box_size': (126, 229)},
    #     {'font_color': 255, 'translated': 'O QUE VOCÊ FAZ, QUERIDA?', 'text_area': ((160, 1028), (344, 1142)), 'text_box_size': (184, 114)},
    #     {'font_color': 255, 'translated': 'HÃ?!', 'text_area': ((435, 1067), (594, 1121)), 'text_box_size': (159, 54)},
    #     {'font_color': 255, 'translated': '!!', 'text_area': ((1547, 1265), (1560, 1283)), 'text_box_size': (13, 18)},
    #     {'font_color': 255, 'translated': '35', 'text_area': ((1667, 1521), (1689, 1533)), 'text_box_size': (22, 12)},
    #     {'font_color': 255, 'translated': 'VOCÊ AMA ESSE HUMANO. MAS POR QUE...?', 'text_area': ((1034, 1655), (1282, 1764)), 'text_box_size': (248, 109)},
    #     {'font_color': 255, 'translated': 'MAS ... PAPI, SUU ...', 'text_area': ((1546, 1634), (1647, 1743)), 'text_box_size': (101, 109)},
    #     {'font_color': 255, 'translated': 'Você fez algo estranho com ela?', 'text_area': ((791, 1735), (994, 1786)), 'text_box_size': (203, 51)},
    #     {'font_color': 255, 'translated': 'Ah bem. isso é; üh.', 'text_area': ((597, 1781), (757, 1845)), 'text_box_size': (160, 64)},
    #     {'font_color': 255, 'translated': 'BEM...', 'text_area': ((131, 1814), (280, 1847)), 'text_box_size': (149, 33)},
    #     {'font_color': 255, 'translated': 'SIMPLESMENTE NÃO CONSIGO ENTENDER.', 'text_area': ((494, 2192), (675, 2368)), 'text_box_size': (181, 176)},
    #     {'font_color': 255, 'translated': 'Adivinhando com base em suas ações ...', 'text_area': ((1347, 2241), (1477, 2341)), 'text_box_size': (130, 100)}
    # ]

    # print(translations)

    img = response[0]
    # img = Image.open('./blank.png')
    canvas = ImageDraw.Draw(img)

    # Trying to fix text position ;-;
    for obj in translations:
        rows = fit_tool.fit(row_obj=obj, canvas=canvas)

    # Output image
    img.save('out.png', 'png')
# ###[BUILT-IN MODULES]###
# from typing import Tuple
# ###[EXTERNAL MODULES]###
# from PIL import Image, ImageDraw
# ###[PERSONAL MODULES]###
# from hime.lib.translate import Translate
# from hime.lib.vision import Vision
# from hime.lib.textfit import TextFit


# if __name__ == '__main__':

#     img_in = './images/1.png'

#     fit_tool = TextFit()
#     vision = Vision(service_account=r'/home/alexandresenpai/.credentials/gcp/hime.json')  # pass here the path of your service account
#     translate = Translate()
#     translations = []

#     response = vision.detect_text(img_in)  # Image to translate
#     for block in response[1]:
#         translations.append({
#             "font_color": block.get('font_color'),
#             "translated": translate.translate(text=block.get('text', ''), target_language='pt-br'),
#             "text_area": block.get('area'),
#             "text_box_size": block.get('text_box_size')
#         })

#     # # added this so we don't need to send requests for google when testing
#     # translations = [
#     #     {'font_color': 255, 'translated': 'SE PAPI E SUU VIREM BRINCAR COMIGO ...', 'text_area': ((1368, 294), (1582, 499)), 'text_box_size': (214, 205)},
#     #     {'font_color': 255, 'translated': 'ENTÃO QUERO FICAR AQUI.', 'text_area': ((182, 648), (395, 757)), 'text_box_size': (213, 109)},
#     #     {'font_color': 255, 'translated': 'MAS VOCÊ NÃO TEM QUE VIR.', 'text_area': ((1005, 1014), (1131, 1243)), 'text_box_size': (126, 229)},
#     #     {'font_color': 255, 'translated': 'O QUE VOCÊ FAZ, QUERIDA?', 'text_area': ((160, 1028), (344, 1142)), 'text_box_size': (184, 114)},
#     #     {'font_color': 255, 'translated': 'HÃ?!', 'text_area': ((435, 1067), (594, 1121)), 'text_box_size': (159, 54)},
#     #     {'font_color': 255, 'translated': '!!', 'text_area': ((1547, 1265), (1560, 1283)), 'text_box_size': (13, 18)},
#     #     {'font_color': 255, 'translated': '35', 'text_area': ((1667, 1521), (1689, 1533)), 'text_box_size': (22, 12)},
#     #     {'font_color': 255, 'translated': 'VOCÊ AMA ESSE HUMANO. MAS POR QUE...?', 'text_area': ((1034, 1655), (1282, 1764)), 'text_box_size': (248, 109)},
#     #     {'font_color': 255, 'translated': 'MAS ... PAPI, SUU ...', 'text_area': ((1546, 1634), (1647, 1743)), 'text_box_size': (101, 109)},
#     #     {'font_color': 255, 'translated': 'Você fez algo estranho com ela?', 'text_area': ((791, 1735), (994, 1786)), 'text_box_size': (203, 51)},
#     #     {'font_color': 255, 'translated': 'Ah bem. isso é; üh.', 'text_area': ((597, 1781), (757, 1845)), 'text_box_size': (160, 64)},
#     #     {'font_color': 255, 'translated': 'BEM...', 'text_area': ((131, 1814), (280, 1847)), 'text_box_size': (149, 33)},
#     #     {'font_color': 255, 'translated': 'SIMPLESMENTE NÃO CONSIGO ENTENDER.', 'text_area': ((494, 2192), (675, 2368)), 'text_box_size': (181, 176)},
#     #     {'font_color': 255, 'translated': 'Adivinhando com base em suas ações ...', 'text_area': ((1347, 2241), (1477, 2341)), 'text_box_size': (130, 100)}
#     # ]

#     # print(translations)

#     img = response[0]
#     # img = Image.open('./blank.png')
#     canvas = ImageDraw.Draw(img)

#     # Trying to fix text position ;-;
#     for obj in translations:
#         rows = fit_tool.fit(row_obj=obj, canvas=canvas)

#     # Output image
#     img.save('out.png', 'png')
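The commented-out blocks in this script show the pattern used to avoid hitting the Google APIs while testing layout code. A minimal sketch of the same idea as a stub object, assuming only the (image, blocks) return shape and block keys consumed above (StubVision is a hypothetical name, not part of this commit):

# Hypothetical stand-in for Vision so the fitting loop can run offline.
# Mirrors the (image, blocks) tuple and the block keys used above.
from PIL import Image

class StubVision:
    def detect_text(self, img_in: str):
        img = Image.open(img_in)
        blocks = [{'text': 'HELLO', 'font_color': 255,
                   'area': ((10, 10), (110, 60)),
                   'text_box_size': (100, 50)}]
        return img, blocks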
@@ -0,0 +1,29 @@
import cv2


img = cv2.imread("./download.jpg")

# Show the source image first (waitKey has no effect without an open window)
cv2.imshow('Original', img)
cv2.waitKey(0)

# Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Find Canny edges
edged = cv2.Canny(gray, 30, 200)
cv2.waitKey(0)

# Finding contours.
# Pass a copy (edged.copy()) since older OpenCV releases altered
# the input image inside findContours.
contours, hierarchy = cv2.findContours(edged.copy(),
                                       cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

print(contours)

cv2.imshow('Canny Edges After Contouring', edged)
cv2.waitKey(0)

print("Number of Contours found = " + str(len(contours)))


cv2.destroyAllWindows()
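A natural sanity check (not part of this commit) is to draw the detected contours back onto the original image. A minimal sketch continuing the script above:

# Hypothetical follow-up: overlay the contours found above, in green,
# on a copy of the original image to verify what RETR_EXTERNAL picked up.
overlay = img.copy()
cv2.drawContours(overlay, contours, -1, (0, 255, 0), 2)
cv2.imshow('Contours on original', overlay)
cv2.waitKey(0)
cv2.destroyAllWindows()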
(Three changed files could not be rendered by the diff viewer; likely binary image assets.)
@@ -0,0 +1,28 @@
from PIL.Image import Image, new as NewImage
from PIL.ImageDraw import ImageDraw


class Canvas:

    def remove_text_block(self,
                          image: Image,
                          coords: tuple[tuple[int, int],
                                        tuple[int, int]]) -> Image:
        # Paint an opaque white rectangle over the text block
        draw = ImageDraw(image, 'RGBA')
        draw.rectangle(coords, (255, 255, 255, 255))

        image.save("blank.png")

        return image

    def text_block_aware(self,
                         image: Image,
                         coords: tuple[tuple[int, int],
                                       tuple[int, int]]) -> Image:
        # Mark the text block with a red ellipse outline (transparent fill)
        draw = ImageDraw(image, 'RGBA')
        draw.ellipse(coords, (255, 255, 255, 0), (255, 0, 0))

        image.save("ellipsis.png")

        return image
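A minimal usage sketch for Canvas (the sample.png path and the box coordinates are placeholders, not from this commit):

# Hypothetical usage: blank out one detected text box on a page image.
from PIL.Image import open as p_open

page = p_open("sample.png").convert("RGBA")  # placeholder path
Canvas().remove_text_block(page, ((10, 10), (200, 80)))  # placeholder coords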
File renamed without changes.
File renamed without changes.
@@ -0,0 +1,208 @@
from dataclasses import dataclass, field
from io import BytesIO
import math
from typing import List, Tuple, Union

import cv2
import easyocr
import numpy as np
from deskew import determine_skew
from PIL import ImageOps
from PIL.Image import Image, fromarray as p_fromarray
from PIL.Image import open as p_open

class ImagePrep:
    def resize(self,
               image: Image,
               scale: float = 2.5) -> Tuple[float, Image]:
        w_size, h_size = image.size
        return scale, image.resize(
            size=(math.ceil(w_size * scale),
                  math.ceil(h_size * scale))
        )

    def set_dpi(self,
                image: Image,
                dpi: int = 300) -> Image:
        temp_img = BytesIO()
        image.save(temp_img, format="png", dpi=(dpi, dpi))
        return p_open(temp_img)

    def rotate(self,
               image: Image,
               angle: float,
               background: Union[int,
                                 Tuple[int, int, int]]) -> np.ndarray:
        # Note: shape[:2] is (rows, cols), so the width/height names below
        # are swapped relative to convention; the pairing stays consistent,
        # so the expanded canvas still fits the rotated image.
        img_arr = np.asarray(image)
        old_width, old_height = img_arr.shape[:2]
        angle_radian = math.radians(angle)
        width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
        height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)

        image_center = tuple(np.array(img_arr.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        rot_mat[1, 2] += (width - old_width) / 2
        rot_mat[0, 2] += (height - old_height) / 2
        return cv2.warpAffine(
            img_arr,
            rot_mat,
            (int(round(height)), int(round(width))),
            borderValue=background  # type: ignore
        )  # type: ignore

    def deskew(self, image: Image):
        grayscale = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2GRAY)
        angle = determine_skew(grayscale)

        if angle is None:
            raise Exception("Could not determine angle.")

        rotated = self.rotate(image, float(angle), (0, 0, 0))
        cv2.imwrite('output.png', rotated)
        return p_fromarray(rotated)

    def convert_to_vector(self, image: Image):
        # Convert the image to grayscale
        gray_image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2GRAY)

        # Apply Canny edge detection to find outlines
        edges = cv2.Canny(gray_image, 100, 200)

        # Find contours (shapes) in the image
        contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

        # Create a new blank (white) image to draw the vectorized shapes on
        vector_image = np.ones_like(gray_image) * 255

        # Draw the contours in black on the blank image
        cv2.drawContours(vector_image, contours, -1, (0, 0, 0), thickness=6)

        cv2.imwrite("box.png", vector_image)

        return p_fromarray(vector_image)

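# A usage sketch for ImagePrep, kept as comments since this module is
# imported elsewhere (the page.png path is a placeholder, not from this
# commit):
#   prep = ImagePrep()
#   straightened = prep.deskew(p_open("page.png"))   # also writes output.png
#   outlines = prep.convert_to_vector(straightened)  # also writes box.png
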
class TextNormalizer:
    def remove_spaces(self, text: str) -> str:
        # Collapse runs of spaces into one, e.g. "A  B ." -> "A B."
        full_txt = ""
        for i, c in enumerate(text):
            if i == 0:
                full_txt += c
                continue

            if c == " " and text[i-1] == " ":
                continue

            full_txt += c

        return full_txt.replace(" ,", ",").replace(" .", ".").strip()

    def remove_breaklines(self, text: str) -> str:
        return text.replace("\n", " ")


@dataclass
class Text:
    content: str = field(default="")
    top_left: Tuple[int, int] = field(default_factory=lambda: (0, 0))
    bottom_right: Tuple[int, int] = field(default_factory=lambda: (0, 0))
    width: int = field(default=0)
    height: int = field(default=0)
    scale_factor: float = field(default=0.0)


# EasyOCR returns (quad corners, text, confidence) per detection
EasyOCROutput = List[
    Tuple[
        Tuple[
            Tuple[np.int32, np.int32],
            Tuple[np.int32, np.int32],
            Tuple[np.int32, np.int32],
            Tuple[np.int32, np.int32]
        ],
        str,
        np.float64
    ]
]

class Vision:

    def __init__(self):
        self.scale_factor = 0.0

    def preprocess_image(self, image: Image) -> np.ndarray:
        processor = ImagePrep()
        scale, image = processor.resize(image=image, scale=2.5)
        self.scale_factor = scale
        image = processor.set_dpi(image)
        # Convert image to grayscale
        image = ImageOps.grayscale(image)

        # Convert image to NumPy array for OpenCV operations
        img_array = np.asarray(image)

        # Apply Gaussian blur to remove noise
        img_array = cv2.GaussianBlur(img_array, (5, 5), 0)

        # Apply adaptive thresholding to create a binary image
        img_array = cv2.adaptiveThreshold(img_array, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

        # Use OpenCV to equalize the histogram of the image
        img_array = cv2.equalizeHist(img_array)

        cv2.imwrite("filter.png", img_array)

        # Note: the blurred/thresholded array is only written to filter.png
        # for inspection; OCR runs on the plain grayscale image returned here.
        return np.asarray(image)

    def translate_ocr_output(self, ocr_response: EasyOCROutput) -> Text:
        text = Text()

        top_left_y_options = []
        top_left_x_options = []
        bottom_right_y_options = []
        bottom_right_x_options = []

        for finding in ocr_response:
            # Corners arrive as (top-left, top-right, bottom-right, bottom-left)
            top_left, _, \
                bottom_right, _ = finding[0]
            content, _ = finding[1:]

            top_left_x, top_left_y = top_left
            bottom_right_x, bottom_right_y = bottom_right

            top_left_y_options.append(top_left_y)
            top_left_x_options.append(top_left_x)
            bottom_right_y_options.append(bottom_right_y)
            bottom_right_x_options.append(bottom_right_x)

            text.content += content + " "

        # Smallest top-left and largest bottom-right give one bounding box
        # around every detection
        top_left_y_options.sort()
        top_left_x_options.sort()

        bottom_right_y_options.sort(reverse=True)
        bottom_right_x_options.sort(reverse=True)

        text.top_left = (top_left_x_options[0], top_left_y_options[0])
        text.bottom_right = (bottom_right_x_options[0], bottom_right_y_options[0])

        text.content = text.content.strip()

        return text

    def detect_text(self, image: Image):
        text_norm = TextNormalizer()
        img = self.preprocess_image(image=image)
        # Note: constructing the Reader here reloads the model on every call;
        # cache it if detect_text is called repeatedly.
        reader = easyocr.Reader(lang_list=["en"], verbose=False)
        text: EasyOCROutput = reader.readtext(img)
        output = self.translate_ocr_output(text)
        output.scale_factor = self.scale_factor
        normalized = output.content
        normalized = text_norm.remove_breaklines(normalized)
        normalized = text_norm.remove_spaces(normalized)
        output.content = normalized
        return output
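A minimal usage sketch for this EasyOCR-backed Vision class (page.png is a placeholder path, not from this commit):

# Hypothetical usage: OCR a single page image and print the merged result.
from PIL.Image import open as p_open

page = p_open("page.png")  # placeholder path
result = Vision().detect_text(page)
print(result.content)
print(result.top_left, result.bottom_right, result.scale_factor)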