
Commit

update
AlexandreSenpai committed Mar 4, 2025
1 parent 7670d35 commit 0e161a3
Showing 34 changed files with 1,803 additions and 136 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
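
For context, this settings file just points VS Code's test discovery at pytest over the tests directory. A minimal test it would pick up might look like the following sketch (a hypothetical tests/test_vision.py, not part of this commit, exercising the TextNormalizer added later in this diff):

# Hypothetical tests/test_vision.py -- one test this config would discover.
from hime.lib.vision import TextNormalizer

def test_remove_spaces_collapses_runs():
    norm = TextNormalizer()
    assert norm.remove_spaces("HELLO  ,  WORLD .") == "HELLO, WORLD."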
Binary file added CO.jpg
114 changes: 57 additions & 57 deletions __init__.py
@@ -1,57 +1,57 @@
###[BUILT-IN MODULES]###
from typing import Tuple
###[EXTERNAL MODULES]###
from PIL import Image, ImageDraw
###[PERSONAL MODULES]###
from lib.translate import Translate
from lib.vision import Vision
from lib.textfit import TextFit

if __name__ == '__main__':

img_in = './images/sample4.jpg'

fit_tool = TextFit()
    vision = Vision(service_account=r'')  # path to your service account credentials file
translate = Translate()
translations = []

    response: Tuple[Image.Image, list] = vision.detect_text(img_in)  # image to translate
for block in response[1]:
translations.append({
"font_color": block.get('font_color'),
"translated": translate.translate(text=block.get('text', ''), target_language='pt-br'),
"text_area": block.get('area'),
"text_box_size": block.get('text_box_size')
})

    # # Hardcoded sample output so we don't need to send requests to Google when testing
# translations = [
# {'font_color': 255, 'translated': 'SE PAPI E SUU VIREM BRINCAR COMIGO ...', 'text_area': ((1368, 294), (1582, 499)), 'text_box_size': (214, 205)},
# {'font_color': 255, 'translated': 'ENTÃO QUERO FICAR AQUI.', 'text_area': ((182, 648), (395, 757)), 'text_box_size': (213, 109)},
# {'font_color': 255, 'translated': 'MAS VOCÊ NÃO TEM QUE VIR.', 'text_area': ((1005, 1014), (1131, 1243)), 'text_box_size': (126, 229)},
# {'font_color': 255, 'translated': 'O QUE VOCÊ FAZ, QUERIDA?', 'text_area': ((160, 1028), (344, 1142)), 'text_box_size': (184, 114)},
# {'font_color': 255, 'translated': 'HÃ?!', 'text_area': ((435, 1067), (594, 1121)), 'text_box_size': (159, 54)},
# {'font_color': 255, 'translated': '!!', 'text_area': ((1547, 1265), (1560, 1283)), 'text_box_size': (13, 18)},
# {'font_color': 255, 'translated': '35', 'text_area': ((1667, 1521), (1689, 1533)), 'text_box_size': (22, 12)},
# {'font_color': 255, 'translated': 'VOCÊ AMA ESSE HUMANO. MAS POR QUE...?', 'text_area': ((1034, 1655), (1282, 1764)), 'text_box_size': (248, 109)},
# {'font_color': 255, 'translated': 'MAS ... PAPI, SUU ...', 'text_area': ((1546, 1634), (1647, 1743)), 'text_box_size': (101, 109)},
# {'font_color': 255, 'translated': 'Você fez algo estranho com ela?', 'text_area': ((791, 1735), (994, 1786)), 'text_box_size': (203, 51)},
# {'font_color': 255, 'translated': 'Ah bem. isso é; üh.', 'text_area': ((597, 1781), (757, 1845)), 'text_box_size': (160, 64)},
# {'font_color': 255, 'translated': 'BEM...', 'text_area': ((131, 1814), (280, 1847)), 'text_box_size': (149, 33)},
# {'font_color': 255, 'translated': 'SIMPLESMENTE NÃO CONSIGO ENTENDER.', 'text_area': ((494, 2192), (675, 2368)), 'text_box_size': (181, 176)},
# {'font_color': 255, 'translated': 'Adivinhando com base em suas ações ...', 'text_area': ((1347, 2241), (1477, 2341)), 'text_box_size': (130, 100)}
# ]

# print(translations)

img = response[0]
# img = Image.open('./blank.png')
canvas = ImageDraw.Draw(img)

# Trying to fix text position ;-;
for obj in translations:
rows = fit_tool.fit(row_obj=obj, canvas=canvas)

# Output image
img.save('out.png', 'png')
# ###[BUILT-IN MODULES]###
# from typing import Tuple
# ###[EXTERNAL MODULES]###
# from PIL import Image, ImageDraw
# ###[PERSONAL MODULES]###
# from hime.lib.translate import Translate
# from hime.lib.vision import Vision
# from hime.lib.textfit import TextFit

# if __name__ == '__main__':

# img_in = './images/1.png'

# fit_tool = TextFit()
#     vision = Vision(service_account=r'/home/alexandresenpai/.credentials/gcp/hime.json')  # path to your service account credentials file
# translate = Translate()
# translations = []

# response = vision.detect_text(img_in) # Image to translate
# for block in response[1]:
# translations.append({
# "font_color": block.get('font_color'),
# "translated": translate.translate(text=block.get('text', ''), target_language='pt-br'),
# "text_area": block.get('area'),
# "text_box_size": block.get('text_box_size')
# })

#     # # Hardcoded sample output so we don't need to send requests to Google when testing
# # translations = [
# # {'font_color': 255, 'translated': 'SE PAPI E SUU VIREM BRINCAR COMIGO ...', 'text_area': ((1368, 294), (1582, 499)), 'text_box_size': (214, 205)},
# # {'font_color': 255, 'translated': 'ENTÃO QUERO FICAR AQUI.', 'text_area': ((182, 648), (395, 757)), 'text_box_size': (213, 109)},
# # {'font_color': 255, 'translated': 'MAS VOCÊ NÃO TEM QUE VIR.', 'text_area': ((1005, 1014), (1131, 1243)), 'text_box_size': (126, 229)},
# # {'font_color': 255, 'translated': 'O QUE VOCÊ FAZ, QUERIDA?', 'text_area': ((160, 1028), (344, 1142)), 'text_box_size': (184, 114)},
# # {'font_color': 255, 'translated': 'HÃ?!', 'text_area': ((435, 1067), (594, 1121)), 'text_box_size': (159, 54)},
# # {'font_color': 255, 'translated': '!!', 'text_area': ((1547, 1265), (1560, 1283)), 'text_box_size': (13, 18)},
# # {'font_color': 255, 'translated': '35', 'text_area': ((1667, 1521), (1689, 1533)), 'text_box_size': (22, 12)},
# # {'font_color': 255, 'translated': 'VOCÊ AMA ESSE HUMANO. MAS POR QUE...?', 'text_area': ((1034, 1655), (1282, 1764)), 'text_box_size': (248, 109)},
# # {'font_color': 255, 'translated': 'MAS ... PAPI, SUU ...', 'text_area': ((1546, 1634), (1647, 1743)), 'text_box_size': (101, 109)},
# # {'font_color': 255, 'translated': 'Você fez algo estranho com ela?', 'text_area': ((791, 1735), (994, 1786)), 'text_box_size': (203, 51)},
# # {'font_color': 255, 'translated': 'Ah bem. isso é; üh.', 'text_area': ((597, 1781), (757, 1845)), 'text_box_size': (160, 64)},
# # {'font_color': 255, 'translated': 'BEM...', 'text_area': ((131, 1814), (280, 1847)), 'text_box_size': (149, 33)},
# # {'font_color': 255, 'translated': 'SIMPLESMENTE NÃO CONSIGO ENTENDER.', 'text_area': ((494, 2192), (675, 2368)), 'text_box_size': (181, 176)},
# # {'font_color': 255, 'translated': 'Adivinhando com base em suas ações ...', 'text_area': ((1347, 2241), (1477, 2341)), 'text_box_size': (130, 100)}
# # ]

# # print(translations)

# img = response[0]
# # img = Image.open('./blank.png')
# canvas = ImageDraw.Draw(img)

# # Trying to fix text position ;-;
# for obj in translations:
# rows = fit_tool.fit(row_obj=obj, canvas=canvas)

# # Output image
# img.save('out.png', 'png')
29 changes: 29 additions & 0 deletions a.py
@@ -0,0 +1,29 @@
import cv2


# Read the source image
img = cv2.imread("./download.jpg")

# Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Find Canny edges
edged = cv2.Canny(gray, 30, 200)

# Finding Contours
# Note: on OpenCV >= 4, findContours no longer modifies its input,
# so a copy (e.g. edged.copy()) is only needed on OpenCV 3
contours, hierarchy = cv2.findContours(edged,
                                       cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

print(contours)

cv2.imshow('Canny Edges After Contouring', edged)
cv2.waitKey(0)

print(f"Number of Contours found = {len(contours)}")


cv2.destroyAllWindows()
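
Since this scratch script appears to probe text-region detection, a small hedged extension (continuing the script above, same ./download.jpg assumption; boxes.png is an illustrative output name) would turn each contour into a bounding box for later masking:

# Hypothetical follow-up: draw a bounding box around each detected contour.
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("boxes.png", img)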
Binary file added blank.png
Binary file added download.jpg
Binary file added ellipsis.png
Binary file added filter.png
28 changes: 28 additions & 0 deletions hime/lib/canvas/__init__.py
@@ -0,0 +1,28 @@
from PIL.Image import Image
from PIL.ImageDraw import ImageDraw

class Canvas:

    def remove_text_block(self,
                          image: Image,
                          coords: tuple[tuple[int, int],
                                        tuple[int, int]]) -> Image:
        """Cover a detected text block with an opaque white rectangle."""
        draw = ImageDraw(image, 'RGBA')
        draw.rectangle(coords, (255, 255, 255, 255))

        image.save("blank.png")

        return image

    def text_block_aware(self,
                         image: Image,
                         coords: tuple[tuple[int, int],
                                       tuple[int, int]]) -> Image:
        """Outline a detected text block with a red ellipse (transparent fill)."""
        draw = ImageDraw(image, 'RGBA')
        draw.ellipse(coords, (255, 255, 255, 0), (255, 0, 0))

        image.save("ellipsis.png")

        return image
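
A minimal usage sketch for the new Canvas helper, assuming a local sample.png and an illustrative coordinate pair (neither is part of this commit):

from PIL.Image import open as p_open
from hime.lib.canvas import Canvas

canvas = Canvas()
page = p_open("sample.png")  # hypothetical input page
box = ((100, 50), (300, 120))  # illustrative text-block coordinates
cleaned = canvas.remove_text_block(page, box)  # writes blank.png
canvas.text_block_aware(cleaned, box)          # writes ellipsis.png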
File renamed without changes.
File renamed without changes.
208 changes: 208 additions & 0 deletions hime/lib/vision/__init__.py
@@ -0,0 +1,208 @@

from dataclasses import dataclass, field
from io import BytesIO
import math
from typing import List, Tuple, Union

import cv2
import easyocr
import numpy as np
from PIL import ImageOps
from PIL.Image import Image, fromarray as p_fromarray
from PIL.Image import open as p_open

from deskew import determine_skew

class ImagePrep:
def resize(self,
image: Image,
scale: float = 2.5) -> Tuple[float, Image]:
w_size, h_size = image.size
return scale, image.resize(
size=(math.ceil(w_size * scale),
math.ceil(h_size * scale))
)

def set_dpi(self,
image: Image,
dpi: int = 300) -> Image:
temp_img = BytesIO()
image.save(temp_img, format="png", dpi=(dpi, dpi))
return p_open(temp_img)

def rotate(self,
image: Image,
angle: float,
background: Union[int,
Tuple[int, int, int]]) -> np.ndarray:
img_arr = np.asarray(image)
        # NOTE: shape[:2] is (height, width); these names are swapped but
        # used consistently below, following the deskew project's example.
        old_width, old_height = img_arr.shape[:2]
angle_radian = math.radians(angle)
width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)

image_center = tuple(np.array(img_arr.shape[1::-1]) / 2)
rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
rot_mat[1, 2] += (width - old_width) / 2
rot_mat[0, 2] += (height - old_height) / 2
return cv2.warpAffine(
img_arr,
rot_mat,
(int(round(height)), int(round(width))),
borderValue=background # type: ignore
) # type: ignore

def deskew(self, image: Image):
grayscale = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2GRAY)
angle = determine_skew(grayscale)

if angle is None:
raise Exception("Could not determine angle.")

        rotated = self.rotate(image, float(angle), (0, 0, 0))
        cv2.imwrite('output.png', rotated)  # saved for visual inspection only
        return p_fromarray(rotated)

def convert_to_vector(self, image: Image):

# Convert the image to grayscale
gray_image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2GRAY)

# Apply Canny edge detection to find outlines
edges = cv2.Canny(gray_image, 100, 200)

# Find contours (shapes) in the image
contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Create a new blank image to draw the vectorized image with white background
vector_image = np.ones_like(gray_image) * 255

# Draw contours on the blank image to form vectorized shapes in black color
cv2.drawContours(vector_image, contours, -1, (0, 0, 0), thickness=6)

cv2.imwrite("box.png", vector_image)

return p_fromarray(vector_image)

class TextNormalizer:
    def remove_spaces(self, text: str) -> str:
        """Collapse repeated spaces and fix space-before-punctuation.

        e.g. "HELLO  ,  WORLD ." -> "HELLO, WORLD."
        """
        full_txt = ""
        for i, c in enumerate(text):
            if i == 0:
                full_txt += c
                continue

            if c == " " and text[i-1] == " ":
                continue

            full_txt += c

        return full_txt.replace(" ,", ",").replace(" .", ".").strip()

def remove_breaklines(self, text: str) -> str:
return text.replace("\n", " ")

@dataclass
class Text:
content: str = field(default="")
top_left: Tuple[int, int] = field(default_factory=lambda: (0, 0))
bottom_right: Tuple[int, int] = field(default_factory=lambda: (0, 0))
width: int = field(default=0)
height: int = field(default=0)
scale_factor: float = field(default=0)

EasyOCROutput = List[
Tuple[
Tuple[
Tuple[np.int32, np.int32],
Tuple[np.int32, np.int32],
Tuple[np.int32, np.int32],
Tuple[np.int32, np.int32]
],
str,
np.float64
]
]
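
# Shape of a single EasyOCR finding, for reference (values illustrative):
# ([[10, 12], [208, 12], [208, 46], [10, 46]], "SOME TEXT", 0.97)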

class Vision:

def __init__(self):
self.scale_factor = 0.0

def preprocess_image(self, image: Image) -> np.ndarray:
processor = ImagePrep()
scale, image = processor.resize(image=image, scale=2.5)
self.scale_factor = scale
image = processor.set_dpi(image)
# Convert image to grayscale
image = ImageOps.grayscale(image)

# Convert image to NumPy array for OpenCV operations
img_array = np.asarray(image)

# Apply Gaussian blur to remove noise
img_array = cv2.GaussianBlur(img_array, (5, 5), 0)

# Apply adaptive thresholding to create a binary image
img_array = cv2.adaptiveThreshold(img_array, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

        # Use OpenCV to equalize the histogram of the image
        img_array = cv2.equalizeHist(img_array)

        # The blurred/thresholded array is only written out for inspection;
        # the plain grayscale image is what gets returned to the OCR step.
        cv2.imwrite("filter.png", img_array)

        return np.asarray(image)

def translate_ocr_output(self, ocr_response: EasyOCROutput) -> Text:
text = Text()

top_left_y_options = []
top_left_x_options = []
bottom_right_y_options = []
bottom_right_x_options = []

for finding in ocr_response:
top_left, _, \
bottom_right, _ = finding[0]
content, _ = finding[1:]

top_left_x, top_left_y = top_left
bottom_right_x, bottom_right_y = bottom_right

top_left_y_options.append(top_left_y)
top_left_x_options.append(top_left_x)
bottom_right_y_options.append(bottom_right_y)
bottom_right_x_options.append(bottom_right_x)

text.content += content + " "

        # Union box over all findings: smallest top-left, largest bottom-right.
        top_left_y_options.sort()
        top_left_x_options.sort()

        bottom_right_y_options.sort(reverse=True)
        bottom_right_x_options.sort(reverse=True)

        text.top_left = (top_left_x_options[0], top_left_y_options[0])
        text.bottom_right = (bottom_right_x_options[0], bottom_right_y_options[0])

text.content = text.content.strip()

return text

    def detect_text(self, image: Image):
        text_norm = TextNormalizer()
        img = self.preprocess_image(image=image)
        # Note: building the Reader on every call is expensive; it loads the
        # detection/recognition models each time.
        reader = easyocr.Reader(lang_list=["en"], verbose=False)
        text: EasyOCROutput = reader.readtext(img)
        output = self.translate_ocr_output(text)
        output.scale_factor = self.scale_factor
        normalized = output.content
        normalized = text_norm.remove_breaklines(normalized)
        normalized = text_norm.remove_spaces(normalized)
        output.content = normalized
        return output
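
A minimal usage sketch for the new Vision class (the image path is illustrative; easyocr downloads its model weights on first run):

from PIL.Image import open as p_open
from hime.lib.vision import Vision

vision = Vision()
page = p_open("./images/1.png")  # hypothetical input page
text = vision.detect_text(page)
print(text.content, text.top_left, text.bottom_right, text.scale_factor)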