From 0809610763801a7c73a1c8f020fee9cff3385e16 Mon Sep 17 00:00:00 2001
From: jason
Date: Sun, 23 Feb 2025 13:47:32 +0800
Subject: [PATCH 1/5] Support automatic release of new versions via CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build-windows-cpu.yml       |  92 ++++++++++++++
 .github/workflows/build-windows-cuda-10.2.yml | 114 ++++++++++++++++++
 .github/workflows/build-windows-cuda-11.8.yml |  95 +++++++++++++++
 .github/workflows/build-windows-cuda-12.3.yml |  95 +++++++++++++++
 backend/config.py                             |   2 +
 backend/ppocr/data/imaug/iaa_augment.py       |   5 +-
 backend/tools/makedist.py                     |  51 ++++++--
 requirements.txt                              |   9 +-
 8 files changed, 449 insertions(+), 14 deletions(-)
 create mode 100644 .github/workflows/build-windows-cpu.yml
 create mode 100644 .github/workflows/build-windows-cuda-10.2.yml
 create mode 100644 .github/workflows/build-windows-cuda-11.8.yml
 create mode 100644 .github/workflows/build-windows-cuda-12.3.yml

diff --git a/.github/workflows/build-windows-cpu.yml b/.github/workflows/build-windows-cpu.yml
new file mode 100644
index 00000000..e0dc5221
--- /dev/null
+++ b/.github/workflows/build-windows-cpu.yml
@@ -0,0 +1,92 @@
+name: Build Windows CPU
+
+on:
+  push:
+    branches:
+      - '**'
+  workflow_dispatch:
+    inputs:
+      ssh:
+        description: 'SSH connection to Actions'
+        required: false
+        default: false
+
+
+jobs:
+  build:
+    runs-on: windows-2019
+    steps:
+      - uses: actions/checkout@v4
+      - name: Read VERSION
+        id: version
+        run: |
+          VERSION=$(sed -n 's/^VERSION = "\(.*\)"/\1/p' backend/config.py)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+        shell: bash
+      # - name: Check whether the tag already exists
+      #   run: |
+      #     TAG_NAME="${VERSION}"
+      #     if git ls-remote --tags origin | grep -q "refs/tags/$TAG_NAME"; then
+      #       echo "Tag $TAG_NAME already exists, aborting the release"
+      #       exit 1
+      #     fi
+      #   shell: bash
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip' # caching pip dependencies
+      - run: pip install paddlepaddle==3.0.0rc1
+      - run: pip install -r requirements.txt
+      - run: pip freeze > requirements.txt
+      - run: pip install QPT==1.0b8 setuptools
+      - name: Get the site-packages path
+        shell: bash
+        run: |
+          SITE_PACKAGES=$(python -c "import site, os; print(os.path.join(site.getsitepackages()[0], 'Lib', 'site-packages'))")
+          SITE_PACKAGES_UNIX=$(cygpath -u "$SITE_PACKAGES")
+          echo "site-packages path: $SITE_PACKAGES"
+          echo "site-packages UNIX path: $SITE_PACKAGES_UNIX"
+          echo "SITE_PACKAGES_UNIX=$SITE_PACKAGES_UNIX" >> $GITHUB_ENV
+          echo "SITE_PACKAGES=$SITE_PACKAGES" >> $GITHUB_ENV
+      - name: Fix a QPT internal error
+        run: sed -i '98c\ try:\n dep = pkg.requires()\n except TypeError:\n continue' ${SITE_PACKAGES_UNIX}/qpt/kernel/qpackage.py
+        shell: bash
+      - name: Start SSH via tmate
+        if: (github.event.inputs.ssh == 'true' && github.event.inputs.ssh != 'false') || contains(github.event.action, 'ssh')
+        uses: mxschmitt/action-tmate@v3
+      - run: |
+          python backend/tools/makedist.py && \
+          mv ../vse_out ./vse_out && \
+          rm -fv ./vse_out/*/opt/packages/numpy-2*
+        env:
+          QPT_Action: "True"
+        shell: bash
+      - name: Upload the Debug folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-cpu-debug
+          path: vse_out/Debug/
+      - name: Upload the Release folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-cpu-release
+          path: vse_out/Release/
+      - name: Package the Release folder
+        run: |
+          cd vse_out/Release
+          7z a -t7z -mx=9 -m0=LZMA2 -ms=on -mfb=64 -md=32m -mmt=on -v1888m vse-v${{ env.VERSION }}-windows-cpu.7z * && \
+          # check whether there is only a single volume
+          if [ -f vse-v${{ env.VERSION }}-windows-cpu.7z.001 ] && [ ! -f vse-v${{ env.VERSION }}-windows-cpu.7z.002 ]; then \
+            mv vse-v${{ env.VERSION }}-windows-cpu.7z.001 vse-v${{ env.VERSION }}-windows-cpu.7z; fi
+        shell: bash
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          prerelease: true
+          tag_name: ${{ env.VERSION }}
+          target_commitish: ${{ github.sha }}
+          name: Hardcoded Subtitle Extractor ${{ env.VERSION }}
+          files: |
+            vse_out/Release/vse-v${{ env.VERSION }}-windows-cpu.7z*
\ No newline at end of file
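Note: every workflow in this patch starts by reading the release version out of backend/config.py with sed, so cutting a release only requires bumping the VERSION constant added later in this patch. A rough Python equivalent of that sed extraction, as a hypothetical helper (the workflows themselves use sed, not this code):

    import re

    def read_version(path="backend/config.py"):
        # mirrors the workflow's sed expression: ^VERSION = "(.*)"
        with open(path, encoding="utf-8") as f:
            for line in f:
                match = re.match(r'^VERSION = "(.*)"', line)
                if match:
                    return match.group(1)
        raise ValueError("no VERSION assignment found")

    print(read_version())  # prints 2.0.1 for the config.py added in this patch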
diff --git a/.github/workflows/build-windows-cuda-10.2.yml b/.github/workflows/build-windows-cuda-10.2.yml
new file mode 100644
index 00000000..d0329153
--- /dev/null
+++ b/.github/workflows/build-windows-cuda-10.2.yml
@@ -0,0 +1,114 @@
+name: Build Windows CUDA 10.2
+
+on:
+  push:
+    branches:
+      - '**'
+  workflow_dispatch:
+    inputs:
+      ssh:
+        description: 'SSH connection to Actions'
+        required: false
+        default: false
+
+
+jobs:
+  build:
+    runs-on: windows-2019
+    steps:
+      - uses: actions/checkout@v4
+      - name: Read VERSION
+        id: version
+        run: |
+          VERSION=$(sed -n 's/^VERSION = "\(.*\)"/\1/p' backend/config.py)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+        shell: bash
+      # - name: Check whether the tag already exists
+      #   run: |
+      #     TAG_NAME="${VERSION}"
+      #     if git ls-remote --tags origin | grep -q "refs/tags/$TAG_NAME"; then
+      #       echo "Tag $TAG_NAME already exists, aborting the release"
+      #       exit 1
+      #     fi
+      #   shell: bash
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip' # caching pip dependencies
+      - name: Deploy CUDA
+        uses: Jimver/cuda-toolkit@v0.2.21
+        id: cuda-toolkit
+        with:
+          cuda: '10.2.89'
+      - run: echo "Installed cuda version is ${{steps.cuda-toolkit.outputs.cuda}}"
+      - run: echo "Cuda install location ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+      - run: nvcc -V
+      - run: git clean -df
+      # The last Paddle release that still supports CUDA 10.2:
+      # https://www.paddlepaddle.org.cn/documentation/docs/en/2.5/install/Tables_en.html
+      - run: python -m pip install paddlepaddle-gpu==2.5.2.post102 -f https://www.paddlepaddle.org.cn/whl/windows/mkl/avx/stable.html
+      - run: pip install -r requirements.txt
+      - run: pip freeze > requirements.txt
+      - run: pip install QPT==1.0b8 setuptools
+      - name: Get the site-packages path
+        shell: bash
+        run: |
+          SITE_PACKAGES=$(python -c "import site, os; print(os.path.join(site.getsitepackages()[0], 'Lib', 'site-packages'))")
+          SITE_PACKAGES_UNIX=$(cygpath -u "$SITE_PACKAGES")
+          echo "site-packages path: $SITE_PACKAGES"
+          echo "site-packages UNIX path: $SITE_PACKAGES_UNIX"
+          echo "SITE_PACKAGES_UNIX=$SITE_PACKAGES_UNIX" >> $GITHUB_ENV
+          echo "SITE_PACKAGES=$SITE_PACKAGES" >> $GITHUB_ENV
+      - name: Fix a QPT internal error
+        run: sed -i '98c\ try:\n dep = pkg.requires()\n except TypeError:\n continue' ${SITE_PACKAGES_UNIX}/qpt/kernel/qpackage.py
+        shell: bash
+      - name: Start SSH via tmate
+        if: (github.event.inputs.ssh == 'true' && github.event.inputs.ssh != 'false') || contains(github.event.action, 'ssh')
+        uses: mxschmitt/action-tmate@v3
+      - run: |
+          python backend/tools/makedist.py --cuda=10.2 && \
+          mv ../vse_out ./vse_out && \
+          git checkout requirements.txt && \
+          pip download -d ./vse_out/Debug/opt/packages/ paddlepaddle-gpu==2.5.2.post102 -f https://www.paddlepaddle.org.cn/whl/windows/mkl/avx/stable.html && \
+          pip download -d ./vse_out/Debug/opt/packages/ -r requirements.txt && \
+          cp -rfv ./vse_out/Debug/opt/packages/* ./vse_out/Release/opt/packages/ && \
+          rm -fv ./vse_out/*/opt/packages/numpy-2*
+        env:
+          QPT_Action: "True"
+          CUDA_PATH_V10_2: "${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+        shell: bash
+      - name: Deploy cuDNN
+        shell: bash
+        run: |
+          curl -L -o cudnn.zip "https://developer.download.nvidia.cn/compute/redist/cudnn/v7.6.5/cudnn-10.2-windows10-x64-v7.6.5.32.zip" && \
+          unzip -j cudnn.zip "*/bin/*" -d "./vse_out/Debug/opt/CUDA/" && \
+          unzip -j cudnn.zip "*/bin/*" -d "./vse_out/Release/opt/CUDA/" && \
+          rm -fv cudnn.zip
+      - name: Upload the Debug folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2-debug
+          path: vse_out/Debug/
+      - name: Upload the Release folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2-release
+          path: vse_out/Release/
+      - name: Package the Release folder
+        run: |
+          cd vse_out/Release
+          7z a -t7z -mx=9 -m0=LZMA2 -ms=on -mfb=64 -md=32m -mmt=on -v1888m vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2.7z * && \
+          # check whether there is only a single volume
+          if [ -f vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2.7z.001 ] && [ ! -f vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2.7z.002 ]; then \
+            mv vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2.7z.001 vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2.7z; fi
+        shell: bash
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          prerelease: true
+          tag_name: ${{ env.VERSION }}
+          target_commitish: ${{ github.sha }}
+          name: Hardcoded Subtitle Extractor ${{ env.VERSION }}
+          files: |
+            vse_out/Release/vse-v${{ env.VERSION }}-windows-nvidia-cuda-10.2.7z*
\ No newline at end of file
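Note: the CUDA 10.2 build pins paddlepaddle-gpu to 2.5.2.post102 (the last release supporting that toolkit) and bundles cuDNN 7.6.5 by pulling only the files under bin/ out of NVIDIA's zip into opt/CUDA/ of both output trees. A sketch of what the two unzip -j calls do, restated in Python purely for illustration (the workflow itself uses curl and unzip):

    import os, zipfile

    def extract_cudnn_bins(zip_path, dest):
        # copy every file under a bin/ directory into dest, flattening paths
        # (the -j "junk paths" behaviour of unzip)
        os.makedirs(dest, exist_ok=True)
        with zipfile.ZipFile(zip_path) as zf:
            for name in zf.namelist():
                if "/bin/" in name and not name.endswith("/"):
                    target = os.path.join(dest, os.path.basename(name))
                    with zf.open(name) as src, open(target, "wb") as dst:
                        dst.write(src.read())

    for build in ("Debug", "Release"):
        extract_cudnn_bins("cudnn.zip", f"./vse_out/{build}/opt/CUDA/")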
diff --git a/.github/workflows/build-windows-cuda-11.8.yml b/.github/workflows/build-windows-cuda-11.8.yml
new file mode 100644
index 00000000..22c2ff69
--- /dev/null
+++ b/.github/workflows/build-windows-cuda-11.8.yml
@@ -0,0 +1,95 @@
+name: Build Windows CUDA 11.8
+
+on:
+  push:
+    branches:
+      - '**'
+  workflow_dispatch:
+    inputs:
+      ssh:
+        description: 'SSH connection to Actions'
+        required: false
+        default: false
+
+
+jobs:
+  build:
+    runs-on: windows-2019
+    steps:
+      - uses: actions/checkout@v4
+      - name: Read VERSION
+        id: version
+        run: |
+          VERSION=$(sed -n 's/^VERSION = "\(.*\)"/\1/p' backend/config.py)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+        shell: bash
+      # - name: Check whether the tag already exists
+      #   run: |
+      #     TAG_NAME="${VERSION}"
+      #     if git ls-remote --tags origin | grep -q "refs/tags/$TAG_NAME"; then
+      #       echo "Tag $TAG_NAME already exists, aborting the release"
+      #       exit 1
+      #     fi
+      #   shell: bash
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip' # caching pip dependencies
+      - run: pip install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
+      - run: pip install -r requirements.txt
+      - run: pip freeze > requirements.txt
+      - run: pip install QPT==1.0b8 setuptools
+      - name: Get the site-packages path
+        shell: bash
+        run: |
+          SITE_PACKAGES=$(python -c "import site, os; print(os.path.join(site.getsitepackages()[0], 'Lib', 'site-packages'))")
+          SITE_PACKAGES_UNIX=$(cygpath -u "$SITE_PACKAGES")
+          echo "site-packages path: $SITE_PACKAGES"
+          echo "site-packages UNIX path: $SITE_PACKAGES_UNIX"
+          echo "SITE_PACKAGES_UNIX=$SITE_PACKAGES_UNIX" >> $GITHUB_ENV
+          echo "SITE_PACKAGES=$SITE_PACKAGES" >> $GITHUB_ENV
+      - name: Fix a QPT internal error
+        run: sed -i '98c\ try:\n dep = pkg.requires()\n except TypeError:\n continue' ${SITE_PACKAGES_UNIX}/qpt/kernel/qpackage.py
+        shell: bash
+      - name: Start SSH via tmate
+        if: (github.event.inputs.ssh == 'true' && github.event.inputs.ssh != 'false') || contains(github.event.action, 'ssh')
+        uses: mxschmitt/action-tmate@v3
+      - run: |
+          python backend/tools/makedist.py && \
+          mv ../vse_out ./vse_out && \
+          git checkout requirements.txt && \
+          pip download -d ./vse_out/Debug/opt/packages/ paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ && \
+          pip download -d ./vse_out/Debug/opt/packages/ -r requirements.txt && \
+          cp -rfv ./vse_out/Debug/opt/packages/* ./vse_out/Release/opt/packages/ && \
+          rm -fv ./vse_out/*/opt/packages/numpy-2*
+        env:
+          QPT_Action: "True"
+        shell: bash
+      - name: Upload the Debug folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8-debug
+          path: vse_out/Debug/
+      - name: Upload the Release folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8-release
+          path: vse_out/Release/
+      - name: Package the Release folder
+        run: |
+          cd vse_out/Release
+          7z a -t7z -mx=9 -m0=LZMA2 -ms=on -mfb=64 -md=32m -mmt=on -v1888m vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8.7z * && \
+          # check whether there is only a single volume
+          if [ -f vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8.7z.001 ] && [ ! -f vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8.7z.002 ]; then \
+            mv vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8.7z.001 vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8.7z; fi
+        shell: bash
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          prerelease: true
+          tag_name: ${{ env.VERSION }}
+          target_commitish: ${{ github.sha }}
+          name: Hardcoded Subtitle Extractor ${{ env.VERSION }}
+          files: |
+            vse_out/Release/vse-v${{ env.VERSION }}-windows-nvidia-cuda-11.8.7z*
\ No newline at end of file
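Note: all four workflows patch line 98 of QPT's qpt/kernel/qpackage.py in place, because pkg.requires() can raise TypeError for some installed distributions and QPT 1.0b8 does not guard against it. The sed command rewrites that line into a try/except that skips the offending package; the patched code should end up roughly like the sketch below (the surrounding function and variable names are assumptions about qpackage.py internals, only the try/except is what the sed actually writes):

    def collect_requires(pkgs):
        # skip distributions whose requires() raises TypeError instead of crashing
        deps = []
        for pkg in pkgs:
            try:
                dep = pkg.requires()
            except TypeError:
                continue
            deps.append(dep)
        return deps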
diff --git a/.github/workflows/build-windows-cuda-12.3.yml b/.github/workflows/build-windows-cuda-12.3.yml
new file mode 100644
index 00000000..0f091a23
--- /dev/null
+++ b/.github/workflows/build-windows-cuda-12.3.yml
@@ -0,0 +1,95 @@
+name: Build Windows CUDA 12.3
+
+on:
+  push:
+    branches:
+      - '**'
+  workflow_dispatch:
+    inputs:
+      ssh:
+        description: 'SSH connection to Actions'
+        required: false
+        default: false
+
+
+jobs:
+  build:
+    runs-on: windows-2019
+    steps:
+      - uses: actions/checkout@v4
+      - name: Read VERSION
+        id: version
+        run: |
+          VERSION=$(sed -n 's/^VERSION = "\(.*\)"/\1/p' backend/config.py)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
+          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
+        shell: bash
+      # - name: Check whether the tag already exists
+      #   run: |
+      #     TAG_NAME="${VERSION}"
+      #     if git ls-remote --tags origin | grep -q "refs/tags/$TAG_NAME"; then
+      #       echo "Tag $TAG_NAME already exists, aborting the release"
+      #       exit 1
+      #     fi
+      #   shell: bash
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip' # caching pip dependencies
+      - run: pip install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
+      - run: pip install -r requirements.txt
+      - run: pip freeze > requirements.txt
+      - run: pip install QPT==1.0b8 setuptools
+      - name: Get the site-packages path
+        shell: bash
+        run: |
+          SITE_PACKAGES=$(python -c "import site, os; print(os.path.join(site.getsitepackages()[0], 'Lib', 'site-packages'))")
+          SITE_PACKAGES_UNIX=$(cygpath -u "$SITE_PACKAGES")
+          echo "site-packages path: $SITE_PACKAGES"
+          echo "site-packages UNIX path: $SITE_PACKAGES_UNIX"
+          echo "SITE_PACKAGES_UNIX=$SITE_PACKAGES_UNIX" >> $GITHUB_ENV
+          echo "SITE_PACKAGES=$SITE_PACKAGES" >> $GITHUB_ENV
+      - name: Fix a QPT internal error
+        run: sed -i '98c\ try:\n dep = pkg.requires()\n except TypeError:\n continue' ${SITE_PACKAGES_UNIX}/qpt/kernel/qpackage.py
+        shell: bash
+      - name: Start SSH via tmate
+        if: (github.event.inputs.ssh == 'true' && github.event.inputs.ssh != 'false') || contains(github.event.action, 'ssh')
+        uses: mxschmitt/action-tmate@v3
+      - run: |
+          python backend/tools/makedist.py && \
+          mv ../vse_out ./vse_out && \
+          git checkout requirements.txt && \
+          pip download -d ./vse_out/Debug/opt/packages/ paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/ && \
+          pip download -d ./vse_out/Debug/opt/packages/ -r requirements.txt && \
+          cp -rfv ./vse_out/Debug/opt/packages/* ./vse_out/Release/opt/packages/ && \
+          rm -fv ./vse_out/*/opt/packages/numpy-2*
+        env:
+          QPT_Action: "True"
+        shell: bash
+      - name: Upload the Debug folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3-debug
+          path: vse_out/Debug/
+      - name: Upload the Release folder to Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3-release
+          path: vse_out/Release/
+      - name: Package the Release folder
+        run: |
+          cd vse_out/Release
+          7z a -t7z -mx=9 -m0=LZMA2 -ms=on -mfb=64 -md=32m -mmt=on -v1888m vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3.7z * && \
+          # check whether there is only a single volume
+          if [ -f vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3.7z.001 ] && [ ! -f vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3.7z.002 ]; then \
+            mv vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3.7z.001 vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3.7z; fi
+        shell: bash
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          prerelease: true
+          tag_name: ${{ env.VERSION }}
+          target_commitish: ${{ github.sha }}
+          name: Hardcoded Subtitle Extractor ${{ env.VERSION }}
+          files: |
+            vse_out/Release/vse-v${{ env.VERSION }}-windows-nvidia-cuda-12.3.7z*
\ No newline at end of file
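Note: only the CUDA 10.2 workflow passes --cuda=10.2 to the packaging script; the CPU, 11.8 and 12.3 builds call it bare. The flag is declared in backend/tools/makedist.py below with nargs="?" and const="10.2", which gives it three shapes; a small self-contained demonstration of that argparse behaviour (same declaration as the diff, reduced to its essentials):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", nargs="?", const="10.2", default=None)

    for argv in ([], ["--cuda"], ["--cuda=11.8"]):
        print(argv, "->", parser.parse_args(argv).cuda)
    # []              -> None   (no CUDA sub-module is bundled)
    # ['--cuda']      -> 10.2   (a bare flag falls back to const)
    # ['--cuda=11.8'] -> 11.8   (an explicit version wins)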
diff --git a/backend/config.py b/backend/config.py
index 5576f58e..337f236d 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -16,6 +16,8 @@
 import paddle
 from tools.constant import *
 
+# project version number
+VERSION = "2.0.1"
 # base directory of the project
 BASE_DIR = str(Path(os.path.abspath(__file__)).parent)
 
diff --git a/backend/ppocr/data/imaug/iaa_augment.py b/backend/ppocr/data/imaug/iaa_augment.py
index 0aac7877..bf6ea2bf 100644
--- a/backend/ppocr/data/imaug/iaa_augment.py
+++ b/backend/ppocr/data/imaug/iaa_augment.py
@@ -21,15 +21,13 @@ from __future__ import unicode_literals
 
 import numpy as np
-import imgaug
-import imgaug.augmenters as iaa
-
 
 class AugmenterBuilder(object):
     def __init__(self):
         pass
 
     def build(self, args, root=True):
+        import imgaug.augmenters as iaa
         if args is None or len(args) == 0:
             return None
         elif isinstance(args, list):
@@ -97,6 +95,7 @@ def may_augment_annotation(self, aug, data, shape):
         return data
 
     def may_augment_poly(self, aug, img_shape, poly):
+        import imgaug
        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
        keypoints = aug.augment_keypoints(
            [imgaug.KeypointsOnImage(
diff --git a/backend/tools/makedist.py b/backend/tools/makedist.py
index c8fc9435..4771f33b 100644
--- a/backend/tools/makedist.py
+++ b/backend/tools/makedist.py
@@ -1,12 +1,49 @@
-if __name__ == '__main__':
-    # import QPT
-    from qpt.executor import CreateExecutableModule as CEM
-    import os
+import argparse
+import os
+
+from qpt.executor import CreateExecutableModule as CEM
+from qpt.modules.cuda import CopyCUDAPackage
+from qpt.smart_opt import set_default_pip_source
+from qpt.kernel.qinterpreter import PYPI_PIP_SOURCE
+
+
+def main():
     WORK_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-    print(WORK_DIR)
     LAUNCH_PATH = os.path.join(WORK_DIR, 'gui.py')
     SAVE_PATH = os.path.join(os.path.dirname(WORK_DIR), 'vse_out')
     ICON_PATH = os.path.join(WORK_DIR, "design", "vse.ico")
-    module = CEM(work_dir=WORK_DIR, launcher_py_path=LAUNCH_PATH, save_path=SAVE_PATH, icon=ICON_PATH, hidden_terminal=False)
-    # start packaging
+
+    # parse command-line arguments
+    parser = argparse.ArgumentParser(description="packaging tool")
+    parser.add_argument(
+        "--cuda",
+        nargs="?",        # the version value is optional
+        const="10.2",     # a bare --cuda defaults to 10.2
+        default=None,     # without --cuda the value stays None
+        help="whether to bundle the CUDA module; a version may be given, e.g. --cuda or --cuda=11.8"
+    )
+
+    args = parser.parse_args()
+
+    sub_modules = []
+
+    if args.cuda:
+        sub_modules.append(CopyCUDAPackage(cuda_version=args.cuda))
+
+    if os.getenv("QPT_Action") == "True":
+        set_default_pip_source(PYPI_PIP_SOURCE)
+
+    module = CEM(
+        work_dir=WORK_DIR,
+        launcher_py_path=LAUNCH_PATH,
+        save_path=SAVE_PATH,
+        icon=ICON_PATH,
+        hidden_terminal=False,
+        requirements_file="./requirements.txt",
+        sub_modules=sub_modules,
+    )
+
     module.make()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
index 3b952f7a..c579e1c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,14 @@
 opencv-python==4.10.0.84
-python-Levenshtein==0.26.0
+Levenshtein==0.26.0
 pillow==10.4.0
 tqdm==4.66.5
 filesplit==3.0.2
 pysrt==1.1.2
 wordsegment==1.3.1
-scikit-image==0.24.0
+scikit-image==0.25.1
 lmdb==1.5.1
-imgaug==0.4.0
 pyclipper==1.3.0.post5
 PySimpleGUI==4.70.1
-numpy==1.26.4
\ No newline at end of file
+numpy==1.26.4
+shapely==2.0.7
+six==1.17.0
\ No newline at end of file

From b5caafea4fb04c8cd1a6d674ef8a87aa8fdb446b Mon Sep 17 00:00:00 2001
From: jason
Date: Sat, 22 Feb 2025 15:06:29 +0800
Subject: [PATCH 2/5] Switch to PaddleOCR and follow upstream updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/config.py | 9 -
 backend/main.py | 9 +-
 backend/ppocr/__init__.py | 16 -
 backend/ppocr/data/__init__.py | 109 -
 backend/ppocr/data/collate_fn.py | 72 -
 backend/ppocr/data/imaug/ColorJitter.py | 26 -
 backend/ppocr/data/imaug/__init__.py | 74 -
 backend/ppocr/data/imaug/copy_paste.py | 170 -
 backend/ppocr/data/imaug/east_process.py | 436 -
 backend/ppocr/data/imaug/fce_aug.py | 564 --
 backend/ppocr/data/imaug/fce_targets.py | 658 --
 backend/ppocr/data/imaug/gen_table_mask.py | 244 -
 backend/ppocr/data/imaug/iaa_augment.py | 104 -
 backend/ppocr/data/imaug/label_ops.py | 1041 --
 backend/ppocr/data/imaug/make_border_map.py | 173 -
 backend/ppocr/data/imaug/make_pse_gt.py | 106 -
 backend/ppocr/data/imaug/make_shrink_map.py | 123 -
 backend/ppocr/data/imaug/operators.py | 468 -
 backend/ppocr/data/imaug/pg_process.py | 906 --
 backend/ppocr/data/imaug/randaugment.py | 143 -
 backend/ppocr/data/imaug/random_crop_data.py | 234 -
 backend/ppocr/data/imaug/rec_img_aug.py | 601 --
 backend/ppocr/data/imaug/sast_process.py | 777 --
 backend/ppocr/data/imaug/ssl_img_aug.py | 60 -
 backend/ppocr/data/imaug/text_image_aug/__init__.py | 17 -
 backend/ppocr/data/imaug/text_image_aug/augment.py | 120 -
 backend/ppocr/data/imaug/text_image_aug/warp_mls.py | 168 -
 backend/ppocr/data/imaug/vqa/__init__.py | 19 -
 backend/ppocr/data/imaug/vqa/token/__init__.py | 17 -
 backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py | 122 -
 backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py | 104 -
 backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py | 67 -
 backend/ppocr/data/lmdb_dataset.py | 118 -
 backend/ppocr/data/pgnet_dataset.py | 106 -
 backend/ppocr/data/pubtab_dataset.py | 114 -
 backend/ppocr/data/simple_dataset.py | 151 -
 backend/ppocr/losses/__init__.py | 71 -
 backend/ppocr/losses/ace_loss.py | 52 -
 backend/ppocr/losses/basic_loss.py | 155 -
backend/ppocr/losses/center_loss.py | 88 - backend/ppocr/losses/cls_loss.py | 30 - backend/ppocr/losses/combined_loss.py | 69 - backend/ppocr/losses/det_basic_loss.py | 153 - backend/ppocr/losses/det_db_loss.py | 76 - backend/ppocr/losses/det_east_loss.py | 63 - backend/ppocr/losses/det_fce_loss.py | 227 - backend/ppocr/losses/det_pse_loss.py | 149 - backend/ppocr/losses/det_sast_loss.py | 121 - backend/ppocr/losses/distillation_loss.py | 324 - backend/ppocr/losses/e2e_pg_loss.py | 140 - backend/ppocr/losses/kie_sdmgr_loss.py | 115 - backend/ppocr/losses/rec_aster_loss.py | 99 - backend/ppocr/losses/rec_att_loss.py | 39 - backend/ppocr/losses/rec_ctc_loss.py | 45 - backend/ppocr/losses/rec_enhanced_ctc_loss.py | 70 - backend/ppocr/losses/rec_multi_loss.py | 58 - backend/ppocr/losses/rec_nrtr_loss.py | 30 - backend/ppocr/losses/rec_pren_loss.py | 30 - backend/ppocr/losses/rec_sar_loss.py | 29 - backend/ppocr/losses/rec_srn_loss.py | 47 - backend/ppocr/losses/table_att_loss.py | 109 - .../ppocr/losses/vqa_token_layoutlm_loss.py | 42 - backend/ppocr/metrics/__init__.py | 47 - backend/ppocr/metrics/cls_metric.py | 46 - backend/ppocr/metrics/det_metric.py | 154 - backend/ppocr/metrics/distillation_metric.py | 73 - backend/ppocr/metrics/e2e_metric.py | 86 - backend/ppocr/metrics/eval_det_iou.py | 225 - backend/ppocr/metrics/kie_metric.py | 71 - backend/ppocr/metrics/rec_metric.py | 76 - backend/ppocr/metrics/table_metric.py | 51 - backend/ppocr/metrics/vqa_token_re_metric.py | 176 - backend/ppocr/metrics/vqa_token_ser_metric.py | 47 - .../ppocr/modeling/architectures/__init__.py | 32 - .../modeling/architectures/base_model.py | 100 - .../architectures/distillation_model.py | 60 - backend/ppocr/modeling/backbones/__init__.py | 64 - .../modeling/backbones/det_mobilenet_v3.py | 268 - .../ppocr/modeling/backbones/det_resnet_vd.py | 351 - .../modeling/backbones/det_resnet_vd_sast.py | 285 - .../modeling/backbones/e2e_resnet_vd_pg.py | 265 - .../modeling/backbones/kie_unet_sdmgr.py | 186 - .../backbones/rec_efficientb3_pren.py | 228 - .../ppocr/modeling/backbones/rec_micronet.py | 528 -- .../modeling/backbones/rec_mobilenet_v3.py | 138 - .../modeling/backbones/rec_mv1_enhance.py | 256 - .../ppocr/modeling/backbones/rec_nrtr_mtb.py | 48 - .../ppocr/modeling/backbones/rec_resnet_31.py | 210 - .../modeling/backbones/rec_resnet_aster.py | 143 - .../modeling/backbones/rec_resnet_fpn.py | 307 - .../ppocr/modeling/backbones/rec_resnet_vd.py | 286 - .../ppocr/modeling/backbones/rec_svtrnet.py | 584 -- .../ppocr/modeling/backbones/vqa_layoutlm.py | 172 - backend/ppocr/modeling/heads/__init__.py | 58 - backend/ppocr/modeling/heads/cls_head.py | 52 - backend/ppocr/modeling/heads/det_db_head.py | 118 - backend/ppocr/modeling/heads/det_east_head.py | 121 - backend/ppocr/modeling/heads/det_fce_head.py | 99 - backend/ppocr/modeling/heads/det_pse_head.py | 37 - backend/ppocr/modeling/heads/det_sast_head.py | 128 - backend/ppocr/modeling/heads/e2e_pg_head.py | 253 - .../ppocr/modeling/heads/kie_sdmgr_head.py | 207 - .../modeling/heads/multiheadAttention.py | 163 - .../ppocr/modeling/heads/rec_aster_head.py | 393 - backend/ppocr/modeling/heads/rec_att_head.py | 202 - backend/ppocr/modeling/heads/rec_ctc_head.py | 87 - .../ppocr/modeling/heads/rec_multi_head.py | 73 - backend/ppocr/modeling/heads/rec_nrtr_head.py | 826 -- backend/ppocr/modeling/heads/rec_pren_head.py | 34 - backend/ppocr/modeling/heads/rec_sar_head.py | 410 - backend/ppocr/modeling/heads/rec_srn_head.py | 280 - .../ppocr/modeling/heads/self_attention.py 
| 406 - .../ppocr/modeling/heads/table_att_head.py | 246 - backend/ppocr/modeling/necks/__init__.py | 37 - backend/ppocr/modeling/necks/db_fpn.py | 358 - backend/ppocr/modeling/necks/east_fpn.py | 188 - backend/ppocr/modeling/necks/fce_fpn.py | 280 - backend/ppocr/modeling/necks/fpn.py | 138 - backend/ppocr/modeling/necks/pg_fpn.py | 314 - backend/ppocr/modeling/necks/pren_fpn.py | 163 - backend/ppocr/modeling/necks/rnn.py | 191 - backend/ppocr/modeling/necks/sast_fpn.py | 284 - backend/ppocr/modeling/necks/table_fpn.py | 110 - backend/ppocr/modeling/transforms/__init__.py | 28 - backend/ppocr/modeling/transforms/stn.py | 135 - backend/ppocr/modeling/transforms/tps.py | 308 - .../transforms/tps_spatial_transformer.py | 156 - backend/ppocr/optimizer/__init__.py | 62 - backend/ppocr/optimizer/learning_rate.py | 310 - backend/ppocr/optimizer/lr_scheduler.py | 162 - backend/ppocr/optimizer/optimizer.py | 234 - backend/ppocr/optimizer/regularizer.py | 51 - backend/ppocr/postprocess/__init__.py | 61 - backend/ppocr/postprocess/cls_postprocess.py | 42 - backend/ppocr/postprocess/db_postprocess.py | 220 - backend/ppocr/postprocess/east_postprocess.py | 143 - backend/ppocr/postprocess/fce_postprocess.py | 241 - .../ppocr/postprocess/locality_aware_nms.py | 200 - backend/ppocr/postprocess/pg_postprocess.py | 52 - .../postprocess/pse_postprocess/__init__.py | 15 - .../postprocess/pse_postprocess/pse/README.md | 6 - .../pse_postprocess/pse/__init__.py | 29 - .../postprocess/pse_postprocess/pse/pse.pyx | 70 - .../postprocess/pse_postprocess/pse/setup.py | 14 - .../pse_postprocess/pse_postprocess.py | 118 - backend/ppocr/postprocess/rec_postprocess.py | 754 -- backend/ppocr/postprocess/sast_postprocess.py | 355 - .../vqa_token_re_layoutlm_postprocess.py | 51 - .../vqa_token_ser_layoutlm_postprocess.py | 93 - backend/ppocr/utils/__init__.py | 13 - backend/ppocr/utils/dict/ar_dict.txt | 117 - backend/ppocr/utils/dict/arabic_dict.txt | 161 - backend/ppocr/utils/dict/be_dict.txt | 145 - backend/ppocr/utils/dict/bg_dict.txt | 140 - backend/ppocr/utils/dict/ch_dict.txt | 6623 ------------- backend/ppocr/utils/dict/chinese_cht_dict.txt | 8421 ----------------- backend/ppocr/utils/dict/cyrillic_dict.txt | 163 - backend/ppocr/utils/dict/devanagari_dict.txt | 167 - backend/ppocr/utils/dict/en_dict.txt | 95 - backend/ppocr/utils/dict/es_dict.txt | 110 - backend/ppocr/utils/dict/fa_dict.txt | 136 - backend/ppocr/utils/dict/french_dict.txt | 136 - backend/ppocr/utils/dict/german_dict.txt | 143 - backend/ppocr/utils/dict/hi_dict.txt | 162 - backend/ppocr/utils/dict/it_dict.txt | 118 - backend/ppocr/utils/dict/japan_dict.txt | 4399 --------- backend/ppocr/utils/dict/ka_dict.txt | 153 - .../utils/dict/kie_dict/xfund_class_list.txt | 4 - backend/ppocr/utils/dict/kn_dict.txt | 153 - backend/ppocr/utils/dict/korean_dict.txt | 3688 -------- backend/ppocr/utils/dict/latin_dict.txt | 185 - .../dict/layout_dict/layout_cdla_dict.txt | 10 - .../layout_dict/layout_publaynet_dict.txt | 5 - .../dict/layout_dict/layout_table_dict.txt | 1 - backend/ppocr/utils/dict/mr_dict.txt | 153 - backend/ppocr/utils/dict/ne_dict.txt | 153 - backend/ppocr/utils/dict/oc_dict.txt | 96 - backend/ppocr/utils/dict/pt_dict.txt | 130 - backend/ppocr/utils/dict/pu_dict.txt | 130 - backend/ppocr/utils/dict/rs_cyrillic_dict.txt | 134 - backend/ppocr/utils/dict/rs_dict.txt | 91 - backend/ppocr/utils/dict/rs_latin_dict.txt | 91 - backend/ppocr/utils/dict/rsc_dict.txt | 134 - backend/ppocr/utils/dict/ru_dict.txt | 163 - backend/ppocr/utils/dict/spin_dict.txt 
| 68 - backend/ppocr/utils/dict/ta_dict.txt | 128 - backend/ppocr/utils/dict/table_dict.txt | 277 - .../dict/table_master_structure_dict.txt | 39 - .../ppocr/utils/dict/table_structure_dict.txt | 28 - .../utils/dict/table_structure_dict_ch.txt | 48 - backend/ppocr/utils/dict/te_dict.txt | 151 - backend/ppocr/utils/dict/ug_dict.txt | 114 - backend/ppocr/utils/dict/uk_dict.txt | 142 - backend/ppocr/utils/dict/ur_dict.txt | 137 - backend/ppocr/utils/dict/xi_dict.txt | 110 - backend/ppocr/utils/e2e_metric/Deteval.py | 574 -- .../ppocr/utils/e2e_metric/polygon_fast.py | 83 - .../utils/e2e_utils/extract_batchsize.py | 87 - .../utils/e2e_utils/extract_textpoint_fast.py | 457 - .../utils/e2e_utils/extract_textpoint_slow.py | 592 -- .../ppocr/utils/e2e_utils/pgnet_pp_utils.py | 162 - backend/ppocr/utils/e2e_utils/visual.py | 162 - backend/ppocr/utils/iou.py | 54 - backend/ppocr/utils/loggers/__init__.py | 3 - backend/ppocr/utils/loggers/base_logger.py | 15 - backend/ppocr/utils/loggers/loggers.py | 18 - backend/ppocr/utils/loggers/vdl_logger.py | 21 - backend/ppocr/utils/loggers/wandb_logger.py | 78 - backend/ppocr/utils/logging.py | 71 - backend/ppocr/utils/network.py | 84 - backend/ppocr/utils/poly_nms.py | 146 - backend/ppocr/utils/profiler.py | 110 - backend/ppocr/utils/save_load.py | 185 - backend/ppocr/utils/stats.py | 72 - backend/ppocr/utils/utility.py | 131 - backend/ppocr/utils/visual.py | 98 - backend/tools/eval.py | 108 - backend/tools/export_center.py | 76 - backend/tools/export_model.py | 172 - backend/tools/infer/predict_cls.py | 151 - backend/tools/infer/predict_det.py | 302 - backend/tools/infer/predict_e2e.py | 169 - backend/tools/infer/predict_rec.py | 442 - backend/tools/infer/predict_system.py | 210 - backend/tools/infer/utility.py | 645 -- backend/tools/infer_cls.py | 85 - backend/tools/infer_det.py | 134 - backend/tools/infer_e2e.py | 122 - backend/tools/infer_kie.py | 153 - backend/tools/infer_rec.py | 166 - backend/tools/infer_table.py | 107 - backend/tools/infer_vqa_token_ser.py | 135 - backend/tools/infer_vqa_token_ser_re.py | 199 - backend/tools/ocr.py | 41 +- backend/tools/program.py | 602 -- backend/tools/subtitle_ocr.py | 8 +- backend/tools/test_hubserving.py | 157 - backend/tools/train.py | 189 - 238 files changed, 28 insertions(+), 61919 deletions(-) delete mode 100755 backend/ppocr/__init__.py delete mode 100644 backend/ppocr/data/__init__.py delete mode 100644 backend/ppocr/data/collate_fn.py delete mode 100644 backend/ppocr/data/imaug/ColorJitter.py delete mode 100644 backend/ppocr/data/imaug/__init__.py delete mode 100644 backend/ppocr/data/imaug/copy_paste.py delete mode 100644 backend/ppocr/data/imaug/east_process.py delete mode 100644 backend/ppocr/data/imaug/fce_aug.py delete mode 100644 backend/ppocr/data/imaug/fce_targets.py delete mode 100644 backend/ppocr/data/imaug/gen_table_mask.py delete mode 100644 backend/ppocr/data/imaug/iaa_augment.py delete mode 100644 backend/ppocr/data/imaug/label_ops.py delete mode 100644 backend/ppocr/data/imaug/make_border_map.py delete mode 100644 backend/ppocr/data/imaug/make_pse_gt.py delete mode 100644 backend/ppocr/data/imaug/make_shrink_map.py delete mode 100644 backend/ppocr/data/imaug/operators.py delete mode 100644 backend/ppocr/data/imaug/pg_process.py delete mode 100644 backend/ppocr/data/imaug/randaugment.py delete mode 100644 backend/ppocr/data/imaug/random_crop_data.py delete mode 100644 backend/ppocr/data/imaug/rec_img_aug.py delete mode 100644 backend/ppocr/data/imaug/sast_process.py delete mode 100644 
backend/ppocr/data/imaug/ssl_img_aug.py delete mode 100644 backend/ppocr/data/imaug/text_image_aug/__init__.py delete mode 100644 backend/ppocr/data/imaug/text_image_aug/augment.py delete mode 100644 backend/ppocr/data/imaug/text_image_aug/warp_mls.py delete mode 100644 backend/ppocr/data/imaug/vqa/__init__.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/__init__.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py delete mode 100644 backend/ppocr/data/lmdb_dataset.py delete mode 100644 backend/ppocr/data/pgnet_dataset.py delete mode 100644 backend/ppocr/data/pubtab_dataset.py delete mode 100644 backend/ppocr/data/simple_dataset.py delete mode 100755 backend/ppocr/losses/__init__.py delete mode 100644 backend/ppocr/losses/ace_loss.py delete mode 100644 backend/ppocr/losses/basic_loss.py delete mode 100644 backend/ppocr/losses/center_loss.py delete mode 100755 backend/ppocr/losses/cls_loss.py delete mode 100644 backend/ppocr/losses/combined_loss.py delete mode 100644 backend/ppocr/losses/det_basic_loss.py delete mode 100755 backend/ppocr/losses/det_db_loss.py delete mode 100644 backend/ppocr/losses/det_east_loss.py delete mode 100644 backend/ppocr/losses/det_fce_loss.py delete mode 100644 backend/ppocr/losses/det_pse_loss.py delete mode 100644 backend/ppocr/losses/det_sast_loss.py delete mode 100644 backend/ppocr/losses/distillation_loss.py delete mode 100644 backend/ppocr/losses/e2e_pg_loss.py delete mode 100644 backend/ppocr/losses/kie_sdmgr_loss.py delete mode 100644 backend/ppocr/losses/rec_aster_loss.py delete mode 100644 backend/ppocr/losses/rec_att_loss.py delete mode 100755 backend/ppocr/losses/rec_ctc_loss.py delete mode 100644 backend/ppocr/losses/rec_enhanced_ctc_loss.py delete mode 100644 backend/ppocr/losses/rec_multi_loss.py delete mode 100644 backend/ppocr/losses/rec_nrtr_loss.py delete mode 100644 backend/ppocr/losses/rec_pren_loss.py delete mode 100644 backend/ppocr/losses/rec_sar_loss.py delete mode 100644 backend/ppocr/losses/rec_srn_loss.py delete mode 100644 backend/ppocr/losses/table_att_loss.py delete mode 100755 backend/ppocr/losses/vqa_token_layoutlm_loss.py delete mode 100644 backend/ppocr/metrics/__init__.py delete mode 100644 backend/ppocr/metrics/cls_metric.py delete mode 100644 backend/ppocr/metrics/det_metric.py delete mode 100644 backend/ppocr/metrics/distillation_metric.py delete mode 100644 backend/ppocr/metrics/e2e_metric.py delete mode 100644 backend/ppocr/metrics/eval_det_iou.py delete mode 100644 backend/ppocr/metrics/kie_metric.py delete mode 100644 backend/ppocr/metrics/rec_metric.py delete mode 100644 backend/ppocr/metrics/table_metric.py delete mode 100644 backend/ppocr/metrics/vqa_token_re_metric.py delete mode 100644 backend/ppocr/metrics/vqa_token_ser_metric.py delete mode 100755 backend/ppocr/modeling/architectures/__init__.py delete mode 100644 backend/ppocr/modeling/architectures/base_model.py delete mode 100644 backend/ppocr/modeling/architectures/distillation_model.py delete mode 100755 backend/ppocr/modeling/backbones/__init__.py delete mode 100755 backend/ppocr/modeling/backbones/det_mobilenet_v3.py delete mode 100644 backend/ppocr/modeling/backbones/det_resnet_vd.py delete mode 100644 backend/ppocr/modeling/backbones/det_resnet_vd_sast.py delete mode 100644 backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py delete mode 100644 
backend/ppocr/modeling/backbones/kie_unet_sdmgr.py delete mode 100644 backend/ppocr/modeling/backbones/rec_efficientb3_pren.py delete mode 100644 backend/ppocr/modeling/backbones/rec_micronet.py delete mode 100644 backend/ppocr/modeling/backbones/rec_mobilenet_v3.py delete mode 100644 backend/ppocr/modeling/backbones/rec_mv1_enhance.py delete mode 100644 backend/ppocr/modeling/backbones/rec_nrtr_mtb.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_31.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_aster.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_fpn.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_vd.py delete mode 100644 backend/ppocr/modeling/backbones/rec_svtrnet.py delete mode 100644 backend/ppocr/modeling/backbones/vqa_layoutlm.py delete mode 100755 backend/ppocr/modeling/heads/__init__.py delete mode 100644 backend/ppocr/modeling/heads/cls_head.py delete mode 100644 backend/ppocr/modeling/heads/det_db_head.py delete mode 100644 backend/ppocr/modeling/heads/det_east_head.py delete mode 100644 backend/ppocr/modeling/heads/det_fce_head.py delete mode 100644 backend/ppocr/modeling/heads/det_pse_head.py delete mode 100644 backend/ppocr/modeling/heads/det_sast_head.py delete mode 100644 backend/ppocr/modeling/heads/e2e_pg_head.py delete mode 100644 backend/ppocr/modeling/heads/kie_sdmgr_head.py delete mode 100755 backend/ppocr/modeling/heads/multiheadAttention.py delete mode 100644 backend/ppocr/modeling/heads/rec_aster_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_att_head.py delete mode 100755 backend/ppocr/modeling/heads/rec_ctc_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_multi_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_nrtr_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_pren_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_sar_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_srn_head.py delete mode 100644 backend/ppocr/modeling/heads/self_attention.py delete mode 100644 backend/ppocr/modeling/heads/table_att_head.py delete mode 100644 backend/ppocr/modeling/necks/__init__.py delete mode 100644 backend/ppocr/modeling/necks/db_fpn.py delete mode 100644 backend/ppocr/modeling/necks/east_fpn.py delete mode 100644 backend/ppocr/modeling/necks/fce_fpn.py delete mode 100644 backend/ppocr/modeling/necks/fpn.py delete mode 100644 backend/ppocr/modeling/necks/pg_fpn.py delete mode 100644 backend/ppocr/modeling/necks/pren_fpn.py delete mode 100644 backend/ppocr/modeling/necks/rnn.py delete mode 100644 backend/ppocr/modeling/necks/sast_fpn.py delete mode 100644 backend/ppocr/modeling/necks/table_fpn.py delete mode 100755 backend/ppocr/modeling/transforms/__init__.py delete mode 100644 backend/ppocr/modeling/transforms/stn.py delete mode 100644 backend/ppocr/modeling/transforms/tps.py delete mode 100644 backend/ppocr/modeling/transforms/tps_spatial_transformer.py delete mode 100644 backend/ppocr/optimizer/__init__.py delete mode 100644 backend/ppocr/optimizer/learning_rate.py delete mode 100644 backend/ppocr/optimizer/lr_scheduler.py delete mode 100644 backend/ppocr/optimizer/optimizer.py delete mode 100644 backend/ppocr/optimizer/regularizer.py delete mode 100644 backend/ppocr/postprocess/__init__.py delete mode 100644 backend/ppocr/postprocess/cls_postprocess.py delete mode 100755 backend/ppocr/postprocess/db_postprocess.py delete mode 100755 backend/ppocr/postprocess/east_postprocess.py delete mode 100755 
backend/ppocr/postprocess/fce_postprocess.py delete mode 100644 backend/ppocr/postprocess/locality_aware_nms.py delete mode 100644 backend/ppocr/postprocess/pg_postprocess.py delete mode 100644 backend/ppocr/postprocess/pse_postprocess/__init__.py delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/README.md delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/__init__.py delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/setup.py delete mode 100755 backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py delete mode 100644 backend/ppocr/postprocess/rec_postprocess.py delete mode 100755 backend/ppocr/postprocess/sast_postprocess.py delete mode 100644 backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py delete mode 100644 backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py delete mode 100755 backend/ppocr/utils/__init__.py delete mode 100644 backend/ppocr/utils/dict/ar_dict.txt delete mode 100644 backend/ppocr/utils/dict/arabic_dict.txt delete mode 100644 backend/ppocr/utils/dict/be_dict.txt delete mode 100644 backend/ppocr/utils/dict/bg_dict.txt delete mode 100644 backend/ppocr/utils/dict/ch_dict.txt delete mode 100644 backend/ppocr/utils/dict/chinese_cht_dict.txt delete mode 100644 backend/ppocr/utils/dict/cyrillic_dict.txt delete mode 100644 backend/ppocr/utils/dict/devanagari_dict.txt delete mode 100644 backend/ppocr/utils/dict/en_dict.txt delete mode 100644 backend/ppocr/utils/dict/es_dict.txt delete mode 100644 backend/ppocr/utils/dict/fa_dict.txt delete mode 100644 backend/ppocr/utils/dict/french_dict.txt delete mode 100644 backend/ppocr/utils/dict/german_dict.txt delete mode 100644 backend/ppocr/utils/dict/hi_dict.txt delete mode 100644 backend/ppocr/utils/dict/it_dict.txt delete mode 100644 backend/ppocr/utils/dict/japan_dict.txt delete mode 100644 backend/ppocr/utils/dict/ka_dict.txt delete mode 100644 backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt delete mode 100644 backend/ppocr/utils/dict/kn_dict.txt delete mode 100644 backend/ppocr/utils/dict/korean_dict.txt delete mode 100644 backend/ppocr/utils/dict/latin_dict.txt delete mode 100644 backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt delete mode 100644 backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt delete mode 100644 backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt delete mode 100644 backend/ppocr/utils/dict/mr_dict.txt delete mode 100644 backend/ppocr/utils/dict/ne_dict.txt delete mode 100644 backend/ppocr/utils/dict/oc_dict.txt delete mode 100644 backend/ppocr/utils/dict/pt_dict.txt delete mode 100644 backend/ppocr/utils/dict/pu_dict.txt delete mode 100644 backend/ppocr/utils/dict/rs_cyrillic_dict.txt delete mode 100644 backend/ppocr/utils/dict/rs_dict.txt delete mode 100644 backend/ppocr/utils/dict/rs_latin_dict.txt delete mode 100644 backend/ppocr/utils/dict/rsc_dict.txt delete mode 100644 backend/ppocr/utils/dict/ru_dict.txt delete mode 100644 backend/ppocr/utils/dict/spin_dict.txt delete mode 100644 backend/ppocr/utils/dict/ta_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_master_structure_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_structure_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_structure_dict_ch.txt delete mode 100644 backend/ppocr/utils/dict/te_dict.txt delete mode 100644 backend/ppocr/utils/dict/ug_dict.txt delete mode 100644 
backend/ppocr/utils/dict/uk_dict.txt delete mode 100644 backend/ppocr/utils/dict/ur_dict.txt delete mode 100644 backend/ppocr/utils/dict/xi_dict.txt delete mode 100755 backend/ppocr/utils/e2e_metric/Deteval.py delete mode 100755 backend/ppocr/utils/e2e_metric/polygon_fast.py delete mode 100644 backend/ppocr/utils/e2e_utils/extract_batchsize.py delete mode 100644 backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py delete mode 100644 backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py delete mode 100644 backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py delete mode 100644 backend/ppocr/utils/e2e_utils/visual.py delete mode 100644 backend/ppocr/utils/iou.py delete mode 100644 backend/ppocr/utils/loggers/__init__.py delete mode 100644 backend/ppocr/utils/loggers/base_logger.py delete mode 100644 backend/ppocr/utils/loggers/loggers.py delete mode 100644 backend/ppocr/utils/loggers/vdl_logger.py delete mode 100644 backend/ppocr/utils/loggers/wandb_logger.py delete mode 100644 backend/ppocr/utils/logging.py delete mode 100644 backend/ppocr/utils/network.py delete mode 100644 backend/ppocr/utils/poly_nms.py delete mode 100644 backend/ppocr/utils/profiler.py delete mode 100644 backend/ppocr/utils/save_load.py delete mode 100755 backend/ppocr/utils/stats.py delete mode 100755 backend/ppocr/utils/utility.py delete mode 100644 backend/ppocr/utils/visual.py delete mode 100755 backend/tools/eval.py delete mode 100644 backend/tools/export_center.py delete mode 100755 backend/tools/export_model.py delete mode 100755 backend/tools/infer/predict_cls.py delete mode 100755 backend/tools/infer/predict_det.py delete mode 100755 backend/tools/infer/predict_e2e.py delete mode 100755 backend/tools/infer/predict_rec.py delete mode 100755 backend/tools/infer/predict_system.py delete mode 100644 backend/tools/infer/utility.py delete mode 100755 backend/tools/infer_cls.py delete mode 100755 backend/tools/infer_det.py delete mode 100755 backend/tools/infer_e2e.py delete mode 100755 backend/tools/infer_kie.py delete mode 100755 backend/tools/infer_rec.py delete mode 100644 backend/tools/infer_table.py delete mode 100755 backend/tools/infer_vqa_token_ser.py delete mode 100755 backend/tools/infer_vqa_token_ser_re.py delete mode 100755 backend/tools/program.py delete mode 100755 backend/tools/test_hubserving.py delete mode 100755 backend/tools/train.py diff --git a/backend/config.py b/backend/config.py index 337f236d..dfdb2d06 100644 --- a/backend/config.py +++ b/backend/config.py @@ -104,8 +104,6 @@ DET_MODEL_BASE = os.path.join(BASE_DIR, 'models') # 设置文本识别模型 + 字典 REC_MODEL_BASE = os.path.join(BASE_DIR, 'models') -# 默认字典路径为中文 -DICT_BASE = os.path.join(BASE_DIR, 'ppocr', 'utils', 'dict') # V3, V4模型默认图形识别的shape为3, 48, 320 REC_IMAGE_SHAPE = '3,48,320' REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'{REC_CHAR_TYPE}_rec') @@ -133,8 +131,6 @@ MULTI_LANG = LATIN_LANG + ARABIC_LANG + CYRILLIC_LANG + DEVANAGARI_LANG + \ OTHER_LANG -# 定义字典路径 -DICT_PATH = os.path.join(DICT_BASE, f'{REC_CHAR_TYPE}_dict.txt') DET_MODEL_FAST_PATH = os.path.join(DET_MODEL_BASE, MODEL_VERSION, 'ch_det_fast') @@ -153,7 +149,6 @@ # 英文模式的ch模型识别效果好于fast if REC_CHAR_TYPE == 'en': REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'ch_rec') - DICT_PATH = os.path.join(DICT_BASE, f'ch_dict.txt') else: REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'{REC_CHAR_TYPE}_rec') else: @@ -176,16 +171,12 @@ if REC_CHAR_TYPE in LATIN_LANG: REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'latin_rec_fast') - 
DICT_PATH = os.path.join(DICT_BASE, f'latin_dict.txt') elif REC_CHAR_TYPE in ARABIC_LANG: REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'arabic_rec_fast') - DICT_PATH = os.path.join(DICT_BASE, f'arabic_dict.txt') elif REC_CHAR_TYPE in CYRILLIC_LANG: REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'cyrillic_rec_fast') - DICT_PATH = os.path.join(DICT_BASE, f'cyrillic_dict.txt') elif REC_CHAR_TYPE in DEVANAGARI_LANG: REC_MODEL_PATH = os.path.join(REC_MODEL_BASE, MODEL_VERSION, f'devanagari_rec_fast') - DICT_PATH = os.path.join(DICT_BASE, f'devanagari_dict.txt') # 定义图像识别shape if MODEL_VERSION == 'V2': diff --git a/backend/main.py b/backend/main.py index 71ac4bff..4723e592 100644 --- a/backend/main.py +++ b/backend/main.py @@ -23,10 +23,9 @@ import importlib import config from tools import reformat -from tools.infer import utility -from tools.infer.predict_det import TextDetector -from tools.ocr import OcrRecogniser, get_coordinates -from tools import subtitle_ocr + +from backend.tools.ocr import OcrRecogniser, get_coordinates +from backend.tools import subtitle_ocr import threading import platform import multiprocessing @@ -40,6 +39,8 @@ class SubtitleDetect: """ def __init__(self): + from paddleocr.tools.infer import utility + from paddleocr.tools.infer.predict_det import TextDetector # 获取参数对象 importlib.reload(config) args = utility.parse_args() diff --git a/backend/ppocr/__init__.py b/backend/ppocr/__init__.py deleted file mode 100755 index e438e531..00000000 --- a/backend/ppocr/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import warnings -warnings.filterwarnings("ignore", category=Warning) -warnings.filterwarnings("ignore", category=DeprecationWarning) diff --git a/backend/ppocr/data/__init__.py b/backend/ppocr/data/__init__.py deleted file mode 100644 index 78c32796..00000000 --- a/backend/ppocr/data/__init__.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import sys -import numpy as np -import skimage -import paddle -import signal -import random - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -import copy -from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler -import paddle.distributed as dist - -from ppocr.data.imaug import transform, create_operators -from ppocr.data.simple_dataset import SimpleDataSet -from ppocr.data.lmdb_dataset import LMDBDataSet -from ppocr.data.pgnet_dataset import PGDataSet -from ppocr.data.pubtab_dataset import PubTabDataSet - -__all__ = ['build_dataloader', 'transform', 'create_operators'] - - -def term_mp(sig_num, frame): - """ kill all child processes - """ - pid = os.getpid() - pgid = os.getpgid(os.getpid()) - print("main proc {} exit, kill process group " "{}".format(pid, pgid)) - os.killpg(pgid, signal.SIGKILL) - - -def build_dataloader(config, mode, device, logger, seed=None): - config = copy.deepcopy(config) - - support_dict = [ - 'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet' - ] - module_name = config[mode]['dataset']['name'] - assert module_name in support_dict, Exception( - 'DataSet only support {}'.format(support_dict)) - assert mode in ['Train', 'Eval', 'Test' - ], "Mode should be Train, Eval or Test." - - dataset = eval(module_name)(config, mode, logger, seed) - loader_config = config[mode]['loader'] - batch_size = loader_config['batch_size_per_card'] - drop_last = loader_config['drop_last'] - shuffle = loader_config['shuffle'] - num_workers = loader_config['num_workers'] - if 'use_shared_memory' in loader_config.keys(): - use_shared_memory = loader_config['use_shared_memory'] - else: - use_shared_memory = True - - if mode == "Train": - # Distribute data to multiple cards - batch_sampler = DistributedBatchSampler( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last) - else: - # Distribute data to single card - batch_sampler = BatchSampler( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last) - - if 'collate_fn' in loader_config: - from . import collate_fn - collate_fn = getattr(collate_fn, loader_config['collate_fn'])() - else: - collate_fn = None - data_loader = DataLoader( - dataset=dataset, - batch_sampler=batch_sampler, - places=device, - num_workers=num_workers, - return_list=True, - use_shared_memory=use_shared_memory, - collate_fn=collate_fn) - - # support exit using ctrl+c - signal.signal(signal.SIGINT, term_mp) - signal.signal(signal.SIGTERM, term_mp) - - return data_loader diff --git a/backend/ppocr/data/collate_fn.py b/backend/ppocr/data/collate_fn.py deleted file mode 100644 index 0da6060f..00000000 --- a/backend/ppocr/data/collate_fn.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import numbers -import numpy as np -from collections import defaultdict - - -class DictCollator(object): - """ - data batch - """ - - def __call__(self, batch): - # todo:support batch operators - data_dict = defaultdict(list) - to_tensor_keys = [] - for sample in batch: - for k, v in sample.items(): - if isinstance(v, (np.ndarray, paddle.Tensor, numbers.Number)): - if k not in to_tensor_keys: - to_tensor_keys.append(k) - data_dict[k].append(v) - for k in to_tensor_keys: - data_dict[k] = paddle.to_tensor(data_dict[k]) - return data_dict - - -class ListCollator(object): - """ - data batch - """ - - def __call__(self, batch): - # todo:support batch operators - data_dict = defaultdict(list) - to_tensor_idxs = [] - for sample in batch: - for idx, v in enumerate(sample): - if isinstance(v, (np.ndarray, paddle.Tensor, numbers.Number)): - if idx not in to_tensor_idxs: - to_tensor_idxs.append(idx) - data_dict[idx].append(v) - for idx in to_tensor_idxs: - data_dict[idx] = paddle.to_tensor(data_dict[idx]) - return list(data_dict.values()) - - -class SSLRotateCollate(object): - """ - bach: [ - [(4*3xH*W), (4,)] - [(4*3xH*W), (4,)] - ... - ] - """ - - def __call__(self, batch): - output = [np.concatenate(d, axis=0) for d in zip(*batch)] - return output diff --git a/backend/ppocr/data/imaug/ColorJitter.py b/backend/ppocr/data/imaug/ColorJitter.py deleted file mode 100644 index 4b542abc..00000000 --- a/backend/ppocr/data/imaug/ColorJitter.py +++ /dev/null @@ -1,26 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.vision.transforms import ColorJitter as pp_ColorJitter - -__all__ = ['ColorJitter'] - -class ColorJitter(object): - def __init__(self, brightness=0, contrast=0, saturation=0, hue=0,**kwargs): - self.aug = pp_ColorJitter(brightness, contrast, saturation, hue) - - def __call__(self, data): - image = data['image'] - image = self.aug(image) - data['image'] = image - return data diff --git a/backend/ppocr/data/imaug/__init__.py b/backend/ppocr/data/imaug/__init__.py deleted file mode 100644 index 548832fb..00000000 --- a/backend/ppocr/data/imaug/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from .iaa_augment import IaaAugment -from .make_border_map import MakeBorderMap -from .make_shrink_map import MakeShrinkMap -from .random_crop_data import EastRandomCropData, RandomCropImgMask -from .make_pse_gt import MakePseGt - -from .rec_img_aug import RecAug, RecConAug, RecResizeImg, ClsResizeImg, \ - SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg -from .ssl_img_aug import SSLRotateResize -from .randaugment import RandAugment -from .copy_paste import CopyPaste -from .ColorJitter import ColorJitter -from .operators import * -from .label_ops import * - -from .east_process import * -from .sast_process import * -from .pg_process import * -from .gen_table_mask import * - -from .vqa import * - -from .fce_aug import * -from .fce_targets import FCENetTargets - - -def transform(data, ops=None): - """ transform """ - if ops is None: - ops = [] - for op in ops: - data = op(data) - if data is None: - return None - return data - - -def create_operators(op_param_list, global_config=None): - """ - create operators based on the config - - Args: - params(list): a dict list, used to create some operators - """ - assert isinstance(op_param_list, list), ('operator config should be a list') - ops = [] - for operator in op_param_list: - assert isinstance(operator, - dict) and len(operator) == 1, "yaml format error" - op_name = list(operator)[0] - param = {} if operator[op_name] is None else operator[op_name] - if global_config is not None: - param.update(global_config) - op = eval(op_name)(**param) - ops.append(op) - return ops diff --git a/backend/ppocr/data/imaug/copy_paste.py b/backend/ppocr/data/imaug/copy_paste.py deleted file mode 100644 index 0b3386c8..00000000 --- a/backend/ppocr/data/imaug/copy_paste.py +++ /dev/null @@ -1,170 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import copy -import cv2 -import random -import numpy as np -from PIL import Image -from shapely.geometry import Polygon - -from ppocr.data.imaug.iaa_augment import IaaAugment -from ppocr.data.imaug.random_crop_data import is_poly_outside_rect -from tools.infer.utility import get_rotate_crop_image - - -class CopyPaste(object): - def __init__(self, objects_paste_ratio=0.2, limit_paste=True, **kwargs): - self.ext_data_num = 1 - self.objects_paste_ratio = objects_paste_ratio - self.limit_paste = limit_paste - augmenter_args = [{'type': 'Resize', 'args': {'size': [0.5, 3]}}] - self.aug = IaaAugment(augmenter_args) - - def __call__(self, data): - point_num = data['polys'].shape[1] - src_img = data['image'] - src_polys = data['polys'].tolist() - src_ignores = data['ignore_tags'].tolist() - ext_data = data['ext_data'][0] - ext_image = ext_data['image'] - ext_polys = ext_data['polys'] - ext_ignores = ext_data['ignore_tags'] - - indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]] - select_num = max( - 1, min(int(self.objects_paste_ratio * len(ext_polys)), 30)) - - random.shuffle(indexs) - select_idxs = indexs[:select_num] - select_polys = ext_polys[select_idxs] - select_ignores = ext_ignores[select_idxs] - - src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB) - ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB) - src_img = Image.fromarray(src_img).convert('RGBA') - for poly, tag in zip(select_polys, select_ignores): - box_img = get_rotate_crop_image(ext_image, poly) - - src_img, box = self.paste_img(src_img, box_img, src_polys) - if box is not None: - box = box.tolist() - for _ in range(len(box), point_num): - box.append(box[-1]) - src_polys.append(box) - src_ignores.append(tag) - src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR) - h, w = src_img.shape[:2] - src_polys = np.array(src_polys) - src_polys[:, :, 0] = np.clip(src_polys[:, :, 0], 0, w) - src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h) - data['image'] = src_img - data['polys'] = src_polys - data['ignore_tags'] = np.array(src_ignores) - return data - - def paste_img(self, src_img, box_img, src_polys): - box_img_pil = Image.fromarray(box_img).convert('RGBA') - src_w, src_h = src_img.size - box_w, box_h = box_img_pil.size - - angle = np.random.randint(0, 360) - box = np.array([[[0, 0], [box_w, 0], [box_w, box_h], [0, box_h]]]) - box = rotate_bbox(box_img, box, angle)[0] - box_img_pil = box_img_pil.rotate(angle, expand=1) - box_w, box_h = box_img_pil.width, box_img_pil.height - if src_w - box_w < 0 or src_h - box_h < 0: - return src_img, None - - paste_x, paste_y = self.select_coord(src_polys, box, src_w - box_w, - src_h - box_h) - if paste_x is None: - return src_img, None - box[:, 0] += paste_x - box[:, 1] += paste_y - r, g, b, A = box_img_pil.split() - src_img.paste(box_img_pil, (paste_x, paste_y), mask=A) - - return src_img, box - - def select_coord(self, src_polys, box, endx, endy): - if self.limit_paste: - xmin, ymin, xmax, ymax = box[:, 0].min(), box[:, 1].min( - ), box[:, 0].max(), box[:, 1].max() - for _ in range(50): - paste_x = random.randint(0, endx) - paste_y = random.randint(0, endy) - xmin1 = xmin + paste_x - xmax1 = xmax + paste_x - ymin1 = ymin + paste_y - ymax1 = ymax + paste_y - - num_poly_in_rect = 0 - for poly in src_polys: - if not is_poly_outside_rect(poly, xmin1, ymin1, - xmax1 - xmin1, ymax1 - ymin1): - num_poly_in_rect += 1 - break - if num_poly_in_rect == 0: - return paste_x, paste_y - return None, None - else: - paste_x = random.randint(0, endx) - paste_y = random.randint(0, 
endy) - return paste_x, paste_y - - -def get_union(pD, pG): - return Polygon(pD).union(Polygon(pG)).area - - -def get_intersection_over_union(pD, pG): - return get_intersection(pD, pG) / get_union(pD, pG) - - -def get_intersection(pD, pG): - return Polygon(pD).intersection(Polygon(pG)).area - - -def rotate_bbox(img, text_polys, angle, scale=1): - """ - from https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/augment.py - Args: - img: np.ndarray - text_polys: np.ndarray N*4*2 - angle: int - scale: int - - Returns: - - """ - w = img.shape[1] - h = img.shape[0] - - rangle = np.deg2rad(angle) - nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) - nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) - rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale) - rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0])) - rot_mat[0, 2] += rot_move[0] - rot_mat[1, 2] += rot_move[1] - - # ---------------------- rotate box ---------------------- - rot_text_polys = list() - for bbox in text_polys: - point1 = np.dot(rot_mat, np.array([bbox[0, 0], bbox[0, 1], 1])) - point2 = np.dot(rot_mat, np.array([bbox[1, 0], bbox[1, 1], 1])) - point3 = np.dot(rot_mat, np.array([bbox[2, 0], bbox[2, 1], 1])) - point4 = np.dot(rot_mat, np.array([bbox[3, 0], bbox[3, 1], 1])) - rot_text_polys.append([point1, point2, point3, point4]) - return np.array(rot_text_polys, dtype=np.float32) diff --git a/backend/ppocr/data/imaug/east_process.py b/backend/ppocr/data/imaug/east_process.py deleted file mode 100644 index df08adfa..00000000 --- a/backend/ppocr/data/imaug/east_process.py +++ /dev/null @@ -1,436 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
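[Editor's note, not part of the patch] Just above, copy_paste.py's module-level helpers reduce polygon IoU to a few shapely calls (get_intersection, get_union, get_intersection_over_union). A quick hand-checked sanity sketch of the removed helpers:

    import numpy as np

    quad_a = np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.float32)
    quad_b = np.array([[5, 5], [15, 5], [15, 15], [5, 15]], dtype=np.float32)

    iou = get_intersection_over_union(quad_a, quad_b)
    # intersection = 25, union = 100 + 100 - 25 = 175, so iou ~= 0.1429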
-""" -This code is refered from: -https://github.com/songdejia/EAST/blob/master/data_utils.py -""" -import math -import cv2 -import numpy as np -import json -import sys -import os - -__all__ = ['EASTProcessTrain'] - - -class EASTProcessTrain(object): - def __init__(self, - image_shape=[512, 512], - background_ratio=0.125, - min_crop_side_ratio=0.1, - min_text_size=10, - **kwargs): - self.input_size = image_shape[1] - self.random_scale = np.array([0.5, 1, 2.0, 3.0]) - self.background_ratio = background_ratio - self.min_crop_side_ratio = min_crop_side_ratio - self.min_text_size = min_text_size - - def preprocess(self, im): - input_size = self.input_size - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale) - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - # im = im[:, :, ::-1].astype(np.float32) - im = im / 255 - im -= img_mean - im /= img_std - new_h, new_w, _ = im.shape - im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32) - im_padded[:new_h, :new_w, :] = im - im_padded = im_padded.transpose((2, 0, 1)) - im_padded = im_padded[np.newaxis, :] - return im_padded, im_scale - - def rotate_im_poly(self, im, text_polys): - """ - rotate image with 90 / 180 / 270 degre - """ - im_w, im_h = im.shape[1], im.shape[0] - dst_im = im.copy() - dst_polys = [] - rand_degree_ratio = np.random.rand() - rand_degree_cnt = 1 - if 0.333 < rand_degree_ratio < 0.666: - rand_degree_cnt = 2 - elif rand_degree_ratio > 0.666: - rand_degree_cnt = 3 - for i in range(rand_degree_cnt): - dst_im = np.rot90(dst_im) - rot_degree = -90 * rand_degree_cnt - rot_angle = rot_degree * math.pi / 180.0 - n_poly = text_polys.shape[0] - cx, cy = 0.5 * im_w, 0.5 * im_h - ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0] - for i in range(n_poly): - wordBB = text_polys[i] - poly = [] - for j in range(4): - sx, sy = wordBB[j][0], wordBB[j][1] - dx = math.cos(rot_angle) * (sx - cx)\ - - math.sin(rot_angle) * (sy - cy) + ncx - dy = math.sin(rot_angle) * (sx - cx)\ - + math.cos(rot_angle) * (sy - cy) + ncy - poly.append([dx, dy]) - dst_polys.append(poly) - dst_polys = np.array(dst_polys, dtype=np.float32) - return dst_im, dst_polys - - def polygon_area(self, poly): - """ - compute area of a polygon - :param poly: - :return: - """ - edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] - return np.sum(edge) / 2. 
- - def check_and_validate_polys(self, polys, tags, img_height, img_width): - """ - check so that the text poly is in the same direction, - and also filter some invalid polygons - :param polys: - :param tags: - :return: - """ - h, w = img_height, img_width - if polys.shape[0] == 0: - return polys - polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1) - polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1) - - validated_polys = [] - validated_tags = [] - for poly, tag in zip(polys, tags): - p_area = self.polygon_area(poly) - #invalid poly - if abs(p_area) < 1: - continue - if p_area > 0: - #'poly in wrong direction' - if not tag: - tag = True #reversed cases should be ignore - poly = poly[(0, 3, 2, 1), :] - validated_polys.append(poly) - validated_tags.append(tag) - return np.array(validated_polys), np.array(validated_tags) - - def draw_img_polys(self, img, polys): - if len(img.shape) == 4: - img = np.squeeze(img, axis=0) - if img.shape[0] == 3: - img = img.transpose((1, 2, 0)) - img[:, :, 2] += 123.68 - img[:, :, 1] += 116.78 - img[:, :, 0] += 103.94 - cv2.imwrite("tmp.jpg", img) - img = cv2.imread("tmp.jpg") - for box in polys: - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2) - import random - ino = random.randint(0, 100) - cv2.imwrite("tmp_%d.jpg" % ino, img) - return - - def shrink_poly(self, poly, r): - """ - fit a poly inside the origin poly, maybe bugs here... - used for generate the score map - :param poly: the text poly - :param r: r in the paper - :return: the shrinked poly - """ - # shrink ratio - R = 0.3 - # find the longer pair - dist0 = np.linalg.norm(poly[0] - poly[1]) - dist1 = np.linalg.norm(poly[2] - poly[3]) - dist2 = np.linalg.norm(poly[0] - poly[3]) - dist3 = np.linalg.norm(poly[1] - poly[2]) - if dist0 + dist1 > dist2 + dist3: - # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2) - ## p0, p1 - theta = np.arctan2((poly[1][1] - poly[0][1]), - (poly[1][0] - poly[0][0])) - poly[0][0] += R * r[0] * np.cos(theta) - poly[0][1] += R * r[0] * np.sin(theta) - poly[1][0] -= R * r[1] * np.cos(theta) - poly[1][1] -= R * r[1] * np.sin(theta) - ## p2, p3 - theta = np.arctan2((poly[2][1] - poly[3][1]), - (poly[2][0] - poly[3][0])) - poly[3][0] += R * r[3] * np.cos(theta) - poly[3][1] += R * r[3] * np.sin(theta) - poly[2][0] -= R * r[2] * np.cos(theta) - poly[2][1] -= R * r[2] * np.sin(theta) - ## p0, p3 - theta = np.arctan2((poly[3][0] - poly[0][0]), - (poly[3][1] - poly[0][1])) - poly[0][0] += R * r[0] * np.sin(theta) - poly[0][1] += R * r[0] * np.cos(theta) - poly[3][0] -= R * r[3] * np.sin(theta) - poly[3][1] -= R * r[3] * np.cos(theta) - ## p1, p2 - theta = np.arctan2((poly[2][0] - poly[1][0]), - (poly[2][1] - poly[1][1])) - poly[1][0] += R * r[1] * np.sin(theta) - poly[1][1] += R * r[1] * np.cos(theta) - poly[2][0] -= R * r[2] * np.sin(theta) - poly[2][1] -= R * r[2] * np.cos(theta) - else: - ## p0, p3 - # print poly - theta = np.arctan2((poly[3][0] - poly[0][0]), - (poly[3][1] - poly[0][1])) - poly[0][0] += R * r[0] * np.sin(theta) - poly[0][1] += R * r[0] * np.cos(theta) - poly[3][0] -= R * r[3] * np.sin(theta) - poly[3][1] -= R * r[3] * np.cos(theta) - ## p1, p2 - theta = np.arctan2((poly[2][0] - poly[1][0]), - (poly[2][1] - poly[1][1])) - poly[1][0] += R * r[1] * np.sin(theta) - poly[1][1] += R * r[1] * np.cos(theta) - poly[2][0] -= R * r[2] * np.sin(theta) - poly[2][1] -= R * r[2] * np.cos(theta) - ## p0, p1 - theta = np.arctan2((poly[1][1] - poly[0][1]), - (poly[1][0] - poly[0][0])) - poly[0][0] += R * r[0] 
* np.cos(theta) - poly[0][1] += R * r[0] * np.sin(theta) - poly[1][0] -= R * r[1] * np.cos(theta) - poly[1][1] -= R * r[1] * np.sin(theta) - ## p2, p3 - theta = np.arctan2((poly[2][1] - poly[3][1]), - (poly[2][0] - poly[3][0])) - poly[3][0] += R * r[3] * np.cos(theta) - poly[3][1] += R * r[3] * np.sin(theta) - poly[2][0] -= R * r[2] * np.cos(theta) - poly[2][1] -= R * r[2] * np.sin(theta) - return poly - - def generate_quad(self, im_size, polys, tags): - """ - Generate quadrangle. - """ - h, w = im_size - poly_mask = np.zeros((h, w), dtype=np.uint8) - score_map = np.zeros((h, w), dtype=np.uint8) - # (x1, y1, ..., x4, y4, short_edge_norm) - geo_map = np.zeros((h, w, 9), dtype=np.float32) - # mask used during traning, to ignore some hard areas - training_mask = np.ones((h, w), dtype=np.uint8) - for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] - tag = poly_tag[1] - - r = [None, None, None, None] - for i in range(4): - dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4]) - dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4]) - r[i] = min(dist1, dist2) - # score map - shrinked_poly = self.shrink_poly( - poly.copy(), r).astype(np.int32)[np.newaxis, :, :] - cv2.fillPoly(score_map, shrinked_poly, 1) - cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1) - # if the poly is too small, then ignore it during training - poly_h = min( - np.linalg.norm(poly[0] - poly[3]), - np.linalg.norm(poly[1] - poly[2])) - poly_w = min( - np.linalg.norm(poly[0] - poly[1]), - np.linalg.norm(poly[2] - poly[3])) - if min(poly_h, poly_w) < self.min_text_size: - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0) - - if tag: - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0) - - xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1)) - # geo map. 
- y_in_poly = xy_in_poly[:, 0] - x_in_poly = xy_in_poly[:, 1] - poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w) - poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h) - for pno in range(4): - geo_channel_beg = pno * 2 - geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\ - x_in_poly - poly[pno, 0] - geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\ - y_in_poly - poly[pno, 1] - geo_map[y_in_poly, x_in_poly, 8] = \ - 1.0 / max(min(poly_h, poly_w), 1.0) - return score_map, geo_map, training_mask - - def crop_area(self, im, polys, tags, crop_background=False, max_tries=50): - """ - make random crop from the input image - :param im: - :param polys: - :param tags: - :param crop_background: - :param max_tries: - :return: - """ - h, w, _ = im.shape - pad_h = h // 10 - pad_w = w // 10 - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - if len(h_axis) == 0 or len(w_axis) == 0: - return im, polys, tags - - for i in range(max_tries): - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - if xmax - xmin < self.min_crop_side_ratio * w or \ - ymax - ymin < self.min_crop_side_ratio * h: - # area too small - continue - if polys.shape[0] != 0: - poly_axis_in_area = (polys[:, :, 0] >= xmin)\ - & (polys[:, :, 0] <= xmax)\ - & (polys[:, :, 1] >= ymin)\ - & (polys[:, :, 1] <= ymax) - selected_polys = np.where( - np.sum(poly_axis_in_area, axis=1) == 4)[0] - else: - selected_polys = [] - - if len(selected_polys) == 0: - # no text in this area - if crop_background: - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = [] - tags = [] - return im, polys, tags - else: - continue - - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = polys[selected_polys] - tags = tags[selected_polys] - polys[:, :, 0] -= xmin - polys[:, :, 1] -= ymin - return im, polys, tags - return im, polys, tags - - def crop_background_infor(self, im, text_polys, text_tags): - im, text_polys, text_tags = self.crop_area( - im, text_polys, text_tags, crop_background=True) - - if len(text_polys) > 0: - return None - # pad and resize image - input_size = self.input_size - im, ratio = self.preprocess(im) - score_map = np.zeros((input_size, input_size), dtype=np.float32) - geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32) - training_mask = np.ones((input_size, input_size), dtype=np.float32) - return im, score_map, geo_map, training_mask - - def crop_foreground_infor(self, im, text_polys, text_tags): - im, text_polys, text_tags = self.crop_area( - im, text_polys, text_tags, crop_background=False) - - if text_polys.shape[0] == 0: - return None - #continue for all ignore case - if np.sum((text_tags * 1.0)) >= text_tags.size: - return None - # pad and resize image - input_size = self.input_size - im, ratio = self.preprocess(im) - text_polys[:, :, 0] *= ratio - text_polys[:, :, 1] *= ratio - _, _, new_h, new_w = im.shape - # 
print(im.shape) - # self.draw_img_polys(im, text_polys) - score_map, geo_map, training_mask = self.generate_quad( - (new_h, new_w), text_polys, text_tags) - return im, score_map, geo_map, training_mask - - def __call__(self, data): - im = data['image'] - text_polys = data['polys'] - text_tags = data['ignore_tags'] - if im is None: - return None - if text_polys.shape[0] == 0: - return None - - #add rotate cases - if np.random.rand() < 0.5: - im, text_polys = self.rotate_im_poly(im, text_polys) - h, w, _ = im.shape - text_polys, text_tags = self.check_and_validate_polys(text_polys, - text_tags, h, w) - if text_polys.shape[0] == 0: - return None - - # random scale this image - rd_scale = np.random.choice(self.random_scale) - im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale) - text_polys *= rd_scale - if np.random.rand() < self.background_ratio: - outs = self.crop_background_infor(im, text_polys, text_tags) - else: - outs = self.crop_foreground_infor(im, text_polys, text_tags) - - if outs is None: - return None - im, score_map, geo_map, training_mask = outs - score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32) - geo_map = np.swapaxes(geo_map, 1, 2) - geo_map = np.swapaxes(geo_map, 1, 0) - geo_map = geo_map[:, ::4, ::4].astype(np.float32) - training_mask = training_mask[np.newaxis, ::4, ::4] - training_mask = training_mask.astype(np.float32) - - data['image'] = im[0] - data['score_map'] = score_map - data['geo_map'] = geo_map - data['training_mask'] = training_mask - return data diff --git a/backend/ppocr/data/imaug/fce_aug.py b/backend/ppocr/data/imaug/fce_aug.py deleted file mode 100644 index 66bafef1..00000000 --- a/backend/ppocr/data/imaug/fce_aug.py +++ /dev/null @@ -1,564 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/transforms.py -""" -import numpy as np -from PIL import Image, ImageDraw -import cv2 -from shapely.geometry import Polygon -import math -from ppocr.utils.poly_nms import poly_intersection - - -class RandomScaling: - def __init__(self, size=800, scale=(3. / 4, 5. / 2), **kwargs): - """Random scale the image while keeping aspect. - - Args: - size (int) : Base size before scaling. - scale (tuple(float)) : The range of scaling. 
- """ - assert isinstance(size, int) - assert isinstance(scale, float) or isinstance(scale, tuple) - self.size = size - self.scale = scale if isinstance(scale, tuple) \ - else (1 - scale, 1 + scale) - - def __call__(self, data): - image = data['image'] - text_polys = data['polys'] - h, w, _ = image.shape - - aspect_ratio = np.random.uniform(min(self.scale), max(self.scale)) - scales = self.size * 1.0 / max(h, w) * aspect_ratio - scales = np.array([scales, scales]) - out_size = (int(h * scales[1]), int(w * scales[0])) - image = cv2.resize(image, out_size[::-1]) - - data['image'] = image - text_polys[:, :, 0::2] = text_polys[:, :, 0::2] * scales[1] - text_polys[:, :, 1::2] = text_polys[:, :, 1::2] * scales[0] - data['polys'] = text_polys - - return data - - -class RandomCropFlip: - def __init__(self, - pad_ratio=0.1, - crop_ratio=0.5, - iter_num=1, - min_area_ratio=0.2, - **kwargs): - """Random crop and flip a patch of the image. - - Args: - crop_ratio (float): The ratio of cropping. - iter_num (int): Number of operations. - min_area_ratio (float): Minimal area ratio between cropped patch - and original image. - """ - assert isinstance(crop_ratio, float) - assert isinstance(iter_num, int) - assert isinstance(min_area_ratio, float) - - self.pad_ratio = pad_ratio - self.epsilon = 1e-2 - self.crop_ratio = crop_ratio - self.iter_num = iter_num - self.min_area_ratio = min_area_ratio - - def __call__(self, results): - for i in range(self.iter_num): - results = self.random_crop_flip(results) - - return results - - def random_crop_flip(self, results): - image = results['image'] - polygons = results['polys'] - ignore_tags = results['ignore_tags'] - if len(polygons) == 0: - return results - - if np.random.random() >= self.crop_ratio: - return results - - h, w, _ = image.shape - area = h * w - pad_h = int(h * self.pad_ratio) - pad_w = int(w * self.pad_ratio) - h_axis, w_axis = self.generate_crop_target(image, polygons, pad_h, - pad_w) - if len(h_axis) == 0 or len(w_axis) == 0: - return results - - attempt = 0 - while attempt < 50: - attempt += 1 - polys_keep = [] - polys_new = [] - ignore_tags_keep = [] - ignore_tags_new = [] - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio: - # area too small - continue - - pts = np.stack([[xmin, xmax, xmax, xmin], - [ymin, ymin, ymax, ymax]]).T.astype(np.int32) - pp = Polygon(pts) - fail_flag = False - for polygon, ignore_tag in zip(polygons, ignore_tags): - ppi = Polygon(polygon.reshape(-1, 2)) - ppiou, _ = poly_intersection(ppi, pp, buffer=0) - if np.abs(ppiou - float(ppi.area)) > self.epsilon and \ - np.abs(ppiou) > self.epsilon: - fail_flag = True - break - elif np.abs(ppiou - float(ppi.area)) < self.epsilon: - polys_new.append(polygon) - ignore_tags_new.append(ignore_tag) - else: - polys_keep.append(polygon) - ignore_tags_keep.append(ignore_tag) - - if fail_flag: - continue - else: - break - - cropped = image[ymin:ymax, xmin:xmax, :] - select_type = np.random.randint(3) - if select_type == 0: - img = np.ascontiguousarray(cropped[:, ::-1]) - elif select_type == 1: - img = np.ascontiguousarray(cropped[::-1, :]) - else: - img = np.ascontiguousarray(cropped[::-1, ::-1]) - image[ymin:ymax, xmin:xmax, :] = img - results['img'] = image 
- - if len(polys_new) != 0: - height, width, _ = cropped.shape - if select_type == 0: - for idx, polygon in enumerate(polys_new): - poly = polygon.reshape(-1, 2) - poly[:, 0] = width - poly[:, 0] + 2 * xmin - polys_new[idx] = poly - elif select_type == 1: - for idx, polygon in enumerate(polys_new): - poly = polygon.reshape(-1, 2) - poly[:, 1] = height - poly[:, 1] + 2 * ymin - polys_new[idx] = poly - else: - for idx, polygon in enumerate(polys_new): - poly = polygon.reshape(-1, 2) - poly[:, 0] = width - poly[:, 0] + 2 * xmin - poly[:, 1] = height - poly[:, 1] + 2 * ymin - polys_new[idx] = poly - polygons = polys_keep + polys_new - ignore_tags = ignore_tags_keep + ignore_tags_new - results['polys'] = np.array(polygons) - results['ignore_tags'] = ignore_tags - - return results - - def generate_crop_target(self, image, all_polys, pad_h, pad_w): - """Generate crop target and make sure not to crop the polygon - instances. - - Args: - image (ndarray): The image waited to be crop. - all_polys (list[list[ndarray]]): All polygons including ground - truth polygons and ground truth ignored polygons. - pad_h (int): Padding length of height. - pad_w (int): Padding length of width. - Returns: - h_axis (ndarray): Vertical cropping range. - w_axis (ndarray): Horizontal cropping range. - """ - h, w, _ = image.shape - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - - text_polys = [] - for polygon in all_polys: - rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2)) - box = cv2.boxPoints(rect) - box = np.int0(box) - text_polys.append([box[0], box[1], box[2], box[3]]) - - polys = np.array(text_polys, dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - return h_axis, w_axis - - -class RandomCropPolyInstances: - """Randomly crop images and make sure to contain at least one intact - instance.""" - - def __init__(self, crop_ratio=5.0 / 8.0, min_side_ratio=0.4, **kwargs): - super().__init__() - self.crop_ratio = crop_ratio - self.min_side_ratio = min_side_ratio - - def sample_valid_start_end(self, valid_array, min_len, max_start, min_end): - - assert isinstance(min_len, int) - assert len(valid_array) > min_len - - start_array = valid_array.copy() - max_start = min(len(start_array) - min_len, max_start) - start_array[max_start:] = 0 - start_array[0] = 1 - diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0]) - region_starts = np.where(diff_array < 0)[0] - region_ends = np.where(diff_array > 0)[0] - region_ind = np.random.randint(0, len(region_starts)) - start = np.random.randint(region_starts[region_ind], - region_ends[region_ind]) - - end_array = valid_array.copy() - min_end = max(start + min_len, min_end) - end_array[:min_end] = 0 - end_array[-1] = 1 - diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0]) - region_starts = np.where(diff_array < 0)[0] - region_ends = np.where(diff_array > 0)[0] - region_ind = np.random.randint(0, len(region_starts)) - end = np.random.randint(region_starts[region_ind], - region_ends[region_ind]) - return start, end - - def sample_crop_box(self, img_size, results): - """Generate crop box and make sure not to crop the polygon instances. 
- - Args: - img_size (tuple(int)): The image size (h, w). - results (dict): The results dict. - """ - - assert isinstance(img_size, tuple) - h, w = img_size[:2] - - key_masks = results['polys'] - - x_valid_array = np.ones(w, dtype=np.int32) - y_valid_array = np.ones(h, dtype=np.int32) - - selected_mask = key_masks[np.random.randint(0, len(key_masks))] - selected_mask = selected_mask.reshape((-1, 2)).astype(np.int32) - max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0) - min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1) - max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0) - min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1) - - for mask in key_masks: - mask = mask.reshape((-1, 2)).astype(np.int32) - clip_x = np.clip(mask[:, 0], 0, w - 1) - clip_y = np.clip(mask[:, 1], 0, h - 1) - min_x, max_x = np.min(clip_x), np.max(clip_x) - min_y, max_y = np.min(clip_y), np.max(clip_y) - - x_valid_array[min_x - 2:max_x + 3] = 0 - y_valid_array[min_y - 2:max_y + 3] = 0 - - min_w = int(w * self.min_side_ratio) - min_h = int(h * self.min_side_ratio) - - x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start, - min_x_end) - y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start, - min_y_end) - - return np.array([x1, y1, x2, y2]) - - def crop_img(self, img, bbox): - assert img.ndim == 3 - h, w, _ = img.shape - assert 0 <= bbox[1] < bbox[3] <= h - assert 0 <= bbox[0] < bbox[2] <= w - return img[bbox[1]:bbox[3], bbox[0]:bbox[2]] - - def __call__(self, results): - image = results['image'] - polygons = results['polys'] - ignore_tags = results['ignore_tags'] - if len(polygons) < 1: - return results - - if np.random.random_sample() < self.crop_ratio: - - crop_box = self.sample_crop_box(image.shape, results) - img = self.crop_img(image, crop_box) - results['image'] = img - # crop and filter masks - x1, y1, x2, y2 = crop_box - w = max(x2 - x1, 1) - h = max(y2 - y1, 1) - polygons[:, :, 0::2] = polygons[:, :, 0::2] - x1 - polygons[:, :, 1::2] = polygons[:, :, 1::2] - y1 - - valid_masks_list = [] - valid_tags_list = [] - for ind, polygon in enumerate(polygons): - if (polygon[:, ::2] > -4).all() and ( - polygon[:, ::2] < w + 4).all() and ( - polygon[:, 1::2] > -4).all() and ( - polygon[:, 1::2] < h + 4).all(): - polygon[:, ::2] = np.clip(polygon[:, ::2], 0, w) - polygon[:, 1::2] = np.clip(polygon[:, 1::2], 0, h) - valid_masks_list.append(polygon) - valid_tags_list.append(ignore_tags[ind]) - - results['polys'] = np.array(valid_masks_list) - results['ignore_tags'] = valid_tags_list - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - return repr_str - - -class RandomRotatePolyInstances: - def __init__(self, - rotate_ratio=0.5, - max_angle=10, - pad_with_fixed_color=False, - pad_value=(0, 0, 0), - **kwargs): - """Randomly rotate images and polygon masks. - - Args: - rotate_ratio (float): The ratio of samples to operate rotation. - max_angle (int): The maximum rotation angle. - pad_with_fixed_color (bool): The flag for whether to pad rotated - image with fixed value. If set to False, the rotated image will - be padded onto cropped image. - pad_value (tuple(int)): The color value for padding rotated image. - """ - self.rotate_ratio = rotate_ratio - self.max_angle = max_angle - self.pad_with_fixed_color = pad_with_fixed_color - self.pad_value = pad_value - - def rotate(self, center, points, theta, center_shift=(0, 0)): - # rotate points. 
- (center_x, center_y) = center - center_y = -center_y - x, y = points[:, ::2], points[:, 1::2] - y = -y - - theta = theta / 180 * math.pi - cos = math.cos(theta) - sin = math.sin(theta) - - x = (x - center_x) - y = (y - center_y) - - _x = center_x + x * cos - y * sin + center_shift[0] - _y = -(center_y + x * sin + y * cos) + center_shift[1] - - points[:, ::2], points[:, 1::2] = _x, _y - return points - - def cal_canvas_size(self, ori_size, degree): - assert isinstance(ori_size, tuple) - angle = degree * math.pi / 180.0 - h, w = ori_size[:2] - - cos = math.cos(angle) - sin = math.sin(angle) - canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos)) - canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin)) - - canvas_size = (canvas_h, canvas_w) - return canvas_size - - def sample_angle(self, max_angle): - angle = np.random.random_sample() * 2 * max_angle - max_angle - return angle - - def rotate_img(self, img, angle, canvas_size): - h, w = img.shape[:2] - rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1) - rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2) - rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2) - - if self.pad_with_fixed_color: - target_img = cv2.warpAffine( - img, - rotation_matrix, (canvas_size[1], canvas_size[0]), - flags=cv2.INTER_NEAREST, - borderValue=self.pad_value) - else: - mask = np.zeros_like(img) - (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8), - np.random.randint(0, w * 7 // 8)) - img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)] - img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0])) - - mask = cv2.warpAffine( - mask, - rotation_matrix, (canvas_size[1], canvas_size[0]), - borderValue=[1, 1, 1]) - target_img = cv2.warpAffine( - img, - rotation_matrix, (canvas_size[1], canvas_size[0]), - borderValue=[0, 0, 0]) - target_img = target_img + img_cut * mask - - return target_img - - def __call__(self, results): - if np.random.random_sample() < self.rotate_ratio: - image = results['image'] - polygons = results['polys'] - h, w = image.shape[:2] - - angle = self.sample_angle(self.max_angle) - canvas_size = self.cal_canvas_size((h, w), angle) - center_shift = (int((canvas_size[1] - w) / 2), int( - (canvas_size[0] - h) / 2)) - image = self.rotate_img(image, angle, canvas_size) - results['image'] = image - # rotate polygons - rotated_masks = [] - for mask in polygons: - rotated_mask = self.rotate((w / 2, h / 2), mask, angle, - center_shift) - rotated_masks.append(rotated_mask) - results['polys'] = np.array(rotated_masks) - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - return repr_str - - -class SquareResizePad: - def __init__(self, - target_size, - pad_ratio=0.6, - pad_with_fixed_color=False, - pad_value=(0, 0, 0), - **kwargs): - """Resize or pad images to be square shape. - - Args: - target_size (int): The target size of square shaped image. - pad_with_fixed_color (bool): The flag for whether to pad rotated - image with fixed value. If set to False, the rescales image will - be padded onto cropped image. - pad_value (tuple(int)): The color value for padding rotated image. 
- """ - assert isinstance(target_size, int) - assert isinstance(pad_ratio, float) - assert isinstance(pad_with_fixed_color, bool) - assert isinstance(pad_value, tuple) - - self.target_size = target_size - self.pad_ratio = pad_ratio - self.pad_with_fixed_color = pad_with_fixed_color - self.pad_value = pad_value - - def resize_img(self, img, keep_ratio=True): - h, w, _ = img.shape - if keep_ratio: - t_h = self.target_size if h >= w else int(h * self.target_size / w) - t_w = self.target_size if h <= w else int(w * self.target_size / h) - else: - t_h = t_w = self.target_size - img = cv2.resize(img, (t_w, t_h)) - return img, (t_h, t_w) - - def square_pad(self, img): - h, w = img.shape[:2] - if h == w: - return img, (0, 0) - pad_size = max(h, w) - if self.pad_with_fixed_color: - expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8) - expand_img[:] = self.pad_value - else: - (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8), - np.random.randint(0, w * 7 // 8)) - img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)] - expand_img = cv2.resize(img_cut, (pad_size, pad_size)) - if h > w: - y0, x0 = 0, (h - w) // 2 - else: - y0, x0 = (w - h) // 2, 0 - expand_img[y0:y0 + h, x0:x0 + w] = img - offset = (x0, y0) - - return expand_img, offset - - def square_pad_mask(self, points, offset): - x0, y0 = offset - pad_points = points.copy() - pad_points[::2] = pad_points[::2] + x0 - pad_points[1::2] = pad_points[1::2] + y0 - return pad_points - - def __call__(self, results): - image = results['image'] - polygons = results['polys'] - h, w = image.shape[:2] - - if np.random.random_sample() < self.pad_ratio: - image, out_size = self.resize_img(image, keep_ratio=True) - image, offset = self.square_pad(image) - else: - image, out_size = self.resize_img(image, keep_ratio=False) - offset = (0, 0) - results['image'] = image - try: - polygons[:, :, 0::2] = polygons[:, :, 0::2] * out_size[ - 1] / w + offset[0] - polygons[:, :, 1::2] = polygons[:, :, 1::2] * out_size[ - 0] / h + offset[1] - except: - pass - results['polys'] = polygons - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - return repr_str diff --git a/backend/ppocr/data/imaug/fce_targets.py b/backend/ppocr/data/imaug/fce_targets.py deleted file mode 100644 index 18184808..00000000 --- a/backend/ppocr/data/imaug/fce_targets.py +++ /dev/null @@ -1,658 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/fcenet_targets.py -""" - -import cv2 -import numpy as np -from numpy.fft import fft -from numpy.linalg import norm -import sys - - -class FCENetTargets: - """Generate the ground truth targets of FCENet: Fourier Contour Embedding - for Arbitrary-Shaped Text Detection. - - [https://arxiv.org/abs/2104.10442] - - Args: - fourier_degree (int): The maximum Fourier transform degree k. 
- resample_step (float): The step size for resampling the text center - line (TCL). It's better not to exceed half of the minimum width. - center_region_shrink_ratio (float): The shrink ratio of text center - region. - level_size_divisors (tuple(int)): The downsample ratio on each level. - level_proportion_range (tuple(tuple(int))): The range of text sizes - assigned to each level. - """ - - def __init__(self, - fourier_degree=5, - resample_step=4.0, - center_region_shrink_ratio=0.3, - level_size_divisors=(8, 16, 32), - level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0)), - orientation_thr=2.0, - **kwargs): - - super().__init__() - assert isinstance(level_size_divisors, tuple) - assert isinstance(level_proportion_range, tuple) - assert len(level_size_divisors) == len(level_proportion_range) - self.fourier_degree = fourier_degree - self.resample_step = resample_step - self.center_region_shrink_ratio = center_region_shrink_ratio - self.level_size_divisors = level_size_divisors - self.level_proportion_range = level_proportion_range - - self.orientation_thr = orientation_thr - - def vector_angle(self, vec1, vec2): - if vec1.ndim > 1: - unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1)) - else: - unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8) - if vec2.ndim > 1: - unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1)) - else: - unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8) - return np.arccos( - np.clip( - np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0)) - - def resample_line(self, line, n): - """Resample n points on a line. - - Args: - line (ndarray): The points composing a line. - n (int): The resampled points number. - - Returns: - resampled_line (ndarray): The points composing the resampled line. - """ - - assert line.ndim == 2 - assert line.shape[0] >= 2 - assert line.shape[1] == 2 - assert isinstance(n, int) - assert n > 0 - - length_list = [ - norm(line[i + 1] - line[i]) for i in range(len(line) - 1) - ] - total_length = sum(length_list) - length_cumsum = np.cumsum([0.0] + length_list) - delta_length = total_length / (float(n) + 1e-8) - - current_edge_ind = 0 - resampled_line = [line[0]] - - for i in range(1, n): - current_line_len = i * delta_length - - while current_line_len >= length_cumsum[current_edge_ind + 1]: - current_edge_ind += 1 - current_edge_end_shift = current_line_len - length_cumsum[ - current_edge_ind] - end_shift_ratio = current_edge_end_shift / length_list[ - current_edge_ind] - current_point = line[current_edge_ind] + (line[current_edge_ind + 1] - - line[current_edge_ind] - ) * end_shift_ratio - resampled_line.append(current_point) - - resampled_line.append(line[-1]) - resampled_line = np.array(resampled_line) - - return resampled_line - - def reorder_poly_edge(self, points): - """Get the respective points composing head edge, tail edge, top - sideline and bottom sideline. - - Args: - points (ndarray): The points composing a text polygon. - - Returns: - head_edge (ndarray): The two points composing the head edge of text - polygon. - tail_edge (ndarray): The two points composing the tail edge of text - polygon. - top_sideline (ndarray): The points composing top curved sideline of - text polygon. - bot_sideline (ndarray): The points composing bottom curved sideline - of text polygon. 
- """ - - assert points.ndim == 2 - assert points.shape[0] >= 4 - assert points.shape[1] == 2 - - head_inds, tail_inds = self.find_head_tail(points, self.orientation_thr) - head_edge, tail_edge = points[head_inds], points[tail_inds] - - pad_points = np.vstack([points, points]) - if tail_inds[1] < 1: - tail_inds[1] = len(points) - sideline1 = pad_points[head_inds[1]:tail_inds[1]] - sideline2 = pad_points[tail_inds[1]:(head_inds[1] + len(points))] - sideline_mean_shift = np.mean( - sideline1, axis=0) - np.mean( - sideline2, axis=0) - - if sideline_mean_shift[1] > 0: - top_sideline, bot_sideline = sideline2, sideline1 - else: - top_sideline, bot_sideline = sideline1, sideline2 - - return head_edge, tail_edge, top_sideline, bot_sideline - - def find_head_tail(self, points, orientation_thr): - """Find the head edge and tail edge of a text polygon. - - Args: - points (ndarray): The points composing a text polygon. - orientation_thr (float): The threshold for distinguishing between - head edge and tail edge among the horizontal and vertical edges - of a quadrangle. - - Returns: - head_inds (list): The indexes of two points composing head edge. - tail_inds (list): The indexes of two points composing tail edge. - """ - - assert points.ndim == 2 - assert points.shape[0] >= 4 - assert points.shape[1] == 2 - assert isinstance(orientation_thr, float) - - if len(points) > 4: - pad_points = np.vstack([points, points[0]]) - edge_vec = pad_points[1:] - pad_points[:-1] - - theta_sum = [] - adjacent_vec_theta = [] - for i, edge_vec1 in enumerate(edge_vec): - adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]] - adjacent_edge_vec = edge_vec[adjacent_ind] - temp_theta_sum = np.sum( - self.vector_angle(edge_vec1, adjacent_edge_vec)) - temp_adjacent_theta = self.vector_angle(adjacent_edge_vec[0], - adjacent_edge_vec[1]) - theta_sum.append(temp_theta_sum) - adjacent_vec_theta.append(temp_adjacent_theta) - theta_sum_score = np.array(theta_sum) / np.pi - adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi - poly_center = np.mean(points, axis=0) - edge_dist = np.maximum( - norm( - pad_points[1:] - poly_center, axis=-1), - norm( - pad_points[:-1] - poly_center, axis=-1)) - dist_score = edge_dist / np.max(edge_dist) - position_score = np.zeros(len(edge_vec)) - score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score - score += 0.35 * dist_score - if len(points) % 2 == 0: - position_score[(len(score) // 2 - 1)] += 1 - position_score[-1] += 1 - score += 0.1 * position_score - pad_score = np.concatenate([score, score]) - score_matrix = np.zeros((len(score), len(score) - 3)) - x = np.arange(len(score) - 3) / float(len(score) - 4) - gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power( - (x - 0.5) / 0.5, 2.) 
/ 2) - gaussian = gaussian / np.max(gaussian) - for i in range(len(score)): - score_matrix[i, :] = score[i] + pad_score[(i + 2):(i + len( - score) - 1)] * gaussian * 0.3 - - head_start, tail_increment = np.unravel_index(score_matrix.argmax(), - score_matrix.shape) - tail_start = (head_start + tail_increment + 2) % len(points) - head_end = (head_start + 1) % len(points) - tail_end = (tail_start + 1) % len(points) - - if head_end > tail_end: - head_start, tail_start = tail_start, head_start - head_end, tail_end = tail_end, head_end - head_inds = [head_start, head_end] - tail_inds = [tail_start, tail_end] - else: - if self.vector_slope(points[1] - points[0]) + self.vector_slope( - points[3] - points[2]) < self.vector_slope(points[ - 2] - points[1]) + self.vector_slope(points[0] - points[ - 3]): - horizontal_edge_inds = [[0, 1], [2, 3]] - vertical_edge_inds = [[3, 0], [1, 2]] - else: - horizontal_edge_inds = [[3, 0], [1, 2]] - vertical_edge_inds = [[0, 1], [2, 3]] - - vertical_len_sum = norm(points[vertical_edge_inds[0][0]] - points[ - vertical_edge_inds[0][1]]) + norm(points[vertical_edge_inds[1][ - 0]] - points[vertical_edge_inds[1][1]]) - horizontal_len_sum = norm(points[horizontal_edge_inds[0][ - 0]] - points[horizontal_edge_inds[0][1]]) + norm(points[ - horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1] - [1]]) - - if vertical_len_sum > horizontal_len_sum * orientation_thr: - head_inds = horizontal_edge_inds[0] - tail_inds = horizontal_edge_inds[1] - else: - head_inds = vertical_edge_inds[0] - tail_inds = vertical_edge_inds[1] - - return head_inds, tail_inds - - def resample_sidelines(self, sideline1, sideline2, resample_step): - """Resample two sidelines to be of the same points number according to - step size. - - Args: - sideline1 (ndarray): The points composing a sideline of a text - polygon. - sideline2 (ndarray): The points composing another sideline of a - text polygon. - resample_step (float): The resampled step size. - - Returns: - resampled_line1 (ndarray): The resampled line 1. - resampled_line2 (ndarray): The resampled line 2. - """ - - assert sideline1.ndim == sideline2.ndim == 2 - assert sideline1.shape[1] == sideline2.shape[1] == 2 - assert sideline1.shape[0] >= 2 - assert sideline2.shape[0] >= 2 - assert isinstance(resample_step, float) - - length1 = sum([ - norm(sideline1[i + 1] - sideline1[i]) - for i in range(len(sideline1) - 1) - ]) - length2 = sum([ - norm(sideline2[i + 1] - sideline2[i]) - for i in range(len(sideline2) - 1) - ]) - - total_length = (length1 + length2) / 2 - resample_point_num = max(int(float(total_length) / resample_step), 1) - - resampled_line1 = self.resample_line(sideline1, resample_point_num) - resampled_line2 = self.resample_line(sideline2, resample_point_num) - - return resampled_line1, resampled_line2 - - def generate_center_region_mask(self, img_size, text_polys): - """Generate text center region mask. - - Args: - img_size (tuple): The image size of (height, width). - text_polys (list[list[ndarray]]): The list of text polygons. - - Returns: - center_region_mask (ndarray): The text center region mask. 
- """ - - assert isinstance(img_size, tuple) - # assert check_argument.is_2dlist(text_polys) - - h, w = img_size - - center_region_mask = np.zeros((h, w), np.uint8) - - center_region_boxes = [] - for poly in text_polys: - # assert len(poly) == 1 - polygon_points = poly.reshape(-1, 2) - _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points) - resampled_top_line, resampled_bot_line = self.resample_sidelines( - top_line, bot_line, self.resample_step) - resampled_bot_line = resampled_bot_line[::-1] - center_line = (resampled_top_line + resampled_bot_line) / 2 - - line_head_shrink_len = norm(resampled_top_line[0] - - resampled_bot_line[0]) / 4.0 - line_tail_shrink_len = norm(resampled_top_line[-1] - - resampled_bot_line[-1]) / 4.0 - head_shrink_num = int(line_head_shrink_len // self.resample_step) - tail_shrink_num = int(line_tail_shrink_len // self.resample_step) - if len(center_line) > head_shrink_num + tail_shrink_num + 2: - center_line = center_line[head_shrink_num:len(center_line) - - tail_shrink_num] - resampled_top_line = resampled_top_line[head_shrink_num:len( - resampled_top_line) - tail_shrink_num] - resampled_bot_line = resampled_bot_line[head_shrink_num:len( - resampled_bot_line) - tail_shrink_num] - - for i in range(0, len(center_line) - 1): - tl = center_line[i] + (resampled_top_line[i] - center_line[i] - ) * self.center_region_shrink_ratio - tr = center_line[i + 1] + (resampled_top_line[i + 1] - - center_line[i + 1] - ) * self.center_region_shrink_ratio - br = center_line[i + 1] + (resampled_bot_line[i + 1] - - center_line[i + 1] - ) * self.center_region_shrink_ratio - bl = center_line[i] + (resampled_bot_line[i] - center_line[i] - ) * self.center_region_shrink_ratio - current_center_box = np.vstack([tl, tr, br, - bl]).astype(np.int32) - center_region_boxes.append(current_center_box) - - cv2.fillPoly(center_region_mask, center_region_boxes, 1) - return center_region_mask - - def resample_polygon(self, polygon, n=400): - """Resample one polygon with n points on its boundary. - - Args: - polygon (list[float]): The input polygon. - n (int): The number of resampled points. - Returns: - resampled_polygon (list[float]): The resampled polygon. - """ - length = [] - - for i in range(len(polygon)): - p1 = polygon[i] - if i == len(polygon) - 1: - p2 = polygon[0] - else: - p2 = polygon[i + 1] - length.append(((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5) - - total_length = sum(length) - n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n - n_on_each_line = n_on_each_line.astype(np.int32) - new_polygon = [] - - for i in range(len(polygon)): - num = n_on_each_line[i] - p1 = polygon[i] - if i == len(polygon) - 1: - p2 = polygon[0] - else: - p2 = polygon[i + 1] - - if num == 0: - continue - - dxdy = (p2 - p1) / num - for j in range(num): - point = p1 + dxdy * j - new_polygon.append(point) - - return np.array(new_polygon) - - def normalize_polygon(self, polygon): - """Normalize one polygon so that its start point is at right most. - - Args: - polygon (list[float]): The origin polygon. - Returns: - new_polygon (lost[float]): The polygon with start point at right. 
- """ - temp_polygon = polygon - polygon.mean(axis=0) - x = np.abs(temp_polygon[:, 0]) - y = temp_polygon[:, 1] - index_x = np.argsort(x) - index_y = np.argmin(y[index_x[:8]]) - index = index_x[index_y] - new_polygon = np.concatenate([polygon[index:], polygon[:index]]) - return new_polygon - - def poly2fourier(self, polygon, fourier_degree): - """Perform Fourier transformation to generate Fourier coefficients ck - from polygon. - - Args: - polygon (ndarray): An input polygon. - fourier_degree (int): The maximum Fourier degree K. - Returns: - c (ndarray(complex)): Fourier coefficients. - """ - points = polygon[:, 0] + polygon[:, 1] * 1j - c_fft = fft(points) / len(points) - c = np.hstack((c_fft[-fourier_degree:], c_fft[:fourier_degree + 1])) - return c - - def clockwise(self, c, fourier_degree): - """Make sure the polygon reconstructed from Fourier coefficients c in - the clockwise direction. - - Args: - polygon (list[float]): The origin polygon. - Returns: - new_polygon (lost[float]): The polygon in clockwise point order. - """ - if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]): - return c - elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]): - return c[::-1] - else: - if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]): - return c - else: - return c[::-1] - - def cal_fourier_signature(self, polygon, fourier_degree): - """Calculate Fourier signature from input polygon. - - Args: - polygon (ndarray): The input polygon. - fourier_degree (int): The maximum Fourier degree K. - Returns: - fourier_signature (ndarray): An array shaped (2k+1, 2) containing - real part and image part of 2k+1 Fourier coefficients. - """ - resampled_polygon = self.resample_polygon(polygon) - resampled_polygon = self.normalize_polygon(resampled_polygon) - - fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree) - fourier_coeff = self.clockwise(fourier_coeff, fourier_degree) - - real_part = np.real(fourier_coeff).reshape((-1, 1)) - image_part = np.imag(fourier_coeff).reshape((-1, 1)) - fourier_signature = np.hstack([real_part, image_part]) - - return fourier_signature - - def generate_fourier_maps(self, img_size, text_polys): - """Generate Fourier coefficient maps. - - Args: - img_size (tuple): The image size of (height, width). - text_polys (list[list[ndarray]]): The list of text polygons. - - Returns: - fourier_real_map (ndarray): The Fourier coefficient real part maps. - fourier_image_map (ndarray): The Fourier coefficient image part - maps. 
- """ - - assert isinstance(img_size, tuple) - - h, w = img_size - k = self.fourier_degree - real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32) - imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32) - - for poly in text_polys: - mask = np.zeros((h, w), dtype=np.uint8) - polygon = np.array(poly).reshape((1, -1, 2)) - cv2.fillPoly(mask, polygon.astype(np.int32), 1) - fourier_coeff = self.cal_fourier_signature(polygon[0], k) - for i in range(-k, k + 1): - if i != 0: - real_map[i + k, :, :] = mask * fourier_coeff[i + k, 0] + ( - 1 - mask) * real_map[i + k, :, :] - imag_map[i + k, :, :] = mask * fourier_coeff[i + k, 1] + ( - 1 - mask) * imag_map[i + k, :, :] - else: - yx = np.argwhere(mask > 0.5) - k_ind = np.ones((len(yx)), dtype=np.int64) * k - y, x = yx[:, 0], yx[:, 1] - real_map[k_ind, y, x] = fourier_coeff[k, 0] - x - imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y - - return real_map, imag_map - - def generate_text_region_mask(self, img_size, text_polys): - """Generate text center region mask and geometry attribute maps. - - Args: - img_size (tuple): The image size (height, width). - text_polys (list[list[ndarray]]): The list of text polygons. - - Returns: - text_region_mask (ndarray): The text region mask. - """ - - assert isinstance(img_size, tuple) - - h, w = img_size - text_region_mask = np.zeros((h, w), dtype=np.uint8) - - for poly in text_polys: - polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2)) - cv2.fillPoly(text_region_mask, polygon, 1) - - return text_region_mask - - def generate_effective_mask(self, mask_size: tuple, polygons_ignore): - """Generate effective mask by setting the ineffective regions to 0 and - effective regions to 1. - - Args: - mask_size (tuple): The mask size. - polygons_ignore (list[[ndarray]]: The list of ignored text - polygons. - - Returns: - mask (ndarray): The effective mask of (height, width). - """ - - mask = np.ones(mask_size, dtype=np.uint8) - - for poly in polygons_ignore: - instance = poly.reshape(-1, 2).astype(np.int32).reshape(1, -1, 2) - cv2.fillPoly(mask, instance, 0) - - return mask - - def generate_level_targets(self, img_size, text_polys, ignore_polys): - """Generate ground truth target on each level. - - Args: - img_size (list[int]): Shape of input image. - text_polys (list[list[ndarray]]): A list of ground truth polygons. - ignore_polys (list[list[ndarray]]): A list of ignored polygons. - Returns: - level_maps (list(ndarray)): A list of ground target on each level. 
- """ - h, w = img_size - lv_size_divs = self.level_size_divisors - lv_proportion_range = self.level_proportion_range - lv_text_polys = [[] for i in range(len(lv_size_divs))] - lv_ignore_polys = [[] for i in range(len(lv_size_divs))] - level_maps = [] - for poly in text_polys: - polygon = np.array(poly, dtype=np.int).reshape((1, -1, 2)) - _, _, box_w, box_h = cv2.boundingRect(polygon) - proportion = max(box_h, box_w) / (h + 1e-8) - - for ind, proportion_range in enumerate(lv_proportion_range): - if proportion_range[0] < proportion < proportion_range[1]: - lv_text_polys[ind].append(poly / lv_size_divs[ind]) - - for ignore_poly in ignore_polys: - polygon = np.array(ignore_poly, dtype=np.int).reshape((1, -1, 2)) - _, _, box_w, box_h = cv2.boundingRect(polygon) - proportion = max(box_h, box_w) / (h + 1e-8) - - for ind, proportion_range in enumerate(lv_proportion_range): - if proportion_range[0] < proportion < proportion_range[1]: - lv_ignore_polys[ind].append(ignore_poly / lv_size_divs[ind]) - - for ind, size_divisor in enumerate(lv_size_divs): - current_level_maps = [] - level_img_size = (h // size_divisor, w // size_divisor) - - text_region = self.generate_text_region_mask( - level_img_size, lv_text_polys[ind])[None] - current_level_maps.append(text_region) - - center_region = self.generate_center_region_mask( - level_img_size, lv_text_polys[ind])[None] - current_level_maps.append(center_region) - - effective_mask = self.generate_effective_mask( - level_img_size, lv_ignore_polys[ind])[None] - current_level_maps.append(effective_mask) - - fourier_real_map, fourier_image_maps = self.generate_fourier_maps( - level_img_size, lv_text_polys[ind]) - current_level_maps.append(fourier_real_map) - current_level_maps.append(fourier_image_maps) - - level_maps.append(np.concatenate(current_level_maps)) - - return level_maps - - def generate_targets(self, results): - """Generate the ground truth targets for FCENet. - - Args: - results (dict): The input result dictionary. - - Returns: - results (dict): The output result dictionary. - """ - - assert isinstance(results, dict) - image = results['image'] - polygons = results['polys'] - ignore_tags = results['ignore_tags'] - h, w, _ = image.shape - - polygon_masks = [] - polygon_masks_ignore = [] - for tag, polygon in zip(ignore_tags, polygons): - if tag is True: - polygon_masks_ignore.append(polygon) - else: - polygon_masks.append(polygon) - - level_maps = self.generate_level_targets((h, w), polygon_masks, - polygon_masks_ignore) - - mapping = { - 'p3_maps': level_maps[0], - 'p4_maps': level_maps[1], - 'p5_maps': level_maps[2] - } - for key, value in mapping.items(): - results[key] = value - - return results - - def __call__(self, results): - results = self.generate_targets(results) - return results diff --git a/backend/ppocr/data/imaug/gen_table_mask.py b/backend/ppocr/data/imaug/gen_table_mask.py deleted file mode 100644 index 08e35d5d..00000000 --- a/backend/ppocr/data/imaug/gen_table_mask.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import sys
-import six
-import cv2
-import numpy as np
-
-
-class GenTableMask(object):
-    """ gen table mask """
-
-    def __init__(self, shrink_h_max, shrink_w_max, mask_type=0, **kwargs):
-        # note: the constructor arguments are ignored; both shrink limits are fixed at 5
-        self.shrink_h_max = 5
-        self.shrink_w_max = 5
-        self.mask_type = mask_type
-
-    def projection(self, erosion, h, w, spilt_threshold=0):
-        # horizontal projection
-        projection_map = np.ones_like(erosion)
-        project_val_array = [0 for _ in range(0, h)]
-
-        for j in range(0, h):
-            for i in range(0, w):
-                if erosion[j, i] == 255:
-                    project_val_array[j] += 1
-        # derive the split points from the projection array
-        start_idx = 0  # index where a text region starts
-        end_idx = 0  # index where a blank region starts
-        in_text = False  # whether the scan is currently inside a text region
-        box_list = []
-        for i in range(len(project_val_array)):
-            if in_text == False and project_val_array[i] > spilt_threshold:  # entered a text region
-                in_text = True
-                start_idx = i
-            elif project_val_array[i] <= spilt_threshold and in_text == True:  # entered a blank region
-                end_idx = i
-                in_text = False
-                if end_idx - start_idx <= 2:
-                    continue
-                box_list.append((start_idx, end_idx + 1))
-
-        if in_text:
-            box_list.append((start_idx, h - 1))
-        # draw the projection histogram
-        for j in range(0, h):
-            for i in range(0, project_val_array[j]):
-                projection_map[j, i] = 0
-        return box_list, projection_map
-
-    def projection_cx(self, box_img):
-        box_gray_img = cv2.cvtColor(box_img, cv2.COLOR_BGR2GRAY)
-        h, w = box_gray_img.shape
-        # binarize the grayscale image
-        ret, thresh1 = cv2.threshold(box_gray_img, 200, 255, cv2.THRESH_BINARY_INV)
-        # vertical erosion
-        if h < w:
-            kernel = np.ones((2, 1), np.uint8)
-            erode = cv2.erode(thresh1, kernel, iterations=1)
-        else:
-            erode = thresh1
-        # horizontal dilation
-        kernel = np.ones((1, 5), np.uint8)
-        erosion = cv2.dilate(erode, kernel, iterations=1)
-        # horizontal projection
-        projection_map = np.ones_like(erosion)
-        project_val_array = [0 for _ in range(0, h)]
-
-        for j in range(0, h):
-            for i in range(0, w):
-                if erosion[j, i] == 255:
-                    project_val_array[j] += 1
-        # derive the split points from the projection array
-        start_idx = 0  # index where a text region starts
-        end_idx = 0  # index where a blank region starts
-        in_text = False  # whether the scan is currently inside a text region
-        box_list = []
-        spilt_threshold = 0
-        for i in range(len(project_val_array)):
-            if in_text == False and project_val_array[i] > spilt_threshold:  # entered a text region
-                in_text = True
-                start_idx = i
-            elif project_val_array[i] <= spilt_threshold and in_text == True:  # entered a blank region
-                end_idx = i
-                in_text = False
-                if end_idx - start_idx <= 2:
-                    continue
-                box_list.append((start_idx, end_idx + 1))
-
-        if in_text:
-            box_list.append((start_idx, h - 1))
-        # draw the projection histogram
-        for j in range(0, h):
-            for i in range(0, project_val_array[j]):
-                projection_map[j, i] = 0
-        split_bbox_list = []
-        if len(box_list) > 1:
-            for i, (h_start, h_end) in enumerate(box_list):
-                if i == 0:
-                    h_start = 0
-                if i == len(box_list):  # note: enumerate stops at len - 1, so this never triggers
-                    h_end = h
-                word_img = erosion[h_start:h_end + 1, :]
-                word_h, word_w = word_img.shape
-                w_split_list, w_projection_map = self.projection(word_img.T, word_w, word_h)
-                w_start, w_end = w_split_list[0][0], w_split_list[-1][1]
-                if h_start > 0:
-                    h_start -= 1
-                h_end += 1
-                word_img = box_img[h_start:h_end + 1:, w_start:w_end + 1, :]
-                split_bbox_list.append([w_start, h_start, w_end, h_end])
-        else:
-            split_bbox_list.append([0, 0, w, h])
-        return split_bbox_list
-
-    def shrink_bbox(self, bbox):
-        left, top, right, bottom = bbox
-        sh_h = min(max(int((bottom - top) * 0.1), 1), self.shrink_h_max)
-        sh_w = min(max(int((right - left) * 0.1), 1),
self.shrink_w_max) - left_new = left + sh_w - right_new = right - sh_w - top_new = top + sh_h - bottom_new = bottom - sh_h - if left_new >= right_new: - left_new = left - right_new = right - if top_new >= bottom_new: - top_new = top - bottom_new = bottom - return [left_new, top_new, right_new, bottom_new] - - def __call__(self, data): - img = data['image'] - cells = data['cells'] - height, width = img.shape[0:2] - if self.mask_type == 1: - mask_img = np.zeros((height, width), dtype=np.float32) - else: - mask_img = np.zeros((height, width, 3), dtype=np.float32) - cell_num = len(cells) - for cno in range(cell_num): - if "bbox" in cells[cno]: - bbox = cells[cno]['bbox'] - left, top, right, bottom = bbox - box_img = img[top:bottom, left:right, :].copy() - split_bbox_list = self.projection_cx(box_img) - for sno in range(len(split_bbox_list)): - split_bbox_list[sno][0] += left - split_bbox_list[sno][1] += top - split_bbox_list[sno][2] += left - split_bbox_list[sno][3] += top - - for sno in range(len(split_bbox_list)): - left, top, right, bottom = split_bbox_list[sno] - left, top, right, bottom = self.shrink_bbox([left, top, right, bottom]) - if self.mask_type == 1: - mask_img[top:bottom, left:right] = 1.0 - data['mask_img'] = mask_img - else: - mask_img[top:bottom, left:right, :] = (255, 255, 255) - data['image'] = mask_img - return data - -class ResizeTableImage(object): - def __init__(self, max_len, **kwargs): - super(ResizeTableImage, self).__init__() - self.max_len = max_len - - def get_img_bbox(self, cells): - bbox_list = [] - if len(cells) == 0: - return bbox_list - cell_num = len(cells) - for cno in range(cell_num): - if "bbox" in cells[cno]: - bbox = cells[cno]['bbox'] - bbox_list.append(bbox) - return bbox_list - - def resize_img_table(self, img, bbox_list, max_len): - height, width = img.shape[0:2] - ratio = max_len / (max(height, width) * 1.0) - resize_h = int(height * ratio) - resize_w = int(width * ratio) - img_new = cv2.resize(img, (resize_w, resize_h)) - bbox_list_new = [] - for bno in range(len(bbox_list)): - left, top, right, bottom = bbox_list[bno].copy() - left = int(left * ratio) - top = int(top * ratio) - right = int(right * ratio) - bottom = int(bottom * ratio) - bbox_list_new.append([left, top, right, bottom]) - return img_new, bbox_list_new - - def __call__(self, data): - img = data['image'] - if 'cells' not in data: - cells = [] - else: - cells = data['cells'] - bbox_list = self.get_img_bbox(cells) - img_new, bbox_list_new = self.resize_img_table(img, bbox_list, self.max_len) - data['image'] = img_new - cell_num = len(cells) - bno = 0 - for cno in range(cell_num): - if "bbox" in data['cells'][cno]: - data['cells'][cno]['bbox'] = bbox_list_new[bno] - bno += 1 - data['max_len'] = self.max_len - return data - -class PaddingTableImage(object): - def __init__(self, **kwargs): - super(PaddingTableImage, self).__init__() - - def __call__(self, data): - img = data['image'] - max_len = data['max_len'] - padding_img = np.zeros((max_len, max_len, 3), dtype=np.float32) - height, width = img.shape[0:2] - padding_img[0:height, 0:width, :] = img.copy() - data['image'] = padding_img - return data - \ No newline at end of file diff --git a/backend/ppocr/data/imaug/iaa_augment.py b/backend/ppocr/data/imaug/iaa_augment.py deleted file mode 100644 index bf6ea2bf..00000000 --- a/backend/ppocr/data/imaug/iaa_augment.py +++ /dev/null @@ -1,104 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np - -class AugmenterBuilder(object): - def __init__(self): - pass - - def build(self, args, root=True): - import imgaug.augmenters as iaa - if args is None or len(args) == 0: - return None - elif isinstance(args, list): - if root: - sequence = [self.build(value, root=False) for value in args] - return iaa.Sequential(sequence) - else: - return getattr(iaa, args[0])( - *[self.to_tuple_if_list(a) for a in args[1:]]) - elif isinstance(args, dict): - cls = getattr(iaa, args['type']) - return cls(**{ - k: self.to_tuple_if_list(v) - for k, v in args['args'].items() - }) - else: - raise RuntimeError('unknown augmenter arg: ' + str(args)) - - def to_tuple_if_list(self, obj): - if isinstance(obj, list): - return tuple(obj) - return obj - - -class IaaAugment(): - def __init__(self, augmenter_args=None, **kwargs): - if augmenter_args is None: - augmenter_args = [{ - 'type': 'Fliplr', - 'args': { - 'p': 0.5 - } - }, { - 'type': 'Affine', - 'args': { - 'rotate': [-10, 10] - } - }, { - 'type': 'Resize', - 'args': { - 'size': [0.5, 3] - } - }] - self.augmenter = AugmenterBuilder().build(augmenter_args) - - def __call__(self, data): - image = data['image'] - shape = image.shape - - if self.augmenter: - aug = self.augmenter.to_deterministic() - data['image'] = aug.augment_image(image) - data = self.may_augment_annotation(aug, data, shape) - return data - - def may_augment_annotation(self, aug, data, shape): - if aug is None: - return data - - line_polys = [] - for poly in data['polys']: - new_poly = self.may_augment_poly(aug, shape, poly) - line_polys.append(new_poly) - data['polys'] = np.array(line_polys) - return data - - def may_augment_poly(self, aug, img_shape, poly): - import imgaug - keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly] - keypoints = aug.augment_keypoints( - [imgaug.KeypointsOnImage( - keypoints, shape=img_shape)])[0].keypoints - poly = [(p.x, p.y) for p in keypoints] - return poly diff --git a/backend/ppocr/data/imaug/label_ops.py b/backend/ppocr/data/imaug/label_ops.py deleted file mode 100644 index c9bc2e77..00000000 --- a/backend/ppocr/data/imaug/label_ops.py +++ /dev/null @@ -1,1041 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy -import numpy as np -import string -from shapely.geometry import LineString, Point, Polygon -import json -import copy - -from ppocr.utils.logging import get_logger - - -class ClsLabelEncode(object): - def __init__(self, label_list, **kwargs): - self.label_list = label_list - - def __call__(self, data): - label = data['label'] - if label not in self.label_list: - return None - label = self.label_list.index(label) - data['label'] = label - return data - - -class DetLabelEncode(object): - def __init__(self, **kwargs): - pass - - def __call__(self, data): - label = data['label'] - label = json.loads(label) - nBox = len(label) - boxes, txts, txt_tags = [], [], [] - for bno in range(0, nBox): - box = label[bno]['points'] - txt = label[bno]['transcription'] - boxes.append(box) - txts.append(txt) - if txt in ['*', '###']: - txt_tags.append(True) - else: - txt_tags.append(False) - if len(boxes) == 0: - return None - boxes = self.expand_points_num(boxes) - boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) - - data['polys'] = boxes - data['texts'] = txts - data['ignore_tags'] = txt_tags - return data - - def order_points_clockwise(self, pts): - rect = np.zeros((4, 2), dtype="float32") - s = pts.sum(axis=1) - rect[0] = pts[np.argmin(s)] - rect[2] = pts[np.argmax(s)] - diff = np.diff(pts, axis=1) - rect[1] = pts[np.argmin(diff)] - rect[3] = pts[np.argmax(diff)] - return rect - - def expand_points_num(self, boxes): - max_points_num = 0 - for box in boxes: - if len(box) > max_points_num: - max_points_num = len(box) - ex_boxes = [] - for box in boxes: - ex_box = box + [box[-1]] * (max_points_num - len(box)) - ex_boxes.append(ex_box) - return ex_boxes - - -class BaseRecLabelEncode(object): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False): - - self.max_text_len = max_text_length - self.beg_str = "sos" - self.end_str = "eos" - self.lower = False - - if character_dict_path is None: - logger = get_logger() - logger.warning( - "The character_dict_path is None, model can only recognize number and lower letters" - ) - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - self.lower = True - else: - self.character_str = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str.append(line) - if use_space_char: - self.character_str.append(" ") - dict_character = list(self.character_str) - dict_character = self.add_special_char(dict_character) - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def add_special_char(self, dict_character): - return dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. 
[batch_size]
-        """
-        if len(text) == 0 or len(text) > self.max_text_len:
-            return None
-        if self.lower:
-            text = text.lower()
-        text_list = []
-        for char in text:
-            if char not in self.dict:
-                # logger = get_logger()
-                # logger.warning('{} is not in dict'.format(char))
-                continue
-            text_list.append(self.dict[char])
-        if len(text_list) == 0:
-            return None
-        return text_list
-
-
-class NRTRLabelEncode(BaseRecLabelEncode):
-    """ Convert between text-label and text-index """
-
-    def __init__(self,
-                 max_text_length,
-                 character_dict_path=None,
-                 use_space_char=False,
-                 **kwargs):
-
-        super(NRTRLabelEncode, self).__init__(
-            max_text_length, character_dict_path, use_space_char)
-
-    def __call__(self, data):
-        text = data['label']
-        text = self.encode(text)
-        if text is None:
-            return None
-        if len(text) >= self.max_text_len - 1:
-            return None
-        data['length'] = np.array(len(text))
-        text.insert(0, 2)  # 2 = <s> in the dictionary built below
-        text.append(3)  # 3 = </s>
-        text = text + [0] * (self.max_text_len - len(text))
-        data['label'] = np.array(text)
-        return data
-
-    def add_special_char(self, dict_character):
-        dict_character = ['blank', '<unk>', '<s>', '</s>'] + dict_character
-        return dict_character
-
-
-class CTCLabelEncode(BaseRecLabelEncode):
-    """ Convert between text-label and text-index """
-
-    def __init__(self,
-                 max_text_length,
-                 character_dict_path=None,
-                 use_space_char=False,
-                 **kwargs):
-        super(CTCLabelEncode, self).__init__(
-            max_text_length, character_dict_path, use_space_char)
-
-    def __call__(self, data):
-        text = data['label']
-        text = self.encode(text)
-        if text is None:
-            return None
-        data['length'] = np.array(len(text))
-        text = text + [0] * (self.max_text_len - len(text))
-        data['label'] = np.array(text)
-
-        label = [0] * len(self.character)
-        for x in text:
-            label[x] += 1
-        data['label_ace'] = np.array(label)
-        return data
-
-    def add_special_char(self, dict_character):
-        dict_character = ['blank'] + dict_character
-        return dict_character
-
-
-class E2ELabelEncodeTest(BaseRecLabelEncode):
-    def __init__(self,
-                 max_text_length,
-                 character_dict_path=None,
-                 use_space_char=False,
-                 **kwargs):
-        super(E2ELabelEncodeTest, self).__init__(
-            max_text_length, character_dict_path, use_space_char)
-
-    def __call__(self, data):
-        import json
-        padnum = len(self.dict)
-        label = data['label']
-        label = json.loads(label)
-        nBox = len(label)
-        boxes, txts, txt_tags = [], [], []
-        for bno in range(0, nBox):
-            box = label[bno]['points']
-            txt = label[bno]['transcription']
-            boxes.append(box)
-            txts.append(txt)
-            if txt in ['*', '###']:
-                txt_tags.append(True)
-            else:
-                txt_tags.append(False)
-        boxes = np.array(boxes, dtype=np.float32)
-        txt_tags = np.array(txt_tags, dtype=np.bool)
-        data['polys'] = boxes
-        data['ignore_tags'] = txt_tags
-        temp_texts = []
-        for text in txts:
-            text = text.lower()
-            text = self.encode(text)
-            if text is None:
-                return None
-            text = text + [padnum] * (self.max_text_len - len(text)
-                                      )  # use 36 to pad
-            temp_texts.append(text)
-        data['texts'] = np.array(temp_texts)
-        return data
-
-
-class E2ELabelEncodeTrain(object):
-    def __init__(self, **kwargs):
-        pass
-
-    def __call__(self, data):
-        import json
-        label = data['label']
-        label = json.loads(label)
-        nBox = len(label)
-        boxes, txts, txt_tags = [], [], []
-        for bno in range(0, nBox):
-            box = label[bno]['points']
-            txt = label[bno]['transcription']
-            boxes.append(box)
-            txts.append(txt)
-            if txt in ['*', '###']:
-                txt_tags.append(True)
-            else:
-                txt_tags.append(False)
-        boxes = np.array(boxes, dtype=np.float32)
-        txt_tags = np.array(txt_tags, dtype=np.bool)
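-        # boxes/txts/txt_tags now hold, per annotation, the polygon points, the
-        # transcription, and an ignore flag ('*' and '###' mark ignored text)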
- - data['polys'] = boxes - data['texts'] = txts - data['ignore_tags'] = txt_tags - return data - - -class KieLabelEncode(object): - def __init__(self, character_dict_path, norm=10, directed=False, **kwargs): - super(KieLabelEncode, self).__init__() - self.dict = dict({'': 0}) - with open(character_dict_path, 'r', encoding='utf-8') as fr: - idx = 1 - for line in fr: - char = line.strip() - self.dict[char] = idx - idx += 1 - self.norm = norm - self.directed = directed - - def compute_relation(self, boxes): - """Compute relation between every two boxes.""" - x1s, y1s = boxes[:, 0:1], boxes[:, 1:2] - x2s, y2s = boxes[:, 4:5], boxes[:, 5:6] - ws, hs = x2s - x1s + 1, np.maximum(y2s - y1s + 1, 1) - dxs = (x1s[:, 0][None] - x1s) / self.norm - dys = (y1s[:, 0][None] - y1s) / self.norm - xhhs, xwhs = hs[:, 0][None] / hs, ws[:, 0][None] / hs - whs = ws / hs + np.zeros_like(xhhs) - relations = np.stack([dxs, dys, whs, xhhs, xwhs], -1) - bboxes = np.concatenate([x1s, y1s, x2s, y2s], -1).astype(np.float32) - return relations, bboxes - - def pad_text_indices(self, text_inds): - """Pad text index to same length.""" - max_len = 300 - recoder_len = max([len(text_ind) for text_ind in text_inds]) - padded_text_inds = -np.ones((len(text_inds), max_len), np.int32) - for idx, text_ind in enumerate(text_inds): - padded_text_inds[idx, :len(text_ind)] = np.array(text_ind) - return padded_text_inds, recoder_len - - def list_to_numpy(self, ann_infos): - """Convert bboxes, relations, texts and labels to ndarray.""" - boxes, text_inds = ann_infos['points'], ann_infos['text_inds'] - boxes = np.array(boxes, np.int32) - relations, bboxes = self.compute_relation(boxes) - - labels = ann_infos.get('labels', None) - if labels is not None: - labels = np.array(labels, np.int32) - edges = ann_infos.get('edges', None) - if edges is not None: - labels = labels[:, None] - edges = np.array(edges) - edges = (edges[:, None] == edges[None, :]).astype(np.int32) - if self.directed: - edges = (edges & labels == 1).astype(np.int32) - np.fill_diagonal(edges, -1) - labels = np.concatenate([labels, edges], -1) - padded_text_inds, recoder_len = self.pad_text_indices(text_inds) - max_num = 300 - temp_bboxes = np.zeros([max_num, 4]) - h, _ = bboxes.shape - temp_bboxes[:h, :] = bboxes - - temp_relations = np.zeros([max_num, max_num, 5]) - temp_relations[:h, :h, :] = relations - - temp_padded_text_inds = np.zeros([max_num, max_num]) - temp_padded_text_inds[:h, :] = padded_text_inds - - temp_labels = np.zeros([max_num, max_num]) - temp_labels[:h, :h + 1] = labels - - tag = np.array([h, recoder_len]) - return dict( - image=ann_infos['image'], - points=temp_bboxes, - relations=temp_relations, - texts=temp_padded_text_inds, - labels=temp_labels, - tag=tag) - - def convert_canonical(self, points_x, points_y): - - assert len(points_x) == 4 - assert len(points_y) == 4 - - points = [Point(points_x[i], points_y[i]) for i in range(4)] - - polygon = Polygon([(p.x, p.y) for p in points]) - min_x, min_y, _, _ = polygon.bounds - points_to_lefttop = [ - LineString([points[i], Point(min_x, min_y)]) for i in range(4) - ] - distances = np.array([line.length for line in points_to_lefttop]) - sort_dist_idx = np.argsort(distances) - lefttop_idx = sort_dist_idx[0] - - if lefttop_idx == 0: - point_orders = [0, 1, 2, 3] - elif lefttop_idx == 1: - point_orders = [1, 2, 3, 0] - elif lefttop_idx == 2: - point_orders = [2, 3, 0, 1] - else: - point_orders = [3, 0, 1, 2] - - sorted_points_x = [points_x[i] for i in point_orders] - sorted_points_y = [points_y[j] for j in 
point_orders] - - return sorted_points_x, sorted_points_y - - def sort_vertex(self, points_x, points_y): - - assert len(points_x) == 4 - assert len(points_y) == 4 - - x = np.array(points_x) - y = np.array(points_y) - center_x = np.sum(x) * 0.25 - center_y = np.sum(y) * 0.25 - - x_arr = np.array(x - center_x) - y_arr = np.array(y - center_y) - - angle = np.arctan2(y_arr, x_arr) * 180.0 / np.pi - sort_idx = np.argsort(angle) - - sorted_points_x, sorted_points_y = [], [] - for i in range(4): - sorted_points_x.append(points_x[sort_idx[i]]) - sorted_points_y.append(points_y[sort_idx[i]]) - - return self.convert_canonical(sorted_points_x, sorted_points_y) - - def __call__(self, data): - import json - label = data['label'] - annotations = json.loads(label) - boxes, texts, text_inds, labels, edges = [], [], [], [], [] - for ann in annotations: - box = ann['points'] - x_list = [box[i][0] for i in range(4)] - y_list = [box[i][1] for i in range(4)] - sorted_x_list, sorted_y_list = self.sort_vertex(x_list, y_list) - sorted_box = [] - for x, y in zip(sorted_x_list, sorted_y_list): - sorted_box.append(x) - sorted_box.append(y) - boxes.append(sorted_box) - text = ann['transcription'] - texts.append(ann['transcription']) - text_ind = [self.dict[c] for c in text if c in self.dict] - text_inds.append(text_ind) - labels.append(ann['label']) - edges.append(ann.get('edge', 0)) - ann_infos = dict( - image=data['image'], - points=boxes, - texts=texts, - text_inds=text_inds, - edges=edges, - labels=labels) - - return self.list_to_numpy(ann_infos) - - -class AttnLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(AttnLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - self.beg_str = "sos" - self.end_str = "eos" - dict_character = [self.beg_str] + dict_character + [self.end_str] - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - if len(text) >= self.max_text_len: - return None - data['length'] = np.array(len(text)) - text = [0] + text + [len(self.character) - 1] + [0] * (self.max_text_len - - len(text) - 2) - data['label'] = np.array(text) - return data - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class SEEDLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(SEEDLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - self.padding = "padding" - self.end_str = "eos" - self.unknown = "unknown" - dict_character = dict_character + [ - self.end_str, self.padding, self.unknown - ] - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - if len(text) >= self.max_text_len: - return None - data['length'] = 
np.array(len(text)) + 1 # conclude eos - text = text + [len(self.character) - 3] + [len(self.character) - 2] * ( - self.max_text_len - len(text) - 1) - data['label'] = np.array(text) - return data - - -class SRNLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length=25, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(SRNLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - dict_character = dict_character + [self.beg_str, self.end_str] - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - char_num = len(self.character) - if text is None: - return None - if len(text) > self.max_text_len: - return None - data['length'] = np.array(len(text)) - text = text + [char_num - 1] * (self.max_text_len - len(text)) - data['label'] = np.array(text) - return data - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class TableLabelEncode(object): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - max_elem_length, - max_cell_num, - character_dict_path, - span_weight=1.0, - **kwargs): - self.max_text_length = max_text_length - self.max_elem_length = max_elem_length - self.max_cell_num = max_cell_num - list_character, list_elem = self.load_char_elem_dict( - character_dict_path) - list_character = self.add_special_char(list_character) - list_elem = self.add_special_char(list_elem) - self.dict_character = {} - for i, char in enumerate(list_character): - self.dict_character[char] = i - self.dict_elem = {} - for i, elem in enumerate(list_elem): - self.dict_elem[elem] = i - self.span_weight = span_weight - - def load_char_elem_dict(self, character_dict_path): - list_character = [] - list_elem = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - substr = lines[0].decode('utf-8').strip("\r\n").split("\t") - character_num = int(substr[0]) - elem_num = int(substr[1]) - for cno in range(1, 1 + character_num): - character = lines[cno].decode('utf-8').strip("\r\n") - list_character.append(character) - for eno in range(1 + character_num, 1 + character_num + elem_num): - elem = lines[eno].decode('utf-8').strip("\r\n") - list_elem.append(elem) - return list_character, list_elem - - def add_special_char(self, list_character): - self.beg_str = "sos" - self.end_str = "eos" - list_character = [self.beg_str] + list_character + [self.end_str] - return list_character - - def get_span_idx_list(self): - span_idx_list = [] - for elem in self.dict_elem: - if 'span' in elem: - span_idx_list.append(self.dict_elem[elem]) - return span_idx_list - - def __call__(self, data): - cells = data['cells'] - structure = data['structure']['tokens'] - structure = self.encode(structure, 'elem') - if structure is None: - return None - elem_num = len(structure) - structure = [0] + structure + [len(self.dict_elem) - 1] - structure = structure + [0] * (self.max_elem_length + 2 - len(structure) - ) - structure = np.array(structure) - data['structure'] = structure - elem_char_idx1 
= self.dict_elem['<td>']
-        elem_char_idx2 = self.dict_elem['<td']  # '<td' opens cells that carry span attributes
-        span_idx_list = self.get_span_idx_list()
-        td_idx_list = np.logical_or(structure == elem_char_idx1,
-                                    structure == elem_char_idx2)
-        td_idx_list = np.where(td_idx_list)[0].tolist()
-        structure_mask = np.ones(
-            (self.max_elem_length + 2, 1), dtype=np.float32)
-        bbox_list = np.zeros((self.max_elem_length + 2, 4), dtype=np.float32)
-        bbox_list_mask = np.zeros(
-            (self.max_elem_length + 2, 1), dtype=np.float32)
-        img_height, img_width, img_ch = data['image'].shape
-        if len(span_idx_list) > 0:
-            span_weight = len(td_idx_list) * 1.0 / len(span_idx_list)
-            span_weight = min(max(span_weight, 1.0), self.span_weight)
-        for cno in range(len(cells)):
-            if 'bbox' in cells[cno]:
-                bbox = cells[cno]['bbox'].copy()
-                bbox[0] = bbox[0] * 1.0 / img_width
-                bbox[1] = bbox[1] * 1.0 / img_height
-                bbox[2] = bbox[2] * 1.0 / img_width
-                bbox[3] = bbox[3] * 1.0 / img_height
-                td_idx = td_idx_list[cno]
-                bbox_list[td_idx] = bbox
-                bbox_list_mask[td_idx] = 1.0
-                cand_span_idx = td_idx + 1
-                if cand_span_idx < (self.max_elem_length + 2):
-                    if structure[cand_span_idx] in span_idx_list:
-                        structure_mask[cand_span_idx] = span_weight
-
-        data['bbox_list'] = bbox_list
-        data['bbox_list_mask'] = bbox_list_mask
-        data['structure_mask'] = structure_mask
-        char_beg_idx = self.get_beg_end_flag_idx('beg', 'char')
-        char_end_idx = self.get_beg_end_flag_idx('end', 'char')
-        elem_beg_idx = self.get_beg_end_flag_idx('beg', 'elem')
-        elem_end_idx = self.get_beg_end_flag_idx('end', 'elem')
-        data['sp_tokens'] = np.array([
-            char_beg_idx, char_end_idx, elem_beg_idx, elem_end_idx,
-            elem_char_idx1, elem_char_idx2, self.max_text_length,
-            self.max_elem_length, self.max_cell_num, elem_num
-        ])
-        return data
-
-    def encode(self, text, char_or_elem):
-        """convert text-label into text-index.
-        """
-        if char_or_elem == "char":
-            max_len = self.max_text_length
-            current_dict = self.dict_character
-        else:
-            max_len = self.max_elem_length
-            current_dict = self.dict_elem
-        if len(text) > max_len:
-            return None
-        if len(text) == 0:
-            if char_or_elem == "char":
-                return [self.dict_character['space']]
-            else:
-                return None
-        text_list = []
-        for char in text:
-            if char not in current_dict:
-                return None
-            text_list.append(current_dict[char])
-        if len(text_list) == 0:
-            if char_or_elem == "char":
-                return [self.dict_character['space']]
-            else:
-                return None
-        return text_list
-
-    def get_ignored_tokens(self, char_or_elem):
-        beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem)
-        end_idx = self.get_beg_end_flag_idx("end", char_or_elem)
-        return [beg_idx, end_idx]
-
-    def get_beg_end_flag_idx(self, beg_or_end, char_or_elem):
-        if char_or_elem == "char":
-            if beg_or_end == "beg":
-                idx = np.array(self.dict_character[self.beg_str])
-            elif beg_or_end == "end":
-                idx = np.array(self.dict_character[self.end_str])
-            else:
-                assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \
-                    % beg_or_end
-        elif char_or_elem == "elem":
-            if beg_or_end == "beg":
-                idx = np.array(self.dict_elem[self.beg_str])
-            elif beg_or_end == "end":
-                idx = np.array(self.dict_elem[self.end_str])
-            else:
-                assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \
-                    % beg_or_end
-        else:
-            assert False, "Unsupport type %s in char_or_elem" \
-                % char_or_elem
-        return idx
-
-
-class SARLabelEncode(BaseRecLabelEncode):
-    """ Convert between text-label and text-index """
-
-    def __init__(self,
-                 max_text_length,
-                 character_dict_path=None,
-                 use_space_char=False,
-                 **kwargs):
-        super(SARLabelEncode, self).__init__(
-            max_text_length, character_dict_path, use_space_char)
-
-    def add_special_char(self, dict_character):
-        beg_end_str = "<BOS/EOS>"
-        unknown_str = "<UKN>"
-        padding_str = "<PAD>"
-        dict_character = dict_character + [unknown_str]
-        self.unknown_idx = len(dict_character) - 1
-        dict_character = dict_character + [beg_end_str]
-        self.start_idx = len(dict_character) - 1
-        self.end_idx = len(dict_character) - 1
-        dict_character = dict_character + [padding_str]
-        self.padding_idx = len(dict_character) - 1
-
-        return dict_character
-
-    def __call__(self, data):
-        text = data['label']
-        text = self.encode(text)
-        if text is None:
-            return None
-        if len(text) >= self.max_text_len - 1:
-            return None
-        data['length'] = np.array(len(text))
-        target = [self.start_idx] + text + [self.end_idx]
-        padded_text = [self.padding_idx for _ in range(self.max_text_len)]
-
-        padded_text[:len(target)] = target
-        data['label'] = np.array(padded_text)
-        return data
-
-    def get_ignored_tokens(self):
-        return [self.padding_idx]
-
-
-class PRENLabelEncode(BaseRecLabelEncode):
-    def __init__(self,
-                 max_text_length,
-                 character_dict_path,
-                 use_space_char=False,
-                 **kwargs):
-        super(PRENLabelEncode, self).__init__(
-            max_text_length, character_dict_path, use_space_char)
-
-    def add_special_char(self, dict_character):
-        padding_str = '<PAD>'  # 0
-        end_str = '<EOS>'  # 1
-        unknown_str = '<UNK>'  # 2
-
-        dict_character = [padding_str, end_str, unknown_str] + dict_character
-        self.padding_idx = 0
-        self.end_idx = 1
-        self.unknown_idx = 2
-
-        return dict_character
-
-    def encode(self, text):
-        if len(text) == 0 or len(text) >= self.max_text_len:
-            return None
-        if self.lower:
-            text = text.lower()
-        text_list = []
-        for char in text:
-            if char not in self.dict:
-                text_list.append(self.unknown_idx)
-            else:
-                text_list.append(self.dict[char])
-        text_list.append(self.end_idx)
-        if len(text_list) < self.max_text_len:
-            text_list += [self.padding_idx] * (
-                self.max_text_len - len(text_list))
-        return text_list
-
-    def __call__(self, data):
-        text = data['label']
-        encoded_text = self.encode(text)
-        if encoded_text is None:
-            return None
-        data['label'] = np.array(encoded_text)
-        return data
-
-
-class VQATokenLabelEncode(object):
-    """
-    Label encode for NLP VQA methods
-    """
-
-    def __init__(self,
-                 class_path,
-                 contains_re=False,
-                 add_special_ids=False,
-                 algorithm='LayoutXLM',
-                 infer_mode=False,
-                 ocr_engine=None,
-                 **kwargs):
-        super(VQATokenLabelEncode, self).__init__()
-        from paddlenlp.transformers import LayoutXLMTokenizer, LayoutLMTokenizer, LayoutLMv2Tokenizer
-        from ppocr.utils.utility import load_vqa_bio_label_maps
-        tokenizer_dict = {
-            'LayoutXLM': {
-                'class': LayoutXLMTokenizer,
-                'pretrained_model': 'layoutxlm-base-uncased'
-            },
-            'LayoutLM': {
-                'class': LayoutLMTokenizer,
-                'pretrained_model': 'layoutlm-base-uncased'
-            },
-            'LayoutLMv2': {
-                'class': LayoutLMv2Tokenizer,
-                'pretrained_model': 'layoutlmv2-base-uncased'
-            }
-        }
-        self.contains_re = contains_re
-        tokenizer_config = tokenizer_dict[algorithm]
-        self.tokenizer = tokenizer_config['class'].from_pretrained(
-            tokenizer_config['pretrained_model'])
-        self.label2id_map, id2label_map = load_vqa_bio_label_maps(class_path)
-        self.add_special_ids = add_special_ids
-        self.infer_mode = infer_mode
-        self.ocr_engine = ocr_engine
-
-    def __call__(self, data):
-        # load bbox and label info
-        ocr_info = self._load_ocr_info(data)
-
-        height, width, _ = data['image'].shape
-
-        words_list = []
-        bbox_list = []
-        input_ids_list = []
-        token_type_ids_list = []
-        segment_offset_id = []
-        gt_label_list = []
-
-        entities = []
-
-        # for re
-        train_re = self.contains_re and not self.infer_mode
-        if train_re:
-            relations = []
-            id2label = {}
-            entity_id_to_index_map = {}
-            empty_entity = set()
-
-        data['ocr_info'] = copy.deepcopy(ocr_info)
-
-        for info in ocr_info:
-            if train_re:
-                # for re
-                if len(info["text"]) == 0:
-                    empty_entity.add(info["id"])
-                    continue
-                id2label[info["id"]] = info["label"]
-                relations.extend([tuple(sorted(l)) for l in info["linking"]])
-            # smooth_box
-            bbox =
self._smooth_box(info["bbox"], height, width) - - text = info["text"] - encode_res = self.tokenizer.encode( - text, pad_to_max_seq_len=False, return_attention_mask=True) - - if not self.add_special_ids: - # TODO: use tok.all_special_ids to remove - encode_res["input_ids"] = encode_res["input_ids"][1:-1] - encode_res["token_type_ids"] = encode_res["token_type_ids"][1: - -1] - encode_res["attention_mask"] = encode_res["attention_mask"][1: - -1] - # parse label - if not self.infer_mode: - label = info['label'] - gt_label = self._parse_label(label, encode_res) - - # construct entities for re - if train_re: - if gt_label[0] != self.label2id_map["O"]: - entity_id_to_index_map[info["id"]] = len(entities) - label = label.upper() - entities.append({ - "start": len(input_ids_list), - "end": - len(input_ids_list) + len(encode_res["input_ids"]), - "label": label.upper(), - }) - else: - entities.append({ - "start": len(input_ids_list), - "end": len(input_ids_list) + len(encode_res["input_ids"]), - "label": 'O', - }) - input_ids_list.extend(encode_res["input_ids"]) - token_type_ids_list.extend(encode_res["token_type_ids"]) - bbox_list.extend([bbox] * len(encode_res["input_ids"])) - words_list.append(text) - segment_offset_id.append(len(input_ids_list)) - if not self.infer_mode: - gt_label_list.extend(gt_label) - - data['input_ids'] = input_ids_list - data['token_type_ids'] = token_type_ids_list - data['bbox'] = bbox_list - data['attention_mask'] = [1] * len(input_ids_list) - data['labels'] = gt_label_list - data['segment_offset_id'] = segment_offset_id - data['tokenizer_params'] = dict( - padding_side=self.tokenizer.padding_side, - pad_token_type_id=self.tokenizer.pad_token_type_id, - pad_token_id=self.tokenizer.pad_token_id) - data['entities'] = entities - - if train_re: - data['relations'] = relations - data['id2label'] = id2label - data['empty_entity'] = empty_entity - data['entity_id_to_index_map'] = entity_id_to_index_map - return data - - def _load_ocr_info(self, data): - def trans_poly_to_bbox(poly): - x1 = np.min([p[0] for p in poly]) - x2 = np.max([p[0] for p in poly]) - y1 = np.min([p[1] for p in poly]) - y2 = np.max([p[1] for p in poly]) - return [x1, y1, x2, y2] - - if self.infer_mode: - ocr_result = self.ocr_engine.ocr(data['image'], cls=False) - ocr_info = [] - for res in ocr_result: - ocr_info.append({ - "text": res[1][0], - "bbox": trans_poly_to_bbox(res[0]), - "poly": res[0], - }) - return ocr_info - else: - info = data['label'] - # read text info - info_dict = json.loads(info) - return info_dict["ocr_info"] - - def _smooth_box(self, bbox, height, width): - bbox[0] = int(bbox[0] * 1000.0 / width) - bbox[2] = int(bbox[2] * 1000.0 / width) - bbox[1] = int(bbox[1] * 1000.0 / height) - bbox[3] = int(bbox[3] * 1000.0 / height) - return bbox - - def _parse_label(self, label, encode_res): - gt_label = [] - if label.lower() == "other": - gt_label.extend([0] * len(encode_res["input_ids"])) - else: - gt_label.append(self.label2id_map[("b-" + label).upper()]) - gt_label.extend([self.label2id_map[("i-" + label).upper()]] * - (len(encode_res["input_ids"]) - 1)) - return gt_label - - -class MultiLabelEncode(BaseRecLabelEncode): - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(MultiLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - self.ctc_encode = CTCLabelEncode(max_text_length, character_dict_path, - use_space_char, **kwargs) - self.sar_encode = SARLabelEncode(max_text_length, character_dict_path, 
- use_space_char, **kwargs) - - def __call__(self, data): - - data_ctc = copy.deepcopy(data) - data_sar = copy.deepcopy(data) - data_out = dict() - data_out['img_path'] = data.get('img_path', None) - data_out['image'] = data['image'] - ctc = self.ctc_encode.__call__(data_ctc) - sar = self.sar_encode.__call__(data_sar) - if ctc is None or sar is None: - return None - data_out['label_ctc'] = ctc['label'] - data_out['label_sar'] = sar['label'] - data_out['length'] = ctc['length'] - return data_out diff --git a/backend/ppocr/data/imaug/make_border_map.py b/backend/ppocr/data/imaug/make_border_map.py deleted file mode 100644 index abab3836..00000000 --- a/backend/ppocr/data/imaug/make_border_map.py +++ /dev/null @@ -1,173 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 - -np.seterr(divide='ignore', invalid='ignore') -import pyclipper -from shapely.geometry import Polygon -import sys -import warnings - -warnings.simplefilter("ignore") - -__all__ = ['MakeBorderMap'] - - -class MakeBorderMap(object): - def __init__(self, - shrink_ratio=0.4, - thresh_min=0.3, - thresh_max=0.7, - **kwargs): - self.shrink_ratio = shrink_ratio - self.thresh_min = thresh_min - self.thresh_max = thresh_max - - def __call__(self, data): - - img = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - - canvas = np.zeros(img.shape[:2], dtype=np.float32) - mask = np.zeros(img.shape[:2], dtype=np.float32) - - for i in range(len(text_polys)): - if ignore_tags[i]: - continue - self.draw_border_map(text_polys[i], canvas, mask=mask) - canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min - - data['threshold_map'] = canvas - data['threshold_mask'] = mask - return data - - def draw_border_map(self, polygon, canvas, mask): - polygon = np.array(polygon) - assert polygon.ndim == 2 - assert polygon.shape[1] == 2 - - polygon_shape = Polygon(polygon) - if polygon_shape.area <= 0: - return - distance = polygon_shape.area * ( - 1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length - subject = [tuple(l) for l in polygon] - padding = pyclipper.PyclipperOffset() - padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - - padded_polygon = np.array(padding.Execute(distance)[0]) - cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0) - - xmin = padded_polygon[:, 0].min() - xmax = padded_polygon[:, 0].max() - ymin = padded_polygon[:, 1].min() - ymax = padded_polygon[:, 1].max() - width = xmax - xmin + 1 - height = ymax - ymin + 1 - - polygon[:, 0] = polygon[:, 0] - xmin - polygon[:, 1] = polygon[:, 1] - ymin - - xs = np.broadcast_to( - np.linspace( - 0, width - 1, num=width).reshape(1, 
width), (height, width)) - ys = np.broadcast_to( - np.linspace( - 0, height - 1, num=height).reshape(height, 1), (height, width)) - - distance_map = np.zeros( - (polygon.shape[0], height, width), dtype=np.float32) - for i in range(polygon.shape[0]): - j = (i + 1) % polygon.shape[0] - absolute_distance = self._distance(xs, ys, polygon[i], polygon[j]) - distance_map[i] = np.clip(absolute_distance / distance, 0, 1) - distance_map = distance_map.min(axis=0) - - xmin_valid = min(max(0, xmin), canvas.shape[1] - 1) - xmax_valid = min(max(0, xmax), canvas.shape[1] - 1) - ymin_valid = min(max(0, ymin), canvas.shape[0] - 1) - ymax_valid = min(max(0, ymax), canvas.shape[0] - 1) - canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax( - 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height, - xmin_valid - xmin:xmax_valid - xmax + width], - canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1]) - - def _distance(self, xs, ys, point_1, point_2): - ''' - compute the distance from point to a line - ys: coordinates in the first axis - xs: coordinates in the second axis - point_1, point_2: (x, y), the end of the line - ''' - height, width = xs.shape[:2] - square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[ - 1]) - square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[ - 1]) - square_distance = np.square(point_1[0] - point_2[0]) + np.square( - point_1[1] - point_2[1]) - - cosin = (square_distance - square_distance_1 - square_distance_2) / ( - 2 * np.sqrt(square_distance_1 * square_distance_2)) - square_sin = 1 - np.square(cosin) - square_sin = np.nan_to_num(square_sin) - result = np.sqrt(square_distance_1 * square_distance_2 * square_sin / - square_distance) - - result[cosin < - 0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin - < 0] - # self.extend_line(point_1, point_2, result) - return result - - def extend_line(self, point_1, point_2, result, shrink_ratio): - ex_point_1 = (int( - round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))), - int( - round(point_1[1] + (point_1[1] - point_2[1]) * ( - 1 + shrink_ratio)))) - cv2.line( - result, - tuple(ex_point_1), - tuple(point_1), - 4096.0, - 1, - lineType=cv2.LINE_AA, - shift=0) - ex_point_2 = (int( - round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))), - int( - round(point_2[1] + (point_2[1] - point_1[1]) * ( - 1 + shrink_ratio)))) - cv2.line( - result, - tuple(ex_point_2), - tuple(point_2), - 4096.0, - 1, - lineType=cv2.LINE_AA, - shift=0) - return ex_point_1, ex_point_2 diff --git a/backend/ppocr/data/imaug/make_pse_gt.py b/backend/ppocr/data/imaug/make_pse_gt.py deleted file mode 100644 index 255d076b..00000000 --- a/backend/ppocr/data/imaug/make_pse_gt.py +++ /dev/null @@ -1,106 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
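For orientation before this deleted file's body: MakePseGt below produces its kernel levels by offsetting each text polygon inward with pyclipper, using the distance d = area * (1 - rate^2) / (perimeter + eps). A minimal standalone sketch of that one step, assuming pyclipper and shapely are installed; shrink_polygon is an illustrative name, not part of the file:

    import numpy as np
    import pyclipper
    from shapely.geometry import Polygon

    def shrink_polygon(poly, rate):
        # same offset distance as MakePseGt.generate_kernel:
        # d = A * (1 - rate^2) / (L + 1e-6)
        polygon = Polygon(poly)
        distance = polygon.area * (1 - rate * rate) / (polygon.length + 1e-6)
        pco = pyclipper.PyclipperOffset()
        pco.AddPath([tuple(p) for p in poly], pyclipper.JT_ROUND,
                    pyclipper.ET_CLOSEDPOLYGON)
        # a negative offset moves the contour inward; 0..n polygons may come back
        return [np.array(p).reshape(-1, 2) for p in pco.Execute(-distance)]

    square = [(0, 0), (100, 0), (100, 100), (0, 100)]
    for rate in (0.9, 0.7, 0.4):
        print(rate, [k.shape for k in shrink_polygon(square, rate)])

Smaller rate values shrink more, which is how the kernel series runs from the full text region down to its core.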
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import cv2 -import numpy as np -import pyclipper -from shapely.geometry import Polygon - -__all__ = ['MakePseGt'] - - -class MakePseGt(object): - def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs): - self.kernel_num = kernel_num - self.min_shrink_ratio = min_shrink_ratio - self.size = size - - def __call__(self, data): - - image = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - - h, w, _ = image.shape - short_edge = min(h, w) - if short_edge < self.size: - # keep short_size >= self.size - scale = self.size / short_edge - image = cv2.resize(image, dsize=None, fx=scale, fy=scale) - text_polys *= scale - - gt_kernels = [] - for i in range(1, self.kernel_num + 1): - # s1->sn, from big to small - rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1 - ) * i - text_kernel, ignore_tags = self.generate_kernel( - image.shape[0:2], rate, text_polys, ignore_tags) - gt_kernels.append(text_kernel) - - training_mask = np.ones(image.shape[0:2], dtype='uint8') - for i in range(text_polys.shape[0]): - if ignore_tags[i]: - cv2.fillPoly(training_mask, - text_polys[i].astype(np.int32)[np.newaxis, :, :], - 0) - - gt_kernels = np.array(gt_kernels) - gt_kernels[gt_kernels > 0] = 1 - - data['image'] = image - data['polys'] = text_polys - data['gt_kernels'] = gt_kernels[0:] - data['gt_text'] = gt_kernels[0] - data['mask'] = training_mask.astype('float32') - return data - - def generate_kernel(self, - img_size, - shrink_ratio, - text_polys, - ignore_tags=None): - """ - Refer to part of the code: - https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py - """ - - h, w = img_size - text_kernel = np.zeros((h, w), dtype=np.float32) - for i, poly in enumerate(text_polys): - polygon = Polygon(poly) - distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / ( - polygon.length + 1e-6) - subject = [tuple(l) for l in poly] - pco = pyclipper.PyclipperOffset() - pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - shrinked = np.array(pco.Execute(-distance)) - - if len(shrinked) == 0 or shrinked.size == 0: - if ignore_tags is not None: - ignore_tags[i] = True - continue - try: - shrinked = np.array(shrinked[0]).reshape(-1, 2) - except: - if ignore_tags is not None: - ignore_tags[i] = True - continue - cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)], i + 1) - return text_kernel, ignore_tags diff --git a/backend/ppocr/data/imaug/make_shrink_map.py b/backend/ppocr/data/imaug/make_shrink_map.py deleted file mode 100644 index 6c65c20e..00000000 --- a/backend/ppocr/data/imaug/make_shrink_map.py +++ /dev/null @@ -1,123 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
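One detail of MakeShrinkMap below that is easy to miss: validate_polygons relies on a signed shoelace area both to drop degenerate boxes (|area| < 1) and to normalize winding order (positive-area polygons get reversed). A tiny sketch of that helper on its own, illustrative and not part of the original file:

    import numpy as np

    def signed_area(polygon):
        # shoelace formula; the sign encodes the winding order
        area = 0.0
        q = polygon[-1]
        for p in polygon:
            area += p[0] * q[1] - p[1] * q[0]
            q = p
        return area / 2.0

    poly = np.array([[0, 0], [4, 0], [4, 3], [0, 3]], dtype=np.float32)
    print(signed_area(poly), signed_area(poly[::-1]))  # -12.0 12.0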
-""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_shrink_map.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 -from shapely.geometry import Polygon -import pyclipper - -__all__ = ['MakeShrinkMap'] - - -class MakeShrinkMap(object): - r''' - Making binary mask from detection data with ICDAR format. - Typically following the process of class `MakeICDARData`. - ''' - - def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs): - self.min_text_size = min_text_size - self.shrink_ratio = shrink_ratio - - def __call__(self, data): - image = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - - h, w = image.shape[:2] - text_polys, ignore_tags = self.validate_polygons(text_polys, - ignore_tags, h, w) - gt = np.zeros((h, w), dtype=np.float32) - mask = np.ones((h, w), dtype=np.float32) - for i in range(len(text_polys)): - polygon = text_polys[i] - height = max(polygon[:, 1]) - min(polygon[:, 1]) - width = max(polygon[:, 0]) - min(polygon[:, 0]) - if ignore_tags[i] or min(height, width) < self.min_text_size: - cv2.fillPoly(mask, - polygon.astype(np.int32)[np.newaxis, :, :], 0) - ignore_tags[i] = True - else: - polygon_shape = Polygon(polygon) - subject = [tuple(l) for l in polygon] - padding = pyclipper.PyclipperOffset() - padding.AddPath(subject, pyclipper.JT_ROUND, - pyclipper.ET_CLOSEDPOLYGON) - shrinked = [] - - # Increase the shrink ratio every time we get multiple polygon returned back - possible_ratios = np.arange(self.shrink_ratio, 1, - self.shrink_ratio) - np.append(possible_ratios, 1) - # print(possible_ratios) - for ratio in possible_ratios: - # print(f"Change shrink ratio to {ratio}") - distance = polygon_shape.area * ( - 1 - np.power(ratio, 2)) / polygon_shape.length - shrinked = padding.Execute(-distance) - if len(shrinked) == 1: - break - - if shrinked == []: - cv2.fillPoly(mask, - polygon.astype(np.int32)[np.newaxis, :, :], 0) - ignore_tags[i] = True - continue - - for each_shirnk in shrinked: - shirnk = np.array(each_shirnk).reshape(-1, 2) - cv2.fillPoly(gt, [shirnk.astype(np.int32)], 1) - - data['shrink_map'] = gt - data['shrink_mask'] = mask - return data - - def validate_polygons(self, polygons, ignore_tags, h, w): - ''' - polygons (numpy.array, required): of shape (num_instances, num_points, 2) - ''' - if len(polygons) == 0: - return polygons, ignore_tags - assert len(polygons) == len(ignore_tags) - for polygon in polygons: - polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1) - polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1) - - for i in range(len(polygons)): - area = self.polygon_area(polygons[i]) - if abs(area) < 1: - ignore_tags[i] = True - if area > 0: - polygons[i] = polygons[i][::-1, :] - return polygons, ignore_tags - - def polygon_area(self, polygon): - """ - compute polygon area - """ - area = 0 - q = polygon[-1] - for p in polygon: - area += p[0] * q[1] - p[1] * q[0] - q = p - return area / 2.0 diff --git a/backend/ppocr/data/imaug/operators.py b/backend/ppocr/data/imaug/operators.py deleted file mode 100644 index 09736515..00000000 --- a/backend/ppocr/data/imaug/operators.py +++ /dev/null @@ -1,468 +0,0 @@ -""" -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import six -import cv2 -import numpy as np -import math - - -class DecodeImage(object): - """ decode image """ - - def __init__(self, - img_mode='RGB', - channel_first=False, - ignore_orientation=False, - **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - self.ignore_orientation = ignore_orientation - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - if self.ignore_orientation: - img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION | - cv2.IMREAD_COLOR) - else: - img = cv2.imdecode(img, 1) - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) - img = img[:, :, ::-1] - - if self.channel_first: - img = img.transpose((2, 0, 1)) - - data['image'] = img - return data - - -class NRTRDecodeImage(object): - """ decode image """ - - def __init__(self, img_mode='RGB', channel_first=False, **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - - img = cv2.imdecode(img, 1) - - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) - img = img[:, :, ::-1] - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - if self.channel_first: - img = img.transpose((2, 0, 1)) - data['image'] = img - return data - - -class NormalizeImage(object): - """ normalize image such as substract mean, divide std - """ - - def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): - if isinstance(scale, str): - scale = eval(scale) - self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) - mean = mean if mean is not None else [0.485, 0.456, 0.406] - std = std if std is not None else [0.229, 0.224, 0.225] - - shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) - self.mean = np.array(mean).reshape(shape).astype('float32') - self.std = np.array(std).reshape(shape).astype('float32') - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - assert isinstance(img, - np.ndarray), "invalid input 'img' in NormalizeImage" - data['image'] = ( - img.astype('float32') * self.scale - self.mean) / self.std - return data - - -class ToCHWImage(object): - """ convert hwc image to chw image - """ - - def 
__init__(self, **kwargs): - pass - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - data['image'] = img.transpose((2, 0, 1)) - return data - - -class Fasttext(object): - def __init__(self, path="None", **kwargs): - import fasttext - self.fast_model = fasttext.load_model(path) - - def __call__(self, data): - label = data['label'] - fast_label = self.fast_model[label] - data['fast_label'] = fast_label - return data - - -class KeepKeys(object): - def __init__(self, keep_keys, **kwargs): - self.keep_keys = keep_keys - - def __call__(self, data): - data_list = [] - for key in self.keep_keys: - data_list.append(data[key]) - return data_list - - -class Pad(object): - def __init__(self, size=None, size_div=32, **kwargs): - if size is not None and not isinstance(size, (int, list, tuple)): - raise TypeError("Type of target_size is invalid. Now is {}".format( - type(size))) - if isinstance(size, int): - size = [size, size] - self.size = size - self.size_div = size_div - - def __call__(self, data): - - img = data['image'] - img_h, img_w = img.shape[0], img.shape[1] - if self.size: - resize_h2, resize_w2 = self.size - assert ( - img_h < resize_h2 and img_w < resize_w2 - ), '(h, w) of target size should be greater than (img_h, img_w)' - else: - resize_h2 = max( - int(math.ceil(img.shape[0] / self.size_div) * self.size_div), - self.size_div) - resize_w2 = max( - int(math.ceil(img.shape[1] / self.size_div) * self.size_div), - self.size_div) - img = cv2.copyMakeBorder( - img, - 0, - resize_h2 - img_h, - 0, - resize_w2 - img_w, - cv2.BORDER_CONSTANT, - value=0) - data['image'] = img - return data - - -class Resize(object): - def __init__(self, size=(640, 640), **kwargs): - self.size = size - - def resize_image(self, img): - resize_h, resize_w = self.size - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - return img, [ratio_h, ratio_w] - - def __call__(self, data): - img = data['image'] - if 'polys' in data: - text_polys = data['polys'] - - img_resize, [ratio_h, ratio_w] = self.resize_image(img) - if 'polys' in data: - new_boxes = [] - for box in text_polys: - new_box = [] - for cord in box: - new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) - new_boxes.append(new_box) - data['polys'] = np.array(new_boxes, dtype=np.float32) - data['image'] = img_resize - return data - - -class DetResizeForTest(object): - def __init__(self, **kwargs): - super(DetResizeForTest, self).__init__() - self.resize_type = 0 - if 'image_shape' in kwargs: - self.image_shape = kwargs['image_shape'] - self.resize_type = 1 - elif 'limit_side_len' in kwargs: - self.limit_side_len = kwargs['limit_side_len'] - self.limit_type = kwargs.get('limit_type', 'min') - elif 'resize_long' in kwargs: - self.resize_type = 2 - self.resize_long = kwargs.get('resize_long', 960) - else: - self.limit_side_len = 736 - self.limit_type = 'min' - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - - if self.resize_type == 0: - # img, shape = self.resize_image_type0(img) - img, [ratio_h, ratio_w] = self.resize_image_type0(img) - elif self.resize_type == 2: - img, [ratio_h, ratio_w] = self.resize_image_type2(img) - else: - # img, shape = self.resize_image_type1(img) - img, [ratio_h, ratio_w] = self.resize_image_type1(img) - data['image'] = img - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def 
resize_image_type1(self, img): - resize_h, resize_w = self.image_shape - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - # return img, np.array([ori_h, ori_w]) - return img, [ratio_h, ratio_w] - - def resize_image_type0(self, img): - """ - resize image to a size multiple of 32 which is required by the network - args: - img(array): array with shape [h, w, c] - return(tuple): - img, (ratio_h, ratio_w) - """ - limit_side_len = self.limit_side_len - h, w, c = img.shape - - # limit the max side - if self.limit_type == 'max': - if max(h, w) > limit_side_len: - if h > w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. - elif self.limit_type == 'min': - if min(h, w) < limit_side_len: - if h < w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. - elif self.limit_type == 'resize_long': - ratio = float(limit_side_len) / max(h, w) - else: - raise Exception('not support limit type, image ') - resize_h = int(h * ratio) - resize_w = int(w * ratio) - - resize_h = max(int(round(resize_h / 32) * 32), 32) - resize_w = max(int(round(resize_w / 32) * 32), 32) - - try: - if int(resize_w) <= 0 or int(resize_h) <= 0: - return None, (None, None) - img = cv2.resize(img, (int(resize_w), int(resize_h))) - except: - print(img.shape, resize_w, resize_h) - sys.exit(0) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return img, [ratio_h, ratio_w] - - def resize_image_type2(self, img): - h, w, _ = img.shape - - resize_w = w - resize_h = h - - if resize_h > resize_w: - ratio = float(self.resize_long) / resize_h - else: - ratio = float(self.resize_long) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - img = cv2.resize(img, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return img, [ratio_h, ratio_w] - - -class E2EResizeForTest(object): - def __init__(self, **kwargs): - super(E2EResizeForTest, self).__init__() - self.max_side_len = kwargs['max_side_len'] - self.valid_set = kwargs['valid_set'] - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - if self.valid_set == 'totaltext': - im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( - img, max_side_len=self.max_side_len) - else: - im_resized, (ratio_h, ratio_w) = self.resize_image( - img, max_side_len=self.max_side_len) - data['image'] = im_resized - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def resize_image_for_totaltext(self, im, max_side_len=512): - - h, w, _ = im.shape - resize_w = w - resize_h = h - ratio = 1.25 - if h * ratio > max_side_len: - ratio = float(max_side_len) / resize_h - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - def resize_image(self, im, max_side_len=512): - """ - resize image to a size multiple of max_stride which is required by the network - :param im: the resized 
image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # Fix the longer side - if resize_h > resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return im, (ratio_h, ratio_w) - - -class KieResize(object): - def __init__(self, **kwargs): - super(KieResize, self).__init__() - self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ - 'img_scale'][1] - - def __call__(self, data): - img = data['image'] - points = data['points'] - src_h, src_w, _ = img.shape - im_resized, scale_factor, [ratio_h, ratio_w - ], [new_h, new_w] = self.resize_image(img) - resize_points = self.resize_boxes(img, points, scale_factor) - data['ori_image'] = img - data['ori_boxes'] = points - data['points'] = resize_points - data['image'] = im_resized - data['shape'] = np.array([new_h, new_w]) - return data - - def resize_image(self, img): - norm_img = np.zeros([1024, 1024, 3], dtype='float32') - scale = [512, 1024] - h, w = img.shape[:2] - max_long_edge = max(scale) - max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), - max_short_edge / min(h, w)) - resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( - scale_factor) + 0.5) - max_stride = 32 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(img, (resize_w, resize_h)) - new_h, new_w = im.shape[:2] - w_scale = new_w / w - h_scale = new_h / h - scale_factor = np.array( - [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) - norm_img[:new_h, :new_w, :] = im - return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] - - def resize_boxes(self, im, points, scale_factor): - points = points * scale_factor - img_shape = im.shape[:2] - points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) - points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) - return points diff --git a/backend/ppocr/data/imaug/pg_process.py b/backend/ppocr/data/imaug/pg_process.py deleted file mode 100644 index 53031064..00000000 --- a/backend/ppocr/data/imaug/pg_process.py +++ /dev/null @@ -1,906 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
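
For readers skimming this removal: the orientation check in the PGProcessTrain code deleted below rests on a signed shoelace area. The following is a minimal standalone sketch of that idea (NumPy only; a re-expression for illustration, not the exact deleted helper):

    import numpy as np

    def quad_area(poly):
        # Signed shoelace area of a 4-point polygon. The deleted PGProcessTrain
        # below uses the sign to detect winding direction (reversing polygons
        # labelled in the wrong order) and |area| < 1 to drop degenerate boxes.
        edge = [(poly[(i + 1) % 4][0] - poly[i][0]) *
                (poly[(i + 1) % 4][1] + poly[i][1]) for i in range(4)]
        return np.sum(edge) / 2.0

    quad = np.array([[0, 0], [2, 0], [2, 2], [0, 2]], dtype=np.float32)
    print(quad_area(quad))  # -4.0; traversing the points in reverse yields +4.0
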
- -import math -import cv2 -import numpy as np - -__all__ = ['PGProcessTrain'] - - -class PGProcessTrain(object): - def __init__(self, - character_dict_path, - max_text_length, - max_text_nums, - tcl_len, - batch_size=14, - min_crop_size=24, - min_text_size=4, - max_text_size=512, - **kwargs): - self.tcl_len = tcl_len - self.max_text_length = max_text_length - self.max_text_nums = max_text_nums - self.batch_size = batch_size - self.min_crop_size = min_crop_size - self.min_text_size = min_text_size - self.max_text_size = max_text_size - self.Lexicon_Table = self.get_dict(character_dict_path) - self.pad_num = len(self.Lexicon_Table) - self.img_id = 0 - - def get_dict(self, character_dict_path): - character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - character_str += line - dict_character = list(character_str) - return dict_character - - def quad_area(self, poly): - """ - compute area of a polygon - :param poly: - :return: - """ - edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] - return np.sum(edge) / 2. - - def gen_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad - - def check_and_validate_polys(self, polys, tags, im_size): - """ - check so that the text poly is in the same direction, - and also filter some invalid polygons - :param polys: - :param tags: - :return: - """ - (h, w) = im_size - if polys.shape[0] == 0: - return polys, np.array([]), np.array([]) - polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1) - polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1) - - validated_polys = [] - validated_tags = [] - hv_tags = [] - for poly, tag in zip(polys, tags): - quad = self.gen_quad_from_poly(poly) - p_area = self.quad_area(quad) - if abs(p_area) < 1: - print('invalid poly') - continue - if p_area > 0: - if tag == False: - print('poly in wrong direction') - tag = True # reversed cases should be ignore - poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, - 1), :] - quad = quad[(0, 3, 2, 1), :] - - len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - - quad[2]) - len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - - quad[2]) - hv_tag = 1 - - if len_w * 2.0 < len_h: - hv_tag = 0 - - validated_polys.append(poly) - validated_tags.append(tag) - hv_tags.append(hv_tag) - return np.array(validated_polys), np.array(validated_tags), np.array( - hv_tags) - - def crop_area(self, - im, - polys, - tags, - hv_tags, - txts, - crop_background=False, - max_tries=25): - """ - make random crop from the input image - :param im: - :param polys: [b,4,2] - :param tags: - :param 
crop_background: - :param max_tries: 50 -> 25 - :return: - """ - h, w, _ = im.shape - pad_h = h // 10 - pad_w = w // 10 - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - if len(h_axis) == 0 or len(w_axis) == 0: - return im, polys, tags, hv_tags, txts - for i in range(max_tries): - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - if xmax - xmin < self.min_crop_size or \ - ymax - ymin < self.min_crop_size: - continue - if polys.shape[0] != 0: - poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \ - & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax) - selected_polys = np.where( - np.sum(poly_axis_in_area, axis=1) == 4)[0] - else: - selected_polys = [] - if len(selected_polys) == 0: - # no text in this area - if crop_background: - txts_tmp = [] - for selected_poly in selected_polys: - txts_tmp.append(txts[selected_poly]) - txts = txts_tmp - return im[ymin: ymax + 1, xmin: xmax + 1, :], \ - polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts - else: - continue - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = polys[selected_polys] - tags = tags[selected_polys] - hv_tags = hv_tags[selected_polys] - txts_tmp = [] - for selected_poly in selected_polys: - txts_tmp.append(txts[selected_poly]) - txts = txts_tmp - polys[:, :, 0] -= xmin - polys[:, :, 1] -= ymin - return im, polys, tags, hv_tags, txts - - return im, polys, tags, hv_tags, txts - - def fit_and_gather_tcl_points_v2(self, - min_area_quad, - poly, - max_h, - max_w, - fixed_point_num=64, - img_id=0, - reference_height=3): - """ - Find the center point of poly as key_points, then fit and gather. 
- """ - key_point_xys = [] - point_num = poly.shape[0] - for idx in range(point_num // 2): - center_point = (poly[idx] + poly[point_num - 1 - idx]) / 2.0 - key_point_xys.append(center_point) - - tmp_image = np.zeros( - shape=( - max_h, - max_w, ), dtype='float32') - cv2.polylines(tmp_image, [np.array(key_point_xys).astype('int32')], - False, 1.0) - ys, xs = np.where(tmp_image > 0) - xy_text = np.array(list(zip(xs, ys)), dtype='float32') - - left_center_pt = ( - (min_area_quad[0] - min_area_quad[1]) / 2.0).reshape(1, 2) - right_center_pt = ( - (min_area_quad[1] - min_area_quad[2]) / 2.0).reshape(1, 2) - proj_unit_vec = (right_center_pt - left_center_pt) / ( - np.linalg.norm(right_center_pt - left_center_pt) + 1e-6) - proj_unit_vec_tile = np.tile(proj_unit_vec, - (xy_text.shape[0], 1)) # (n, 2) - left_center_pt_tile = np.tile(left_center_pt, - (xy_text.shape[0], 1)) # (n, 2) - xy_text_to_left_center = xy_text - left_center_pt_tile - proj_value = np.sum(xy_text_to_left_center * proj_unit_vec_tile, axis=1) - xy_text = xy_text[np.argsort(proj_value)] - - # convert to np and keep the num of point not greater then fixed_point_num - pos_info = np.array(xy_text).reshape(-1, 2)[:, ::-1] # xy-> yx - point_num = len(pos_info) - if point_num > fixed_point_num: - keep_ids = [ - int((point_num * 1.0 / fixed_point_num) * x) - for x in range(fixed_point_num) - ] - pos_info = pos_info[keep_ids, :] - - keep = int(min(len(pos_info), fixed_point_num)) - if np.random.rand() < 0.2 and reference_height >= 3: - dl = (np.random.rand(keep) - 0.5) * reference_height * 0.3 - random_float = np.array([1, 0]).reshape([1, 2]) * dl.reshape( - [keep, 1]) - pos_info += random_float - pos_info[:, 0] = np.clip(pos_info[:, 0], 0, max_h - 1) - pos_info[:, 1] = np.clip(pos_info[:, 1], 0, max_w - 1) - - # padding to fixed length - pos_l = np.zeros((self.tcl_len, 3), dtype=np.int32) - pos_l[:, 0] = np.ones((self.tcl_len, )) * img_id - pos_m = np.zeros((self.tcl_len, 1), dtype=np.float32) - pos_l[:keep, 1:] = np.round(pos_info).astype(np.int32) - pos_m[:keep] = 1.0 - return pos_l, pos_m - - def generate_direction_map(self, poly_quads, n_char, direction_map): - """ - """ - width_list = [] - height_list = [] - for quad in poly_quads: - quad_w = (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) / 2.0 - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - width_list.append(quad_w) - height_list.append(quad_h) - norm_width = max(sum(width_list) / n_char, 1.0) - average_height = max(sum(height_list) / len(height_list), 1.0) - k = 1 - for quad in poly_quads: - direct_vector_full = ( - (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 - direct_vector = direct_vector_full / ( - np.linalg.norm(direct_vector_full) + 1e-6) * norm_width - direction_label = tuple( - map(float, - [direct_vector[0], direct_vector[1], 1.0 / average_height])) - cv2.fillPoly(direction_map, - quad.round().astype(np.int32)[np.newaxis, :, :], - direction_label) - k += 1 - return direction_map - - def calculate_average_height(self, poly_quads): - """ - """ - height_list = [] - for quad in poly_quads: - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - height_list.append(quad_h) - average_height = max(sum(height_list) / len(height_list), 1.0) - return average_height - - def generate_tcl_ctc_label(self, - h, - w, - polys, - tags, - text_strs, - ds_ratio, - tcl_ratio=0.3, - shrink_ratio_of_width=0.15): - """ - Generate polygon. 
- """ - score_map_big = np.zeros( - ( - h, - w, ), dtype=np.float32) - h, w = int(h * ds_ratio), int(w * ds_ratio) - polys = polys * ds_ratio - - score_map = np.zeros( - ( - h, - w, ), dtype=np.float32) - score_label_map = np.zeros( - ( - h, - w, ), dtype=np.float32) - tbo_map = np.zeros((h, w, 5), dtype=np.float32) - training_mask = np.ones( - ( - h, - w, ), dtype=np.float32) - direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape( - [1, 1, 3]).astype(np.float32) - - label_idx = 0 - score_label_map_text_label_list = [] - pos_list, pos_mask, label_list = [], [], [] - for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] - tag = poly_tag[1] - - # generate min_area_quad - min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) - - if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \ - or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio: - continue - - if tag: - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0.15) - else: - text_label = text_strs[poly_idx] - text_label = self.prepare_text_label(text_label, - self.Lexicon_Table) - - text_label_index_list = [[self.Lexicon_Table.index(c_)] - for c_ in text_label - if c_ in self.Lexicon_Table] - if len(text_label_index_list) < 1: - continue - - tcl_poly = self.poly2tcl(poly, tcl_ratio) - tcl_quads = self.poly2quads(tcl_poly) - poly_quads = self.poly2quads(poly) - - stcl_quads, quad_index = self.shrink_poly_along_width( - tcl_quads, - shrink_ratio_of_width=shrink_ratio_of_width, - expand_height_ratio=1.0 / tcl_ratio) - - cv2.fillPoly(score_map, - np.round(stcl_quads).astype(np.int32), 1.0) - cv2.fillPoly(score_map_big, - np.round(stcl_quads / ds_ratio).astype(np.int32), - 1.0) - - for idx, quad in enumerate(stcl_quads): - quad_mask = np.zeros((h, w), dtype=np.float32) - quad_mask = cv2.fillPoly( - quad_mask, - np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) - tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], - quad_mask, tbo_map) - - # score label map and score_label_map_text_label_list for refine - if label_idx == 0: - text_pos_list_ = [[len(self.Lexicon_Table)], ] - score_label_map_text_label_list.append(text_pos_list_) - - label_idx += 1 - cv2.fillPoly(score_label_map, - np.round(poly_quads).astype(np.int32), label_idx) - score_label_map_text_label_list.append(text_label_index_list) - - # direction info, fix-me - n_char = len(text_label_index_list) - direction_map = self.generate_direction_map(poly_quads, n_char, - direction_map) - - # pos info - average_shrink_height = self.calculate_average_height( - stcl_quads) - pos_l, pos_m = self.fit_and_gather_tcl_points_v2( - min_area_quad, - poly, - max_h=h, - max_w=w, - fixed_point_num=64, - img_id=self.img_id, - reference_height=average_shrink_height) - - label_l = text_label_index_list - if len(text_label_index_list) < 2: - continue - - pos_list.append(pos_l) - pos_mask.append(pos_m) - label_list.append(label_l) - - # use big score_map for smooth tcl lines - score_map_big_resized = cv2.resize( - score_map_big, dsize=None, fx=ds_ratio, fy=ds_ratio) - score_map = np.array(score_map_big_resized > 1e-3, dtype='float32') - - return score_map, score_label_map, tbo_map, direction_map, training_mask, \ - 
pos_list, pos_mask, label_list, score_label_map_text_label_list - - def adjust_point(self, poly): - """ - adjust point order. - """ - point_num = poly.shape[0] - if point_num == 4: - len_1 = np.linalg.norm(poly[0] - poly[1]) - len_2 = np.linalg.norm(poly[1] - poly[2]) - len_3 = np.linalg.norm(poly[2] - poly[3]) - len_4 = np.linalg.norm(poly[3] - poly[0]) - - if (len_1 + len_3) * 1.5 < (len_2 + len_4): - poly = poly[[1, 2, 3, 0], :] - - elif point_num > 4: - vector_1 = poly[0] - poly[1] - vector_2 = poly[1] - poly[2] - cos_theta = np.dot(vector_1, vector_2) / ( - np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) - theta = np.arccos(np.round(cos_theta, decimals=4)) - - if abs(theta) > (70 / 180 * math.pi): - index = list(range(1, point_num)) + [0] - poly = poly[np.array(index), :] - return poly - - def gen_min_area_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - if point_num == 4: - min_area_quad = poly - center_point = np.sum(poly, axis=0) / 4 - else: - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - center_point = rect[0] - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad, center_point - - def shrink_quad_along_width(self, - quad, - begin_width_ratio=0., - end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - def shrink_poly_along_width(self, - quads, - shrink_ratio_of_width, - expand_height_ratio=1.0): - """ - shrink poly with given length. - """ - upper_edge_list = [] - - def get_cut_info(edge_len_list, cut_len): - for idx, edge_len in enumerate(edge_len_list): - cut_len -= edge_len - if cut_len <= 0.000001: - ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx] - return idx, ratio - - for quad in quads: - upper_edge_len = np.linalg.norm(quad[0] - quad[1]) - upper_edge_list.append(upper_edge_len) - - # length of left edge and right edge. 
- left_length = np.linalg.norm(quads[0][0] - quads[0][ - 3]) * expand_height_ratio - right_length = np.linalg.norm(quads[-1][1] - quads[-1][ - 2]) * expand_height_ratio - - shrink_length = min(left_length, right_length, - sum(upper_edge_list)) * shrink_ratio_of_width - # shrinking length - upper_len_left = shrink_length - upper_len_right = sum(upper_edge_list) - shrink_length - - left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left) - left_quad = self.shrink_quad_along_width( - quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) - right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right) - right_quad = self.shrink_quad_along_width( - quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) - - out_quad_list = [] - if left_idx == right_idx: - out_quad_list.append( - [left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) - else: - out_quad_list.append(left_quad) - for idx in range(left_idx + 1, right_idx): - out_quad_list.append(quads[idx]) - out_quad_list.append(right_quad) - - return np.array(out_quad_list), list(range(left_idx, right_idx + 1)) - - def prepare_text_label(self, label_str, Lexicon_Table): - """ - Prepare text lablel by given Lexicon_Table. - """ - if len(Lexicon_Table) == 36: - return label_str.lower() - else: - return label_str - - def vector_angle(self, A, B): - """ - Calculate the angle between vector AB and x-axis positive direction. - """ - AB = np.array([B[1] - A[1], B[0] - A[0]]) - return np.arctan2(*AB) - - def theta_line_cross_point(self, theta, point): - """ - Calculate the line through given point and angle in ax + by + c =0 form. - """ - x, y = point - cos = np.cos(theta) - sin = np.sin(theta) - return [sin, -cos, cos * y - sin * x] - - def line_cross_two_point(self, A, B): - """ - Calculate the line through given point A and B in ax + by + c =0 form. - """ - angle = self.vector_angle(A, B) - return self.theta_line_cross_point(angle, A) - - def average_angle(self, poly): - """ - Calculate the average angle between left and right edge in given poly. - """ - p0, p1, p2, p3 = poly - angle30 = self.vector_angle(p3, p0) - angle21 = self.vector_angle(p2, p1) - return (angle30 + angle21) / 2 - - def line_cross_point(self, line1, line2): - """ - line1 and line2 in 0=ax+by+c form, compute the cross point of line1 and line2 - """ - a1, b1, c1 = line1 - a2, b2, c2 = line2 - d = a1 * b2 - a2 * b1 - - if d == 0: - print('Cross point does not exist') - return np.array([0, 0], dtype=np.float32) - else: - x = (b1 * c2 - b2 * c1) / d - y = (a2 * c1 - a1 * c2) / d - - return np.array([x, y], dtype=np.float32) - - def quad2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. (4, 2) - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair - p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair - return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]]) - - def poly2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - tcl_poly = np.zeros_like(poly) - point_num = poly.shape[0] - - for idx in range(point_num // 2): - point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx] - ) * ratio_pair - tcl_poly[idx] = point_pair[0] - tcl_poly[point_num - 1 - idx] = point_pair[1] - return tcl_poly - - def gen_quad_tbo(self, quad, tcl_mask, tbo_map): - """ - Generate tbo_map for give quad. 
- """ - # upper and lower line function: ax + by + c = 0; - up_line = self.line_cross_two_point(quad[0], quad[1]) - lower_line = self.line_cross_two_point(quad[3], quad[2]) - - quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[1] - quad[2])) - quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) - - # average angle of left and right line. - angle = self.average_angle(quad) - - xy_in_poly = np.argwhere(tcl_mask == 1) - for y, x in xy_in_poly: - point = (x, y) - line = self.theta_line_cross_point(angle, point) - cross_point_upper = self.line_cross_point(up_line, line) - cross_point_lower = self.line_cross_point(lower_line, line) - ##FIX, offset reverse - upper_offset_x, upper_offset_y = cross_point_upper - point - lower_offset_x, lower_offset_y = cross_point_lower - point - tbo_map[y, x, 0] = upper_offset_y - tbo_map[y, x, 1] = upper_offset_x - tbo_map[y, x, 2] = lower_offset_y - tbo_map[y, x, 3] = lower_offset_x - tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2 - return tbo_map - - def poly2quads(self, poly): - """ - Split poly into quads. - """ - quad_list = [] - point_num = poly.shape[0] - - # point pair - point_pair_list = [] - for idx in range(point_num // 2): - point_pair = [poly[idx], poly[point_num - 1 - idx]] - point_pair_list.append(point_pair) - - quad_num = point_num // 2 - 1 - for idx in range(quad_num): - # reshape and adjust to clock-wise - quad_list.append((np.array(point_pair_list)[[idx, idx + 1]] - ).reshape(4, 2)[[0, 2, 3, 1]]) - - return np.array(quad_list) - - def rotate_im_poly(self, im, text_polys): - """ - rotate image with 90 / 180 / 270 degre - """ - im_w, im_h = im.shape[1], im.shape[0] - dst_im = im.copy() - dst_polys = [] - rand_degree_ratio = np.random.rand() - rand_degree_cnt = 1 - if rand_degree_ratio > 0.5: - rand_degree_cnt = 3 - for i in range(rand_degree_cnt): - dst_im = np.rot90(dst_im) - rot_degree = -90 * rand_degree_cnt - rot_angle = rot_degree * math.pi / 180.0 - n_poly = text_polys.shape[0] - cx, cy = 0.5 * im_w, 0.5 * im_h - ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0] - for i in range(n_poly): - wordBB = text_polys[i] - poly = [] - for j in range(4): # 16->4 - sx, sy = wordBB[j][0], wordBB[j][1] - dx = math.cos(rot_angle) * (sx - cx) - math.sin(rot_angle) * ( - sy - cy) + ncx - dy = math.sin(rot_angle) * (sx - cx) + math.cos(rot_angle) * ( - sy - cy) + ncy - poly.append([dx, dy]) - dst_polys.append(poly) - return dst_im, np.array(dst_polys, dtype=np.float32) - - def __call__(self, data): - input_size = 512 - im = data['image'] - text_polys = data['polys'] - text_tags = data['ignore_tags'] - text_strs = data['texts'] - h, w, _ = im.shape - text_polys, text_tags, hv_tags = self.check_and_validate_polys( - text_polys, text_tags, (h, w)) - if text_polys.shape[0] <= 0: - return None - # set aspect ratio and keep area fix - asp_scales = np.arange(1.0, 1.55, 0.1) - asp_scale = np.random.choice(asp_scales) - if np.random.rand() < 0.5: - asp_scale = 1.0 / asp_scale - asp_scale = math.sqrt(asp_scale) - - asp_wx = asp_scale - asp_hy = 1.0 / asp_scale - im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy) - text_polys[:, :, 0] *= asp_wx - text_polys[:, :, 1] *= asp_hy - - h, w, _ = im.shape - if max(h, w) > 2048: - rd_scale = 2048.0 / max(h, w) - im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale) - text_polys *= rd_scale - h, w, _ = im.shape - if min(h, w) < 16: - return None - - # no background - im, text_polys, text_tags, hv_tags, text_strs = self.crop_area( - im, - 
text_polys, - text_tags, - hv_tags, - text_strs, - crop_background=False) - - if text_polys.shape[0] == 0: - return None - # # continue for all ignore case - if np.sum((text_tags * 1.0)) >= text_tags.size: - return None - new_h, new_w, _ = im.shape - if (new_h is None) or (new_w is None): - return None - # resize image - std_ratio = float(input_size) / max(new_w, new_h) - rand_scales = np.array( - [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) - rz_scale = std_ratio * np.random.choice(rand_scales) - im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale) - text_polys[:, :, 0] *= rz_scale - text_polys[:, :, 1] *= rz_scale - - # add gaussian blur - if np.random.rand() < 0.1 * 0.5: - ks = np.random.permutation(5)[0] + 1 - ks = int(ks / 2) * 2 + 1 - im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) - # add brighter - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 + np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - # add darker - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 - np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - - # Padding the im to [input_size, input_size] - new_h, new_w, _ = im.shape - if min(new_w, new_h) < input_size * 0.5: - return None - im_padded = np.ones((input_size, input_size, 3), dtype=np.float32) - im_padded[:, :, 2] = 0.485 * 255 - im_padded[:, :, 1] = 0.456 * 255 - im_padded[:, :, 0] = 0.406 * 255 - - # Random the start position - del_h = input_size - new_h - del_w = input_size - new_w - sh, sw = 0, 0 - if del_h > 1: - sh = int(np.random.rand() * del_h) - if del_w > 1: - sw = int(np.random.rand() * del_w) - - # Padding - im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy() - text_polys[:, :, 0] += sw - text_polys[:, :, 1] += sh - - score_map, score_label_map, border_map, direction_map, training_mask, \ - pos_list, pos_mask, label_list, score_label_map_text_label = self.generate_tcl_ctc_label(input_size, - input_size, - text_polys, - text_tags, - text_strs, 0.25) - if len(label_list) <= 0: # eliminate negative samples - return None - pos_list_temp = np.zeros([64, 3]) - pos_mask_temp = np.zeros([64, 1]) - label_list_temp = np.zeros([self.max_text_length, 1]) + self.pad_num - - for i, label in enumerate(label_list): - n = len(label) - if n > self.max_text_length: - label_list[i] = label[:self.max_text_length] - continue - while n < self.max_text_length: - label.append([self.pad_num]) - n += 1 - - for i in range(len(label_list)): - label_list[i] = np.array(label_list[i]) - - if len(pos_list) <= 0 or len(pos_list) > self.max_text_nums: - return None - for __ in range(self.max_text_nums - len(pos_list), 0, -1): - pos_list.append(pos_list_temp) - pos_mask.append(pos_mask_temp) - label_list.append(label_list_temp) - - if self.img_id == self.batch_size - 1: - self.img_id = 0 - else: - self.img_id += 1 - - im_padded[:, :, 2] -= 0.485 * 255 - im_padded[:, :, 1] -= 0.456 * 255 - im_padded[:, :, 0] -= 0.406 * 255 - im_padded[:, :, 2] /= (255.0 * 0.229) - im_padded[:, :, 1] /= (255.0 * 0.224) - im_padded[:, :, 0] /= (255.0 * 0.225) - im_padded = im_padded.transpose((2, 0, 1)) - images = im_padded[::-1, :, :] - tcl_maps = score_map[np.newaxis, :, :] - tcl_label_maps = score_label_map[np.newaxis, :, :] - border_maps = border_map.transpose((2, 0, 1)) - direction_maps = direction_map.transpose((2, 0, 1)) - training_masks = training_mask[np.newaxis, :, :] - pos_list = np.array(pos_list) - pos_mask = np.array(pos_mask) - label_list = np.array(label_list) - data['images'] = images - data['tcl_maps'] = tcl_maps - 
data['tcl_label_maps'] = tcl_label_maps - data['border_maps'] = border_maps - data['direction_maps'] = direction_maps - data['training_masks'] = training_masks - data['label_list'] = label_list - data['pos_list'] = pos_list - data['pos_mask'] = pos_mask - return data diff --git a/backend/ppocr/data/imaug/randaugment.py b/backend/ppocr/data/imaug/randaugment.py deleted file mode 100644 index 56f114d2..00000000 --- a/backend/ppocr/data/imaug/randaugment.py +++ /dev/null @@ -1,143 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from PIL import Image, ImageEnhance, ImageOps -import numpy as np -import random -import six - - -class RawRandAugment(object): - def __init__(self, - num_layers=2, - magnitude=5, - fillcolor=(128, 128, 128), - **kwargs): - self.num_layers = num_layers - self.magnitude = magnitude - self.max_level = 10 - - abso_level = self.magnitude / self.max_level - self.level_map = { - "shearX": 0.3 * abso_level, - "shearY": 0.3 * abso_level, - "translateX": 150.0 / 331 * abso_level, - "translateY": 150.0 / 331 * abso_level, - "rotate": 30 * abso_level, - "color": 0.9 * abso_level, - "posterize": int(4.0 * abso_level), - "solarize": 256.0 * abso_level, - "contrast": 0.9 * abso_level, - "sharpness": 0.9 * abso_level, - "brightness": 0.9 * abso_level, - "autocontrast": 0, - "equalize": 0, - "invert": 0 - } - - # from https://stackoverflow.com/questions/5252170/ - # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand - def rotate_with_fill(img, magnitude): - rot = img.convert("RGBA").rotate(magnitude) - return Image.composite(rot, - Image.new("RGBA", rot.size, (128, ) * 4), - rot).convert(img.mode) - - rnd_ch_op = random.choice - - self.func = { - "shearX": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, magnitude * rnd_ch_op([-1, 1]), 0, 0, 1, 0), - Image.BICUBIC, - fillcolor=fillcolor), - "shearY": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, 0, magnitude * rnd_ch_op([-1, 1]), 1, 0), - Image.BICUBIC, - fillcolor=fillcolor), - "translateX": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, magnitude * img.size[0] * rnd_ch_op([-1, 1]), 0, 1, 0), - fillcolor=fillcolor), - "translateY": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, 0, 0, 1, magnitude * img.size[1] * rnd_ch_op([-1, 1])), - fillcolor=fillcolor), - "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), - "color": lambda img, magnitude: ImageEnhance.Color(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "posterize": lambda img, magnitude: - ImageOps.posterize(img, magnitude), - "solarize": lambda img, magnitude: - ImageOps.solarize(img, magnitude), - "contrast": lambda img, magnitude: - ImageEnhance.Contrast(img).enhance( - 1 + magnitude * 
rnd_ch_op([-1, 1])), - "sharpness": lambda img, magnitude: - ImageEnhance.Sharpness(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "brightness": lambda img, magnitude: - ImageEnhance.Brightness(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "autocontrast": lambda img, magnitude: - ImageOps.autocontrast(img), - "equalize": lambda img, magnitude: ImageOps.equalize(img), - "invert": lambda img, magnitude: ImageOps.invert(img) - } - - def __call__(self, img): - avaiable_op_names = list(self.level_map.keys()) - for layer_num in range(self.num_layers): - op_name = np.random.choice(avaiable_op_names) - img = self.func[op_name](img, self.level_map[op_name]) - return img - - -class RandAugment(RawRandAugment): - """ RandAugment wrapper to auto fit different img types """ - - def __init__(self, prob=0.5, *args, **kwargs): - self.prob = prob - if six.PY2: - super(RandAugment, self).__init__(*args, **kwargs) - else: - super().__init__(*args, **kwargs) - - def __call__(self, data): - if np.random.rand() > self.prob: - return data - img = data['image'] - if not isinstance(img, Image.Image): - img = np.ascontiguousarray(img) - img = Image.fromarray(img) - - if six.PY2: - img = super(RandAugment, self).__call__(img) - else: - img = super().__call__(img) - - if isinstance(img, Image.Image): - img = np.asarray(img) - data['image'] = img - return data diff --git a/backend/ppocr/data/imaug/random_crop_data.py b/backend/ppocr/data/imaug/random_crop_data.py deleted file mode 100644 index 64aa110d..00000000 --- a/backend/ppocr/data/imaug/random_crop_data.py +++ /dev/null @@ -1,234 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
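
The random-crop module removed below samples crop edges only from rows and columns not covered by any text polygon, so a crop never slices through a box. A compact sketch of the gap-splitting step, assuming NumPy; note that unlike the deleted split_regions helper, this version also keeps the final run of free indices, which the original appears to drop:

    import numpy as np

    def split_regions(axis):
        # Split a sorted array of free indices into runs of consecutive values.
        # Crop edges are then sampled from distinct runs (or from the whole
        # free axis when only one run exists).
        regions, start = [], 0
        for i in range(1, axis.shape[0]):
            if axis[i] != axis[i - 1] + 1:
                regions.append(axis[start:i])
                start = i
        regions.append(axis[start:])
        return regions

    # Example: a text box occupies columns 3..5 of a width-10 image.
    w_array = np.zeros(10, dtype=np.int32)
    w_array[3:6] = 1
    free_cols = np.where(w_array == 0)[0]  # [0 1 2 6 7 8 9]
    print(split_regions(free_cols))        # [array([0, 1, 2]), array([6, 7, 8, 9])]
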
-""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 -import random - - -def is_poly_in_rect(poly, x, y, w, h): - poly = np.array(poly) - if poly[:, 0].min() < x or poly[:, 0].max() > x + w: - return False - if poly[:, 1].min() < y or poly[:, 1].max() > y + h: - return False - return True - - -def is_poly_outside_rect(poly, x, y, w, h): - poly = np.array(poly) - if poly[:, 0].max() < x or poly[:, 0].min() > x + w: - return True - if poly[:, 1].max() < y or poly[:, 1].min() > y + h: - return True - return False - - -def split_regions(axis): - regions = [] - min_axis = 0 - for i in range(1, axis.shape[0]): - if axis[i] != axis[i - 1] + 1: - region = axis[min_axis:i] - min_axis = i - regions.append(region) - return regions - - -def random_select(axis, max_size): - xx = np.random.choice(axis, size=2) - xmin = np.min(xx) - xmax = np.max(xx) - xmin = np.clip(xmin, 0, max_size - 1) - xmax = np.clip(xmax, 0, max_size - 1) - return xmin, xmax - - -def region_wise_random_select(regions, max_size): - selected_index = list(np.random.choice(len(regions), 2)) - selected_values = [] - for index in selected_index: - axis = regions[index] - xx = int(np.random.choice(axis, size=1)) - selected_values.append(xx) - xmin = min(selected_values) - xmax = max(selected_values) - return xmin, xmax - - -def crop_area(im, text_polys, min_crop_side_ratio, max_tries): - h, w, _ = im.shape - h_array = np.zeros(h, dtype=np.int32) - w_array = np.zeros(w, dtype=np.int32) - for points in text_polys: - points = np.round(points, decimals=0).astype(np.int32) - minx = np.min(points[:, 0]) - maxx = np.max(points[:, 0]) - w_array[minx:maxx] = 1 - miny = np.min(points[:, 1]) - maxy = np.max(points[:, 1]) - h_array[miny:maxy] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - - if len(h_axis) == 0 or len(w_axis) == 0: - return 0, 0, w, h - - h_regions = split_regions(h_axis) - w_regions = split_regions(w_axis) - - for i in range(max_tries): - if len(w_regions) > 1: - xmin, xmax = region_wise_random_select(w_regions, w) - else: - xmin, xmax = random_select(w_axis, w) - if len(h_regions) > 1: - ymin, ymax = region_wise_random_select(h_regions, h) - else: - ymin, ymax = random_select(h_axis, h) - - if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h: - # area too small - continue - num_poly_in_rect = 0 - for poly in text_polys: - if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin, - ymax - ymin): - num_poly_in_rect += 1 - break - - if num_poly_in_rect > 0: - return xmin, ymin, xmax - xmin, ymax - ymin - - return 0, 0, w, h - - -class EastRandomCropData(object): - def __init__(self, - size=(640, 640), - max_tries=10, - min_crop_side_ratio=0.1, - keep_ratio=True, - **kwargs): - self.size = size - self.max_tries = max_tries - self.min_crop_side_ratio = min_crop_side_ratio - self.keep_ratio = keep_ratio - - def __call__(self, data): - img = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - texts = data['texts'] - all_care_polys = [ - text_polys[i] for i, tag in enumerate(ignore_tags) if not tag - ] - # 计算crop区域 - crop_x, crop_y, crop_w, crop_h = crop_area( - img, all_care_polys, self.min_crop_side_ratio, self.max_tries) - # crop 图片 
保持比例填充 - scale_w = self.size[0] / crop_w - scale_h = self.size[1] / crop_h - scale = min(scale_w, scale_h) - h = int(crop_h * scale) - w = int(crop_w * scale) - if self.keep_ratio: - padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), - img.dtype) - padimg[:h, :w] = cv2.resize( - img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h)) - img = padimg - else: - img = cv2.resize( - img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], - tuple(self.size)) - # crop 文本框 - text_polys_crop = [] - ignore_tags_crop = [] - texts_crop = [] - for poly, text, tag in zip(text_polys, texts, ignore_tags): - poly = ((poly - (crop_x, crop_y)) * scale).tolist() - if not is_poly_outside_rect(poly, 0, 0, w, h): - text_polys_crop.append(poly) - ignore_tags_crop.append(tag) - texts_crop.append(text) - data['image'] = img - data['polys'] = np.array(text_polys_crop) - data['ignore_tags'] = ignore_tags_crop - data['texts'] = texts_crop - return data - - -class RandomCropImgMask(object): - def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs): - self.size = size - self.main_key = main_key - self.crop_keys = crop_keys - self.p = p - - def __call__(self, data): - image = data['image'] - - h, w = image.shape[0:2] - th, tw = self.size - if w == tw and h == th: - return data - - mask = data[self.main_key] - if np.max(mask) > 0 and random.random() > self.p: - # make sure to crop the text region - tl = np.min(np.where(mask > 0), axis=1) - (th, tw) - tl[tl < 0] = 0 - br = np.max(np.where(mask > 0), axis=1) - (th, tw) - br[br < 0] = 0 - - br[0] = min(br[0], h - th) - br[1] = min(br[1], w - tw) - - i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0 - j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0 - else: - i = random.randint(0, h - th) if h - th > 0 else 0 - j = random.randint(0, w - tw) if w - tw > 0 else 0 - - # return i, j, th, tw - for k in data: - if k in self.crop_keys: - if len(data[k].shape) == 3: - if np.argmin(data[k].shape) == 0: - img = data[k][:, i:i + th, j:j + tw] - if img.shape[1] != img.shape[2]: - a = 1 - elif np.argmin(data[k].shape) == 2: - img = data[k][i:i + th, j:j + tw, :] - if img.shape[1] != img.shape[0]: - a = 1 - else: - img = data[k] - else: - img = data[k][i:i + th, j:j + tw] - if img.shape[0] != img.shape[1]: - a = 1 - data[k] = img - return data diff --git a/backend/ppocr/data/imaug/rec_img_aug.py b/backend/ppocr/data/imaug/rec_img_aug.py deleted file mode 100644 index 7483dffe..00000000 --- a/backend/ppocr/data/imaug/rec_img_aug.py +++ /dev/null @@ -1,601 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
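
Most of the recognizer preprocessing removed below funnels into one pattern: resize to a fixed height while preserving aspect ratio, normalize to [-1, 1], and right-pad the width. A condensed sketch under the assumption of a 3-channel input (the deleted resize_norm_img also handles single-channel images and a no-padding mode):

    import math
    import cv2
    import numpy as np

    def resize_norm_img(img, image_shape=(3, 32, 320)):
        # Resize to height imgH keeping aspect ratio, cap the width at imgW,
        # convert HWC uint8 to CHW float32 in [-1, 1], and zero-pad on the
        # right. valid_ratio records how much of the width is real content.
        imgC, imgH, imgW = image_shape
        h, w = img.shape[:2]
        resized_w = min(imgW, int(math.ceil(imgH * (w / float(h)))))
        resized = cv2.resize(img, (resized_w, imgH)).astype('float32')
        resized = resized.transpose((2, 0, 1)) / 255.0  # HWC -> CHW, [0, 1]
        resized = (resized - 0.5) / 0.5                 # [0, 1] -> [-1, 1]
        padded = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padded[:, :, :resized_w] = resized
        return padded, min(1.0, resized_w / float(imgW))
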
- -import math -import cv2 -import numpy as np -import random -import copy -from PIL import Image -from .text_image_aug import tia_perspective, tia_stretch, tia_distort - - -class RecAug(object): - def __init__(self, use_tia=True, aug_prob=0.4, **kwargs): - self.use_tia = use_tia - self.aug_prob = aug_prob - - def __call__(self, data): - img = data['image'] - img = warp(img, 10, self.use_tia, self.aug_prob) - data['image'] = img - return data - - -class RecConAug(object): - def __init__(self, - prob=0.5, - image_shape=(32, 320, 3), - max_text_length=25, - ext_data_num=1, - **kwargs): - self.ext_data_num = ext_data_num - self.prob = prob - self.max_text_length = max_text_length - self.image_shape = image_shape - self.max_wh_ratio = self.image_shape[1] / self.image_shape[0] - - def merge_ext_data(self, data, ext_data): - ori_w = round(data['image'].shape[1] / data['image'].shape[0] * - self.image_shape[0]) - ext_w = round(ext_data['image'].shape[1] / ext_data['image'].shape[0] * - self.image_shape[0]) - data['image'] = cv2.resize(data['image'], (ori_w, self.image_shape[0])) - ext_data['image'] = cv2.resize(ext_data['image'], - (ext_w, self.image_shape[0])) - data['image'] = np.concatenate( - [data['image'], ext_data['image']], axis=1) - data["label"] += ext_data["label"] - return data - - def __call__(self, data): - rnd_num = random.random() - if rnd_num > self.prob: - return data - for idx, ext_data in enumerate(data["ext_data"]): - if len(data["label"]) + len(ext_data[ - "label"]) > self.max_text_length: - break - concat_ratio = data['image'].shape[1] / data['image'].shape[ - 0] + ext_data['image'].shape[1] / ext_data['image'].shape[0] - if concat_ratio > self.max_wh_ratio: - break - data = self.merge_ext_data(data, ext_data) - data.pop("ext_data") - return data - - -class ClsResizeImg(object): - def __init__(self, image_shape, **kwargs): - self.image_shape = image_shape - - def __call__(self, data): - img = data['image'] - norm_img, _ = resize_norm_img(img, self.image_shape) - data['image'] = norm_img - return data - - -class NRTRRecResizeImg(object): - def __init__(self, image_shape, resize_type, padding=False, **kwargs): - self.image_shape = image_shape - self.resize_type = resize_type - self.padding = padding - - def __call__(self, data): - img = data['image'] - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - image_shape = self.image_shape - if self.padding: - imgC, imgH, imgW = image_shape - # todo: change to 0 and modified image shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - norm_img = np.expand_dims(resized_image, -1) - norm_img = norm_img.transpose((2, 0, 1)) - resized_image = norm_img.astype(np.float32) / 128. - 1. - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - data['image'] = padding_im - return data - if self.resize_type == 'PIL': - image_pil = Image.fromarray(np.uint8(img)) - img = image_pil.resize(self.image_shape, Image.ANTIALIAS) - img = np.array(img) - if self.resize_type == 'OpenCV': - img = cv2.resize(img, self.image_shape) - norm_img = np.expand_dims(img, -1) - norm_img = norm_img.transpose((2, 0, 1)) - data['image'] = norm_img.astype(np.float32) / 128. - 1. 
- return data - - -class RecResizeImg(object): - def __init__(self, - image_shape, - infer_mode=False, - character_dict_path='./ppocr/utils/ppocr_keys_v1.txt', - padding=True, - **kwargs): - self.image_shape = image_shape - self.infer_mode = infer_mode - self.character_dict_path = character_dict_path - self.padding = padding - - def __call__(self, data): - img = data['image'] - if self.infer_mode and self.character_dict_path is not None: - norm_img, valid_ratio = resize_norm_img_chinese(img, - self.image_shape) - else: - norm_img, valid_ratio = resize_norm_img(img, self.image_shape, - self.padding) - data['image'] = norm_img - data['valid_ratio'] = valid_ratio - return data - - -class SRNRecResizeImg(object): - def __init__(self, image_shape, num_heads, max_text_length, **kwargs): - self.image_shape = image_shape - self.num_heads = num_heads - self.max_text_length = max_text_length - - def __call__(self, data): - img = data['image'] - norm_img = resize_norm_img_srn(img, self.image_shape) - data['image'] = norm_img - [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ - srn_other_inputs(self.image_shape, self.num_heads, self.max_text_length) - - data['encoder_word_pos'] = encoder_word_pos - data['gsrm_word_pos'] = gsrm_word_pos - data['gsrm_slf_attn_bias1'] = gsrm_slf_attn_bias1 - data['gsrm_slf_attn_bias2'] = gsrm_slf_attn_bias2 - return data - - -class SARRecResizeImg(object): - def __init__(self, image_shape, width_downsample_ratio=0.25, **kwargs): - self.image_shape = image_shape - self.width_downsample_ratio = width_downsample_ratio - - def __call__(self, data): - img = data['image'] - norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar( - img, self.image_shape, self.width_downsample_ratio) - data['image'] = norm_img - data['resized_shape'] = resize_shape - data['pad_shape'] = pad_shape - data['valid_ratio'] = valid_ratio - return data - - -class PRENResizeImg(object): - def __init__(self, image_shape, **kwargs): - """ - Accroding to original paper's realization, it's a hard resize method here. - So maybe you should optimize it to fit for your task better. - """ - self.dst_h, self.dst_w = image_shape - - def __call__(self, data): - img = data['image'] - resized_img = cv2.resize( - img, (self.dst_w, self.dst_h), interpolation=cv2.INTER_LINEAR) - resized_img = resized_img.transpose((2, 0, 1)) / 255 - resized_img -= 0.5 - resized_img /= 0.5 - data['image'] = resized_img.astype(np.float32) - return data - - -def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25): - imgC, imgH, imgW_min, imgW_max = image_shape - h = img.shape[0] - w = img.shape[1] - valid_ratio = 1.0 - # make sure new_width is an integral multiple of width_divisor. 
- width_divisor = int(1 / width_downsample_ratio) - # resize - ratio = w / float(h) - resize_w = math.ceil(imgH * ratio) - if resize_w % width_divisor != 0: - resize_w = round(resize_w / width_divisor) * width_divisor - if imgW_min is not None: - resize_w = max(imgW_min, resize_w) - if imgW_max is not None: - valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) - resize_w = min(imgW_max, resize_w) - resized_image = cv2.resize(img, (resize_w, imgH)) - resized_image = resized_image.astype('float32') - # norm - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - resize_shape = resized_image.shape - padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) - padding_im[:, :, 0:resize_w] = resized_image - pad_shape = padding_im.shape - - return padding_im, resize_shape, pad_shape, valid_ratio - - -def resize_norm_img(img, image_shape, padding=True): - imgC, imgH, imgW = image_shape - h = img.shape[0] - w = img.shape[1] - if not padding: - resized_image = cv2.resize( - img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) - resized_w = imgW - else: - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - valid_ratio = min(1.0, float(resized_w / imgW)) - return padding_im, valid_ratio - - -def resize_norm_img_chinese(img, image_shape): - imgC, imgH, imgW = image_shape - # todo: change to 0 and modified image shape - max_wh_ratio = imgW * 1.0 / imgH - h, w = img.shape[0], img.shape[1] - ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, ratio) - imgW = int(imgH * max_wh_ratio) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - valid_ratio = min(1.0, float(resized_w / imgW)) - return padding_im, valid_ratio - - -def resize_norm_img_srn(img, image_shape): - imgC, imgH, imgW = image_shape - - img_black = np.zeros((imgH, imgW)) - im_hei = img.shape[0] - im_wid = img.shape[1] - - if im_wid <= im_hei * 1: - img_new = cv2.resize(img, (imgH * 1, imgH)) - elif im_wid <= im_hei * 2: - img_new = cv2.resize(img, (imgH * 2, imgH)) - elif im_wid <= im_hei * 3: - img_new = cv2.resize(img, (imgH * 3, imgH)) - else: - img_new = cv2.resize(img, (imgW, imgH)) - - img_np = np.asarray(img_new) - img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) - img_black[:, 0:img_np.shape[1]] = img_np - img_black = img_black[:, :, np.newaxis] - - row, col, c = img_black.shape - c = 1 - - return np.reshape(img_black, (c, row, col)).astype(np.float32) - - -def 
srn_other_inputs(image_shape, num_heads, max_text_length): - - imgC, imgH, imgW = image_shape - feature_dim = int((imgH / 8) * (imgW / 8)) - - encoder_word_pos = np.array(range(0, feature_dim)).reshape( - (feature_dim, 1)).astype('int64') - gsrm_word_pos = np.array(range(0, max_text_length)).reshape( - (max_text_length, 1)).astype('int64') - - gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) - gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( - [1, max_text_length, max_text_length]) - gsrm_slf_attn_bias1 = np.tile(gsrm_slf_attn_bias1, - [num_heads, 1, 1]) * [-1e9] - - gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( - [1, max_text_length, max_text_length]) - gsrm_slf_attn_bias2 = np.tile(gsrm_slf_attn_bias2, - [num_heads, 1, 1]) * [-1e9] - - return [ - encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2 - ] - - -def flag(): - """ - flag - """ - return 1 if random.random() > 0.5000001 else -1 - - -def cvtColor(img): - """ - cvtColor - """ - hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - delta = 0.001 * random.random() * flag() - hsv[:, :, 2] = hsv[:, :, 2] * (1 + delta) - new_img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) - return new_img - - -def blur(img): - """ - blur - """ - h, w, _ = img.shape - if h > 10 and w > 10: - return cv2.GaussianBlur(img, (5, 5), 1) - else: - return img - - -def jitter(img): - """ - jitter - """ - w, h, _ = img.shape - if h > 10 and w > 10: - thres = min(w, h) - s = int(random.random() * thres * 0.01) - src_img = img.copy() - for i in range(s): - img[i:, i:, :] = src_img[:w - i, :h - i, :] - return img - else: - return img - - -def add_gasuss_noise(image, mean=0, var=0.1): - """ - Gasuss noise - """ - - noise = np.random.normal(mean, var**0.5, image.shape) - out = image + 0.5 * noise - out = np.clip(out, 0, 255) - out = np.uint8(out) - return out - - -def get_crop(image): - """ - random crop - """ - h, w, _ = image.shape - top_min = 1 - top_max = 8 - top_crop = int(random.randint(top_min, top_max)) - top_crop = min(top_crop, h - 1) - crop_img = image.copy() - ratio = random.randint(0, 1) - if ratio: - crop_img = crop_img[top_crop:h, :, :] - else: - crop_img = crop_img[0:h - top_crop, :, :] - return crop_img - - -class Config: - """ - Config - """ - - def __init__(self, use_tia): - self.anglex = random.random() * 30 - self.angley = random.random() * 15 - self.anglez = random.random() * 10 - self.fov = 42 - self.r = 0 - self.shearx = random.random() * 0.3 - self.sheary = random.random() * 0.05 - self.borderMode = cv2.BORDER_REPLICATE - self.use_tia = use_tia - - def make(self, w, h, ang): - """ - make - """ - self.anglex = random.random() * 5 * flag() - self.angley = random.random() * 5 * flag() - self.anglez = -1 * random.random() * int(ang) * flag() - self.fov = 42 - self.r = 0 - self.shearx = 0 - self.sheary = 0 - self.borderMode = cv2.BORDER_REPLICATE - self.w = w - self.h = h - - self.perspective = self.use_tia - self.stretch = self.use_tia - self.distort = self.use_tia - - self.crop = True - self.affine = False - self.reverse = True - self.noise = True - self.jitter = True - self.blur = True - self.color = True - - -def rad(x): - """ - rad - """ - return x * np.pi / 180 - - -def get_warpR(config): - """ - get_warpR - """ - anglex, angley, anglez, fov, w, h, r = \ - config.anglex, config.angley, config.anglez, config.fov, config.w, config.h, config.r - if w > 69 and w < 112: - anglex = anglex * 1.5 - - z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2)) - # Homogeneous coordinate 
transformation matrix - rx = np.array([[1, 0, 0, 0], - [0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0], [ - 0, - -np.sin(rad(anglex)), - np.cos(rad(anglex)), - 0, - ], [0, 0, 0, 1]], np.float32) - ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0], - [0, 1, 0, 0], [ - -np.sin(rad(angley)), - 0, - np.cos(rad(angley)), - 0, - ], [0, 0, 0, 1]], np.float32) - rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0], - [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0], - [0, 0, 1, 0], [0, 0, 0, 1]], np.float32) - r = rx.dot(ry).dot(rz) - # generate 4 points - pcenter = np.array([h / 2, w / 2, 0, 0], np.float32) - p1 = np.array([0, 0, 0, 0], np.float32) - pcenter - p2 = np.array([w, 0, 0, 0], np.float32) - pcenter - p3 = np.array([0, h, 0, 0], np.float32) - pcenter - p4 = np.array([w, h, 0, 0], np.float32) - pcenter - dst1 = r.dot(p1) - dst2 = r.dot(p2) - dst3 = r.dot(p3) - dst4 = r.dot(p4) - list_dst = np.array([dst1, dst2, dst3, dst4]) - org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32) - dst = np.zeros((4, 2), np.float32) - # Project onto the image plane - dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0] - dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1] - - warpR = cv2.getPerspectiveTransform(org, dst) - - dst1, dst2, dst3, dst4 = dst - r1 = int(min(dst1[1], dst2[1])) - r2 = int(max(dst3[1], dst4[1])) - c1 = int(min(dst1[0], dst3[0])) - c2 = int(max(dst2[0], dst4[0])) - - try: - ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1)) - - dx = -c1 - dy = -r1 - T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]]) - ret = T1.dot(warpR) - except: - ratio = 1.0 - T1 = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]]) - ret = T1 - return ret, (-r1, -c1), ratio, dst - - -def get_warpAffine(config): - """ - get_warpAffine - """ - anglez = config.anglez - rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0], - [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0]], np.float32) - return rz - - -def warp(img, ang, use_tia=True, prob=0.4): - """ - warp - """ - h, w, _ = img.shape - config = Config(use_tia=use_tia) - config.make(w, h, ang) - new_img = img - - if config.distort: - img_height, img_width = img.shape[0:2] - if random.random() <= prob and img_height >= 20 and img_width >= 20: - new_img = tia_distort(new_img, random.randint(3, 6)) - - if config.stretch: - img_height, img_width = img.shape[0:2] - if random.random() <= prob and img_height >= 20 and img_width >= 20: - new_img = tia_stretch(new_img, random.randint(3, 6)) - - if config.perspective: - if random.random() <= prob: - new_img = tia_perspective(new_img) - - if config.crop: - img_height, img_width = img.shape[0:2] - if random.random() <= prob and img_height >= 20 and img_width >= 20: - new_img = get_crop(new_img) - - if config.blur: - if random.random() <= prob: - new_img = blur(new_img) - if config.color: - if random.random() <= prob: - new_img = cvtColor(new_img) - if config.jitter: - new_img = jitter(new_img) - if config.noise: - if random.random() <= prob: - new_img = add_gasuss_noise(new_img) - if config.reverse: - if random.random() <= prob: - new_img = 255 - new_img - return new_img diff --git a/backend/ppocr/data/imaug/sast_process.py b/backend/ppocr/data/imaug/sast_process.py deleted file mode 100644 index 08d03b19..00000000 --- a/backend/ppocr/data/imaug/sast_process.py +++ /dev/null @@ -1,777 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -""" -This part code is refered from: -https://github.com/songdejia/EAST/blob/master/data_utils.py -""" -import math -import cv2 -import numpy as np -import json -import sys -import os - -__all__ = ['SASTProcessTrain'] - - -class SASTProcessTrain(object): - def __init__(self, - image_shape=[512, 512], - min_crop_size=24, - min_crop_side_ratio=0.3, - min_text_size=10, - max_text_size=512, - **kwargs): - self.input_size = image_shape[1] - self.min_crop_size = min_crop_size - self.min_crop_side_ratio = min_crop_side_ratio - self.min_text_size = min_text_size - self.max_text_size = max_text_size - - def quad_area(self, poly): - """ - compute area of a polygon - :param poly: - :return: - """ - edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] - return np.sum(edge) / 2. - - def gen_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - if True: - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - center_point = rect[0] - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad - - def check_and_validate_polys(self, polys, tags, xxx_todo_changeme): - """ - check so that the text poly is in the same direction, - and also filter some invalid polygons - :param polys: - :param tags: - :return: - """ - (h, w) = xxx_todo_changeme - if polys.shape[0] == 0: - return polys, np.array([]), np.array([]) - polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1) - polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1) - - validated_polys = [] - validated_tags = [] - hv_tags = [] - for poly, tag in zip(polys, tags): - quad = self.gen_quad_from_poly(poly) - p_area = self.quad_area(quad) - if abs(p_area) < 1: - print('invalid poly') - continue - if p_area > 0: - if tag == False: - print('poly in wrong direction') - tag = True # reversed cases should be ignore - poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, - 1), :] - quad = quad[(0, 3, 2, 1), :] - - len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - - quad[2]) - len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - - quad[2]) - hv_tag = 1 - - if len_w * 2.0 < len_h: - hv_tag = 0 - - validated_polys.append(poly) - validated_tags.append(tag) - hv_tags.append(hv_tag) - return np.array(validated_polys), np.array(validated_tags), 
np.array( - hv_tags) - - def crop_area(self, - im, - polys, - tags, - hv_tags, - crop_background=False, - max_tries=25): - """ - make random crop from the input image - :param im: - :param polys: - :param tags: - :param crop_background: - :param max_tries: 50 -> 25 - :return: - """ - h, w, _ = im.shape - pad_h = h // 10 - pad_w = w // 10 - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - if len(h_axis) == 0 or len(w_axis) == 0: - return im, polys, tags, hv_tags - for i in range(max_tries): - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - # if xmax - xmin < ARGS.min_crop_side_ratio * w or \ - # ymax - ymin < ARGS.min_crop_side_ratio * h: - if xmax - xmin < self.min_crop_size or \ - ymax - ymin < self.min_crop_size: - # area too small - continue - if polys.shape[0] != 0: - poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \ - & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax) - selected_polys = np.where( - np.sum(poly_axis_in_area, axis=1) == 4)[0] - else: - selected_polys = [] - if len(selected_polys) == 0: - # no text in this area - if crop_background: - return im[ymin : ymax + 1, xmin : xmax + 1, :], \ - polys[selected_polys], tags[selected_polys], hv_tags[selected_polys] - else: - continue - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = polys[selected_polys] - tags = tags[selected_polys] - hv_tags = hv_tags[selected_polys] - polys[:, :, 0] -= xmin - polys[:, :, 1] -= ymin - return im, polys, tags, hv_tags - - return im, polys, tags, hv_tags - - def generate_direction_map(self, poly_quads, direction_map): - """ - """ - width_list = [] - height_list = [] - for quad in poly_quads: - quad_w = (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) / 2.0 - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - width_list.append(quad_w) - height_list.append(quad_h) - norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0) - average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0) - - for quad in poly_quads: - direct_vector_full = ( - (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 - direct_vector = direct_vector_full / ( - np.linalg.norm(direct_vector_full) + 1e-6) * norm_width - direction_label = tuple( - map(float, [ - direct_vector[0], direct_vector[1], 1.0 / (average_height + - 1e-6) - ])) - cv2.fillPoly(direction_map, - quad.round().astype(np.int32)[np.newaxis, :, :], - direction_label) - return direction_map - - def calculate_average_height(self, poly_quads): - """ - """ - height_list = [] - for quad in poly_quads: - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - height_list.append(quad_h) - average_height = max(sum(height_list) / len(height_list), 1.0) - return average_height - - def generate_tcl_label(self, - hw, - polys, - 
tags, - ds_ratio, - tcl_ratio=0.3, - shrink_ratio_of_width=0.15): - """ - Generate polygon. - """ - h, w = hw - h, w = int(h * ds_ratio), int(w * ds_ratio) - polys = polys * ds_ratio - - score_map = np.zeros( - ( - h, - w, ), dtype=np.float32) - tbo_map = np.zeros((h, w, 5), dtype=np.float32) - training_mask = np.ones( - ( - h, - w, ), dtype=np.float32) - direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape( - [1, 1, 3]).astype(np.float32) - - for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] - tag = poly_tag[1] - - # generate min_area_quad - min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) - - if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \ - or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio: - continue - - if tag: - # continue - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0.15) - else: - tcl_poly = self.poly2tcl(poly, tcl_ratio) - tcl_quads = self.poly2quads(tcl_poly) - poly_quads = self.poly2quads(poly) - # stcl map - stcl_quads, quad_index = self.shrink_poly_along_width( - tcl_quads, - shrink_ratio_of_width=shrink_ratio_of_width, - expand_height_ratio=1.0 / tcl_ratio) - # generate tcl map - cv2.fillPoly(score_map, - np.round(stcl_quads).astype(np.int32), 1.0) - - # generate tbo map - for idx, quad in enumerate(stcl_quads): - quad_mask = np.zeros((h, w), dtype=np.float32) - quad_mask = cv2.fillPoly( - quad_mask, - np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) - tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], - quad_mask, tbo_map) - return score_map, tbo_map, training_mask - - def generate_tvo_and_tco(self, - hw, - polys, - tags, - tcl_ratio=0.3, - ds_ratio=0.25): - """ - Generate tcl map, tvo map and tbo map. 
- """ - h, w = hw - h, w = int(h * ds_ratio), int(w * ds_ratio) - polys = polys * ds_ratio - poly_mask = np.zeros((h, w), dtype=np.float32) - - tvo_map = np.ones((9, h, w), dtype=np.float32) - tvo_map[0:-1:2] = np.tile(np.arange(0, w), (h, 1)) - tvo_map[1:-1:2] = np.tile(np.arange(0, w), (h, 1)).T - poly_tv_xy_map = np.zeros((8, h, w), dtype=np.float32) - - # tco map - tco_map = np.ones((3, h, w), dtype=np.float32) - tco_map[0] = np.tile(np.arange(0, w), (h, 1)) - tco_map[1] = np.tile(np.arange(0, w), (h, 1)).T - poly_tc_xy_map = np.zeros((2, h, w), dtype=np.float32) - - poly_short_edge_map = np.ones((h, w), dtype=np.float32) - - for poly, poly_tag in zip(polys, tags): - - if poly_tag == True: - continue - - # adjust point order for vertical poly - poly = self.adjust_point(poly) - - # generate min_area_quad - min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) - - # generate tcl map and text, 128 * 128 - tcl_poly = self.poly2tcl(poly, tcl_ratio) - - # generate poly_tv_xy_map - for idx in range(4): - cv2.fillPoly( - poly_tv_xy_map[2 * idx], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(min(max(min_area_quad[idx, 0], 0), w))) - cv2.fillPoly( - poly_tv_xy_map[2 * idx + 1], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(min(max(min_area_quad[idx, 1], 0), h))) - - # generate poly_tc_xy_map - for idx in range(2): - cv2.fillPoly( - poly_tc_xy_map[idx], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(center_point[idx])) - - # generate poly_short_edge_map - cv2.fillPoly( - poly_short_edge_map, - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(max(min(min_area_quad_h, min_area_quad_w), 1.0))) - - # generate poly_mask and training_mask - cv2.fillPoly(poly_mask, - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - 1) - - tvo_map *= poly_mask - tvo_map[:8] -= poly_tv_xy_map - tvo_map[-1] /= poly_short_edge_map - tvo_map = tvo_map.transpose((1, 2, 0)) - - tco_map *= poly_mask - tco_map[:2] -= poly_tc_xy_map - tco_map[-1] /= poly_short_edge_map - tco_map = tco_map.transpose((1, 2, 0)) - - return tvo_map, tco_map - - def adjust_point(self, poly): - """ - adjust point order. - """ - point_num = poly.shape[0] - if point_num == 4: - len_1 = np.linalg.norm(poly[0] - poly[1]) - len_2 = np.linalg.norm(poly[1] - poly[2]) - len_3 = np.linalg.norm(poly[2] - poly[3]) - len_4 = np.linalg.norm(poly[3] - poly[0]) - - if (len_1 + len_3) * 1.5 < (len_2 + len_4): - poly = poly[[1, 2, 3, 0], :] - - elif point_num > 4: - vector_1 = poly[0] - poly[1] - vector_2 = poly[1] - poly[2] - cos_theta = np.dot(vector_1, vector_2) / ( - np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) - theta = np.arccos(np.round(cos_theta, decimals=4)) - - if abs(theta) > (70 / 180 * math.pi): - index = list(range(1, point_num)) + [0] - poly = poly[np.array(index), :] - return poly - - def gen_min_area_quad_from_poly(self, poly): - """ - Generate min area quad from poly. 
- """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - if point_num == 4: - min_area_quad = poly - center_point = np.sum(poly, axis=0) / 4 - else: - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - center_point = rect[0] - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad, center_point - - def shrink_quad_along_width(self, - quad, - begin_width_ratio=0., - end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - def shrink_poly_along_width(self, - quads, - shrink_ratio_of_width, - expand_height_ratio=1.0): - """ - shrink poly with given length. - """ - upper_edge_list = [] - - def get_cut_info(edge_len_list, cut_len): - for idx, edge_len in enumerate(edge_len_list): - cut_len -= edge_len - if cut_len <= 0.000001: - ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx] - return idx, ratio - - for quad in quads: - upper_edge_len = np.linalg.norm(quad[0] - quad[1]) - upper_edge_list.append(upper_edge_len) - - # length of left edge and right edge. - left_length = np.linalg.norm(quads[0][0] - quads[0][ - 3]) * expand_height_ratio - right_length = np.linalg.norm(quads[-1][1] - quads[-1][ - 2]) * expand_height_ratio - - shrink_length = min(left_length, right_length, - sum(upper_edge_list)) * shrink_ratio_of_width - # shrinking length - upper_len_left = shrink_length - upper_len_right = sum(upper_edge_list) - shrink_length - - left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left) - left_quad = self.shrink_quad_along_width( - quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) - right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right) - right_quad = self.shrink_quad_along_width( - quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) - - out_quad_list = [] - if left_idx == right_idx: - out_quad_list.append( - [left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) - else: - out_quad_list.append(left_quad) - for idx in range(left_idx + 1, right_idx): - out_quad_list.append(quads[idx]) - out_quad_list.append(right_quad) - - return np.array(out_quad_list), list(range(left_idx, right_idx + 1)) - - def vector_angle(self, A, B): - """ - Calculate the angle between vector AB and x-axis positive direction. - """ - AB = np.array([B[1] - A[1], B[0] - A[0]]) - return np.arctan2(*AB) - - def theta_line_cross_point(self, theta, point): - """ - Calculate the line through given point and angle in ax + by + c =0 form. - """ - x, y = point - cos = np.cos(theta) - sin = np.sin(theta) - return [sin, -cos, cos * y - sin * x] - - def line_cross_two_point(self, A, B): - """ - Calculate the line through given point A and B in ax + by + c =0 form. 
- """ - angle = self.vector_angle(A, B) - return self.theta_line_cross_point(angle, A) - - def average_angle(self, poly): - """ - Calculate the average angle between left and right edge in given poly. - """ - p0, p1, p2, p3 = poly - angle30 = self.vector_angle(p3, p0) - angle21 = self.vector_angle(p2, p1) - return (angle30 + angle21) / 2 - - def line_cross_point(self, line1, line2): - """ - line1 and line2 in 0=ax+by+c form, compute the cross point of line1 and line2 - """ - a1, b1, c1 = line1 - a2, b2, c2 = line2 - d = a1 * b2 - a2 * b1 - - if d == 0: - #print("line1", line1) - #print("line2", line2) - print('Cross point does not exist') - return np.array([0, 0], dtype=np.float32) - else: - x = (b1 * c2 - b2 * c1) / d - y = (a2 * c1 - a1 * c2) / d - - return np.array([x, y], dtype=np.float32) - - def quad2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. (4, 2) - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair - p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair - return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]]) - - def poly2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - tcl_poly = np.zeros_like(poly) - point_num = poly.shape[0] - - for idx in range(point_num // 2): - point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx] - ) * ratio_pair - tcl_poly[idx] = point_pair[0] - tcl_poly[point_num - 1 - idx] = point_pair[1] - return tcl_poly - - def gen_quad_tbo(self, quad, tcl_mask, tbo_map): - """ - Generate tbo_map for give quad. - """ - # upper and lower line function: ax + by + c = 0; - up_line = self.line_cross_two_point(quad[0], quad[1]) - lower_line = self.line_cross_two_point(quad[3], quad[2]) - - quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[1] - quad[2])) - quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) - - # average angle of left and right line. - angle = self.average_angle(quad) - - xy_in_poly = np.argwhere(tcl_mask == 1) - for y, x in xy_in_poly: - point = (x, y) - line = self.theta_line_cross_point(angle, point) - cross_point_upper = self.line_cross_point(up_line, line) - cross_point_lower = self.line_cross_point(lower_line, line) - ##FIX, offset reverse - upper_offset_x, upper_offset_y = cross_point_upper - point - lower_offset_x, lower_offset_y = cross_point_lower - point - tbo_map[y, x, 0] = upper_offset_y - tbo_map[y, x, 1] = upper_offset_x - tbo_map[y, x, 2] = lower_offset_y - tbo_map[y, x, 3] = lower_offset_x - tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2 - return tbo_map - - def poly2quads(self, poly): - """ - Split poly into quads. 
- """ - quad_list = [] - point_num = poly.shape[0] - - # point pair - point_pair_list = [] - for idx in range(point_num // 2): - point_pair = [poly[idx], poly[point_num - 1 - idx]] - point_pair_list.append(point_pair) - - quad_num = point_num // 2 - 1 - for idx in range(quad_num): - # reshape and adjust to clock-wise - quad_list.append((np.array(point_pair_list)[[idx, idx + 1]] - ).reshape(4, 2)[[0, 2, 3, 1]]) - - return np.array(quad_list) - - def __call__(self, data): - im = data['image'] - text_polys = data['polys'] - text_tags = data['ignore_tags'] - if im is None: - return None - if text_polys.shape[0] == 0: - return None - - h, w, _ = im.shape - text_polys, text_tags, hv_tags = self.check_and_validate_polys( - text_polys, text_tags, (h, w)) - - if text_polys.shape[0] == 0: - return None - - #set aspect ratio and keep area fix - asp_scales = np.arange(1.0, 1.55, 0.1) - asp_scale = np.random.choice(asp_scales) - - if np.random.rand() < 0.5: - asp_scale = 1.0 / asp_scale - asp_scale = math.sqrt(asp_scale) - - asp_wx = asp_scale - asp_hy = 1.0 / asp_scale - im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy) - text_polys[:, :, 0] *= asp_wx - text_polys[:, :, 1] *= asp_hy - - h, w, _ = im.shape - if max(h, w) > 2048: - rd_scale = 2048.0 / max(h, w) - im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale) - text_polys *= rd_scale - h, w, _ = im.shape - if min(h, w) < 16: - return None - - #no background - im, text_polys, text_tags, hv_tags = self.crop_area(im, \ - text_polys, text_tags, hv_tags, crop_background=False) - - if text_polys.shape[0] == 0: - return None - #continue for all ignore case - if np.sum((text_tags * 1.0)) >= text_tags.size: - return None - new_h, new_w, _ = im.shape - if (new_h is None) or (new_w is None): - return None - #resize image - std_ratio = float(self.input_size) / max(new_w, new_h) - rand_scales = np.array( - [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) - rz_scale = std_ratio * np.random.choice(rand_scales) - im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale) - text_polys[:, :, 0] *= rz_scale - text_polys[:, :, 1] *= rz_scale - - #add gaussian blur - if np.random.rand() < 0.1 * 0.5: - ks = np.random.permutation(5)[0] + 1 - ks = int(ks / 2) * 2 + 1 - im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) - #add brighter - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 + np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - #add darker - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 - np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - - # Padding the im to [input_size, input_size] - new_h, new_w, _ = im.shape - if min(new_w, new_h) < self.input_size * 0.5: - return None - - im_padded = np.ones( - (self.input_size, self.input_size, 3), dtype=np.float32) - im_padded[:, :, 2] = 0.485 * 255 - im_padded[:, :, 1] = 0.456 * 255 - im_padded[:, :, 0] = 0.406 * 255 - - # Random the start position - del_h = self.input_size - new_h - del_w = self.input_size - new_w - sh, sw = 0, 0 - if del_h > 1: - sh = int(np.random.rand() * del_h) - if del_w > 1: - sw = int(np.random.rand() * del_w) - - # Padding - im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy() - text_polys[:, :, 0] += sw - text_polys[:, :, 1] += sh - - score_map, border_map, training_mask = self.generate_tcl_label( - (self.input_size, self.input_size), text_polys, text_tags, 0.25) - - # SAST head - tvo_map, tco_map = self.generate_tvo_and_tco( - (self.input_size, self.input_size), - text_polys, - text_tags, - tcl_ratio=0.3, - ds_ratio=0.25) - # 
print("test--------tvo_map shape:", tvo_map.shape) - - im_padded[:, :, 2] -= 0.485 * 255 - im_padded[:, :, 1] -= 0.456 * 255 - im_padded[:, :, 0] -= 0.406 * 255 - im_padded[:, :, 2] /= (255.0 * 0.229) - im_padded[:, :, 1] /= (255.0 * 0.224) - im_padded[:, :, 0] /= (255.0 * 0.225) - im_padded = im_padded.transpose((2, 0, 1)) - - data['image'] = im_padded[::-1, :, :] - data['score_map'] = score_map[np.newaxis, :, :] - data['border_map'] = border_map.transpose((2, 0, 1)) - data['training_mask'] = training_mask[np.newaxis, :, :] - data['tvo_map'] = tvo_map.transpose((2, 0, 1)) - data['tco_map'] = tco_map.transpose((2, 0, 1)) - return data diff --git a/backend/ppocr/data/imaug/ssl_img_aug.py b/backend/ppocr/data/imaug/ssl_img_aug.py deleted file mode 100644 index f9ed6ac3..00000000 --- a/backend/ppocr/data/imaug/ssl_img_aug.py +++ /dev/null @@ -1,60 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import cv2 -import numpy as np -import random -from PIL import Image - -from .rec_img_aug import resize_norm_img - - -class SSLRotateResize(object): - def __init__(self, - image_shape, - padding=False, - select_all=True, - mode="train", - **kwargs): - self.image_shape = image_shape - self.padding = padding - self.select_all = select_all - self.mode = mode - - def __call__(self, data): - img = data["image"] - - data["image_r90"] = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE) - data["image_r180"] = cv2.rotate(data["image_r90"], - cv2.ROTATE_90_CLOCKWISE) - data["image_r270"] = cv2.rotate(data["image_r180"], - cv2.ROTATE_90_CLOCKWISE) - - images = [] - for key in ["image", "image_r90", "image_r180", "image_r270"]: - images.append( - resize_norm_img( - data.pop(key), - image_shape=self.image_shape, - padding=self.padding)[0]) - data["image"] = np.stack(images, axis=0) - data["label"] = np.array(list(range(4))) - if not self.select_all: - data["image"] = data["image"][0::2] # just choose 0 and 180 - data["label"] = data["label"][0:2] # label needs to be continuous - if self.mode == "test": - data["image"] = data["image"][0] - data["label"] = data["label"][0] - return data diff --git a/backend/ppocr/data/imaug/text_image_aug/__init__.py b/backend/ppocr/data/imaug/text_image_aug/__init__.py deleted file mode 100644 index bca26263..00000000 --- a/backend/ppocr/data/imaug/text_image_aug/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from .augment import tia_perspective, tia_distort, tia_stretch - -__all__ = ['tia_distort', 'tia_stretch', 'tia_perspective'] diff --git a/backend/ppocr/data/imaug/text_image_aug/augment.py b/backend/ppocr/data/imaug/text_image_aug/augment.py deleted file mode 100644 index 2d15dd5f..00000000 --- a/backend/ppocr/data/imaug/text_image_aug/augment.py +++ /dev/null @@ -1,120 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py -""" - -import numpy as np -from .warp_mls import WarpMLS - - -def tia_distort(src, segment=4): - img_h, img_w = src.shape[:2] - - cut = img_w // segment - thresh = cut // 3 - - src_pts = list() - dst_pts = list() - - src_pts.append([0, 0]) - src_pts.append([img_w, 0]) - src_pts.append([img_w, img_h]) - src_pts.append([0, img_h]) - - dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)]) - dst_pts.append( - [img_w - np.random.randint(thresh), np.random.randint(thresh)]) - dst_pts.append( - [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)]) - dst_pts.append( - [np.random.randint(thresh), img_h - np.random.randint(thresh)]) - - half_thresh = thresh * 0.5 - - for cut_idx in np.arange(1, segment, 1): - src_pts.append([cut * cut_idx, 0]) - src_pts.append([cut * cut_idx, img_h]) - dst_pts.append([ - cut * cut_idx + np.random.randint(thresh) - half_thresh, - np.random.randint(thresh) - half_thresh - ]) - dst_pts.append([ - cut * cut_idx + np.random.randint(thresh) - half_thresh, - img_h + np.random.randint(thresh) - half_thresh - ]) - - trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) - dst = trans.generate() - - return dst - - -def tia_stretch(src, segment=4): - img_h, img_w = src.shape[:2] - - cut = img_w // segment - thresh = cut * 4 // 5 - - src_pts = list() - dst_pts = list() - - src_pts.append([0, 0]) - src_pts.append([img_w, 0]) - src_pts.append([img_w, img_h]) - src_pts.append([0, img_h]) - - dst_pts.append([0, 0]) - dst_pts.append([img_w, 0]) - dst_pts.append([img_w, img_h]) - dst_pts.append([0, img_h]) - - half_thresh = thresh * 0.5 - - for cut_idx in np.arange(1, segment, 1): - move = np.random.randint(thresh) - half_thresh - src_pts.append([cut * cut_idx, 0]) - src_pts.append([cut * cut_idx, img_h]) - dst_pts.append([cut * cut_idx + move, 0]) - dst_pts.append([cut * cut_idx + move, img_h]) - - trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) - dst = trans.generate() - - return dst - - -def tia_perspective(src): - img_h, img_w = src.shape[:2] - - thresh = img_h // 2 - - src_pts = list() - dst_pts = list() - - src_pts.append([0, 0]) - src_pts.append([img_w, 0]) - src_pts.append([img_w, img_h]) - src_pts.append([0, img_h]) - - dst_pts.append([0, np.random.randint(thresh)]) - dst_pts.append([img_w, np.random.randint(thresh)]) - 
dst_pts.append([img_w, img_h - np.random.randint(thresh)]) - dst_pts.append([0, img_h - np.random.randint(thresh)]) - - trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) - dst = trans.generate() - - return dst \ No newline at end of file diff --git a/backend/ppocr/data/imaug/text_image_aug/warp_mls.py b/backend/ppocr/data/imaug/text_image_aug/warp_mls.py deleted file mode 100644 index 75de1111..00000000 --- a/backend/ppocr/data/imaug/text_image_aug/warp_mls.py +++ /dev/null @@ -1,168 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/warp_mls.py -""" - -import numpy as np - - -class WarpMLS: - def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.): - self.src = src - self.src_pts = src_pts - self.dst_pts = dst_pts - self.pt_count = len(self.dst_pts) - self.dst_w = dst_w - self.dst_h = dst_h - self.trans_ratio = trans_ratio - self.grid_size = 100 - self.rdx = np.zeros((self.dst_h, self.dst_w)) - self.rdy = np.zeros((self.dst_h, self.dst_w)) - - @staticmethod - def __bilinear_interp(x, y, v11, v12, v21, v22): - return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 * - (1 - y) + v22 * y) * x - - def generate(self): - self.calc_delta() - return self.gen_img() - - def calc_delta(self): - w = np.zeros(self.pt_count, dtype=np.float32) - - if self.pt_count < 2: - return - - i = 0 - while 1: - if self.dst_w <= i < self.dst_w + self.grid_size - 1: - i = self.dst_w - 1 - elif i >= self.dst_w: - break - - j = 0 - while 1: - if self.dst_h <= j < self.dst_h + self.grid_size - 1: - j = self.dst_h - 1 - elif j >= self.dst_h: - break - - sw = 0 - swp = np.zeros(2, dtype=np.float32) - swq = np.zeros(2, dtype=np.float32) - new_pt = np.zeros(2, dtype=np.float32) - cur_pt = np.array([i, j], dtype=np.float32) - - k = 0 - for k in range(self.pt_count): - if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: - break - - w[k] = 1. 
/ ( - (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) + - (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1])) - - sw += w[k] - swp = swp + w[k] * np.array(self.dst_pts[k]) - swq = swq + w[k] * np.array(self.src_pts[k]) - - if k == self.pt_count - 1: - pstar = 1 / sw * swp - qstar = 1 / sw * swq - - miu_s = 0 - for k in range(self.pt_count): - if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: - continue - pt_i = self.dst_pts[k] - pstar - miu_s += w[k] * np.sum(pt_i * pt_i) - - cur_pt -= pstar - cur_pt_j = np.array([-cur_pt[1], cur_pt[0]]) - - for k in range(self.pt_count): - if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: - continue - - pt_i = self.dst_pts[k] - pstar - pt_j = np.array([-pt_i[1], pt_i[0]]) - - tmp_pt = np.zeros(2, dtype=np.float32) - tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \ - np.sum(pt_j * cur_pt) * self.src_pts[k][1] - tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \ - np.sum(pt_j * cur_pt_j) * self.src_pts[k][1] - tmp_pt *= (w[k] / miu_s) - new_pt += tmp_pt - - new_pt += qstar - else: - new_pt = self.src_pts[k] - - self.rdx[j, i] = new_pt[0] - i - self.rdy[j, i] = new_pt[1] - j - - j += self.grid_size - i += self.grid_size - - def gen_img(self): - src_h, src_w = self.src.shape[:2] - dst = np.zeros_like(self.src, dtype=np.float32) - - for i in np.arange(0, self.dst_h, self.grid_size): - for j in np.arange(0, self.dst_w, self.grid_size): - ni = i + self.grid_size - nj = j + self.grid_size - w = h = self.grid_size - if ni >= self.dst_h: - ni = self.dst_h - 1 - h = ni - i + 1 - if nj >= self.dst_w: - nj = self.dst_w - 1 - w = nj - j + 1 - - di = np.reshape(np.arange(h), (-1, 1)) - dj = np.reshape(np.arange(w), (1, -1)) - delta_x = self.__bilinear_interp( - di / h, dj / w, self.rdx[i, j], self.rdx[i, nj], - self.rdx[ni, j], self.rdx[ni, nj]) - delta_y = self.__bilinear_interp( - di / h, dj / w, self.rdy[i, j], self.rdy[i, nj], - self.rdy[ni, j], self.rdy[ni, nj]) - nx = j + dj + delta_x * self.trans_ratio - ny = i + di + delta_y * self.trans_ratio - nx = np.clip(nx, 0, src_w - 1) - ny = np.clip(ny, 0, src_h - 1) - nxi = np.array(np.floor(nx), dtype=np.int32) - nyi = np.array(np.floor(ny), dtype=np.int32) - nxi1 = np.array(np.ceil(nx), dtype=np.int32) - nyi1 = np.array(np.ceil(ny), dtype=np.int32) - - if len(self.src.shape) == 3: - x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3)) - y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3)) - else: - x = ny - nyi - y = nx - nxi - dst[i:i + h, j:j + w] = self.__bilinear_interp( - x, y, self.src[nyi, nxi], self.src[nyi, nxi1], - self.src[nyi1, nxi], self.src[nyi1, nxi1]) - - dst = np.clip(dst, 0, 255) - dst = np.array(dst, dtype=np.uint8) - - return dst diff --git a/backend/ppocr/data/imaug/vqa/__init__.py b/backend/ppocr/data/imaug/vqa/__init__.py deleted file mode 100644 index a5025e79..00000000 --- a/backend/ppocr/data/imaug/vqa/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from .token import VQATokenPad, VQASerTokenChunk, VQAReTokenChunk, VQAReTokenRelation - -__all__ = [ - 'VQATokenPad', 'VQASerTokenChunk', 'VQAReTokenChunk', 'VQAReTokenRelation' -] diff --git a/backend/ppocr/data/imaug/vqa/token/__init__.py b/backend/ppocr/data/imaug/vqa/token/__init__.py deleted file mode 100644 index 7c115661..00000000 --- a/backend/ppocr/data/imaug/vqa/token/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .vqa_token_chunk import VQASerTokenChunk, VQAReTokenChunk -from .vqa_token_pad import VQATokenPad -from .vqa_token_relation import VQAReTokenRelation diff --git a/backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py b/backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py deleted file mode 100644 index 1fa949e6..00000000 --- a/backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py +++ /dev/null @@ -1,122 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
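The transform deleted just below (VQASerTokenChunk / VQAReTokenChunk) windows over-long tokenized samples: every sequence-aligned field is sliced into max_seq_len pieces while scalar fields are carried through unchanged. A minimal sketch of that windowing, assuming a dict-of-lists sample with hypothetical field names, not the module's API:

# Minimal sketch of fixed-window chunking; field names are illustrative.
# Sequence-aligned fields are sliced, everything else is copied as-is.
def chunk_sample(data, max_seq_len=512,
                 seq_keys=("input_ids", "labels", "token_type_ids", "bbox")):
    seq_len = len(data["input_ids"])
    chunks = []
    for beg in range(0, seq_len, max_seq_len):
        chunk = {key: (val[beg:beg + max_seq_len] if key in seq_keys else val)
                 for key, val in data.items()}
        chunks.append(chunk)
    return chunks

Note that the deleted classes build the full list of windows but return only encoded_inputs_all[0], so everything past the first max_seq_len tokens is effectively dropped.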
- -from collections import defaultdict - - -class VQASerTokenChunk(object): - def __init__(self, max_seq_len=512, infer_mode=False, **kwargs): - self.max_seq_len = max_seq_len - self.infer_mode = infer_mode - - def __call__(self, data): - encoded_inputs_all = [] - seq_len = len(data['input_ids']) - for index in range(0, seq_len, self.max_seq_len): - chunk_beg = index - chunk_end = min(index + self.max_seq_len, seq_len) - encoded_inputs_example = {} - for key in data: - if key in [ - 'label', 'input_ids', 'labels', 'token_type_ids', - 'bbox', 'attention_mask' - ]: - if self.infer_mode and key == 'labels': - encoded_inputs_example[key] = data[key] - else: - encoded_inputs_example[key] = data[key][chunk_beg: - chunk_end] - else: - encoded_inputs_example[key] = data[key] - - encoded_inputs_all.append(encoded_inputs_example) - if len(encoded_inputs_all) == 0: - return None - return encoded_inputs_all[0] - - -class VQAReTokenChunk(object): - def __init__(self, - max_seq_len=512, - entities_labels=None, - infer_mode=False, - **kwargs): - self.max_seq_len = max_seq_len - self.entities_labels = { - 'HEADER': 0, - 'QUESTION': 1, - 'ANSWER': 2 - } if entities_labels is None else entities_labels - self.infer_mode = infer_mode - - def __call__(self, data): - # prepare data - entities = data.pop('entities') - relations = data.pop('relations') - encoded_inputs_all = [] - for index in range(0, len(data["input_ids"]), self.max_seq_len): - item = {} - for key in data: - if key in [ - 'label', 'input_ids', 'labels', 'token_type_ids', - 'bbox', 'attention_mask' - ]: - if self.infer_mode and key == 'labels': - item[key] = data[key] - else: - item[key] = data[key][index:index + self.max_seq_len] - else: - item[key] = data[key] - # select entity in current chunk - entities_in_this_span = [] - global_to_local_map = {} # - for entity_id, entity in enumerate(entities): - if (index <= entity["start"] < index + self.max_seq_len and - index <= entity["end"] < index + self.max_seq_len): - entity["start"] = entity["start"] - index - entity["end"] = entity["end"] - index - global_to_local_map[entity_id] = len(entities_in_this_span) - entities_in_this_span.append(entity) - - # select relations in current chunk - relations_in_this_span = [] - for relation in relations: - if (index <= relation["start_index"] < index + self.max_seq_len - and index <= relation["end_index"] < - index + self.max_seq_len): - relations_in_this_span.append({ - "head": global_to_local_map[relation["head"]], - "tail": global_to_local_map[relation["tail"]], - "start_index": relation["start_index"] - index, - "end_index": relation["end_index"] - index, - }) - item.update({ - "entities": self.reformat(entities_in_this_span), - "relations": self.reformat(relations_in_this_span), - }) - if len(item['entities']) > 0: - item['entities']['label'] = [ - self.entities_labels[x] for x in item['entities']['label'] - ] - encoded_inputs_all.append(item) - if len(encoded_inputs_all) == 0: - return None - return encoded_inputs_all[0] - - def reformat(self, data): - new_data = defaultdict(list) - for item in data: - for k, v in item.items(): - new_data[k].append(v) - return new_data diff --git a/backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py b/backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py deleted file mode 100644 index 8e5a20f9..00000000 --- a/backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py +++ /dev/null @@ -1,104 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle -import numpy as np - - -class VQATokenPad(object): - def __init__(self, - max_seq_len=512, - pad_to_max_seq_len=True, - return_attention_mask=True, - return_token_type_ids=True, - truncation_strategy="longest_first", - return_overflowing_tokens=False, - return_special_tokens_mask=False, - infer_mode=False, - **kwargs): - self.max_seq_len = max_seq_len - self.pad_to_max_seq_len = max_seq_len - self.return_attention_mask = return_attention_mask - self.return_token_type_ids = return_token_type_ids - self.truncation_strategy = truncation_strategy - self.return_overflowing_tokens = return_overflowing_tokens - self.return_special_tokens_mask = return_special_tokens_mask - self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index - self.infer_mode = infer_mode - - def __call__(self, data): - needs_to_be_padded = self.pad_to_max_seq_len and len(data[ - "input_ids"]) < self.max_seq_len - - if needs_to_be_padded: - if 'tokenizer_params' in data: - tokenizer_params = data.pop('tokenizer_params') - else: - tokenizer_params = dict( - padding_side='right', pad_token_type_id=0, pad_token_id=1) - - difference = self.max_seq_len - len(data["input_ids"]) - if tokenizer_params['padding_side'] == 'right': - if self.return_attention_mask: - data["attention_mask"] = [1] * len(data[ - "input_ids"]) + [0] * difference - if self.return_token_type_ids: - data["token_type_ids"] = ( - data["token_type_ids"] + - [tokenizer_params['pad_token_type_id']] * difference) - if self.return_special_tokens_mask: - data["special_tokens_mask"] = data[ - "special_tokens_mask"] + [1] * difference - data["input_ids"] = data["input_ids"] + [ - tokenizer_params['pad_token_id'] - ] * difference - if not self.infer_mode: - data["labels"] = data[ - "labels"] + [self.pad_token_label_id] * difference - data["bbox"] = data["bbox"] + [[0, 0, 0, 0]] * difference - elif tokenizer_params['padding_side'] == 'left': - if self.return_attention_mask: - data["attention_mask"] = [0] * difference + [ - 1 - ] * len(data["input_ids"]) - if self.return_token_type_ids: - data["token_type_ids"] = ( - [tokenizer_params['pad_token_type_id']] * difference + - data["token_type_ids"]) - if self.return_special_tokens_mask: - data["special_tokens_mask"] = [ - 1 - ] * difference + data["special_tokens_mask"] - data["input_ids"] = [tokenizer_params['pad_token_id'] - ] * difference + data["input_ids"] - if not self.infer_mode: - data["labels"] = [self.pad_token_label_id - ] * difference + data["labels"] - data["bbox"] = [[0, 0, 0, 0]] * difference + data["bbox"] - else: - if self.return_attention_mask: - data["attention_mask"] = [1] * len(data["input_ids"]) - - for key in data: - if key in [ - 'input_ids', 'labels', 'token_type_ids', 'bbox', - 'attention_mask' - ]: - if self.infer_mode: - if key != 'labels': - length = min(len(data[key]), self.max_seq_len) - data[key] = data[key][:length] - else: - continue - data[key] = np.array(data[key], dtype='int64') - return data diff --git 
a/backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py b/backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py deleted file mode 100644 index 293988ff..00000000 --- a/backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py +++ /dev/null @@ -1,67 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class VQAReTokenRelation(object): - def __init__(self, **kwargs): - pass - - def __call__(self, data): - """ - build relations - """ - entities = data['entities'] - relations = data['relations'] - id2label = data.pop('id2label') - empty_entity = data.pop('empty_entity') - entity_id_to_index_map = data.pop('entity_id_to_index_map') - - relations = list(set(relations)) - relations = [ - rel for rel in relations - if rel[0] not in empty_entity and rel[1] not in empty_entity - ] - kv_relations = [] - for rel in relations: - pair = [id2label[rel[0]], id2label[rel[1]]] - if pair == ["question", "answer"]: - kv_relations.append({ - "head": entity_id_to_index_map[rel[0]], - "tail": entity_id_to_index_map[rel[1]] - }) - elif pair == ["answer", "question"]: - kv_relations.append({ - "head": entity_id_to_index_map[rel[1]], - "tail": entity_id_to_index_map[rel[0]] - }) - else: - continue - relations = sorted( - [{ - "head": rel["head"], - "tail": rel["tail"], - "start_index": self.get_relation_span(rel, entities)[0], - "end_index": self.get_relation_span(rel, entities)[1], - } for rel in kv_relations], - key=lambda x: x["head"], ) - - data['relations'] = relations - return data - - def get_relation_span(self, rel, entities): - bound = [] - for entity_index in [rel["head"], rel["tail"]]: - bound.append(entities[entity_index]["start"]) - bound.append(entities[entity_index]["end"]) - return min(bound), max(bound) diff --git a/backend/ppocr/data/lmdb_dataset.py b/backend/ppocr/data/lmdb_dataset.py deleted file mode 100644 index e1b49809..00000000 --- a/backend/ppocr/data/lmdb_dataset.py +++ /dev/null @@ -1,118 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
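The dataset deleted just below reads samples from an LMDB store keyed by 'num-samples', 'label-%09d' and 'image-%09d', with 1-based sample indices. A minimal standalone sketch of that access pattern (a reader under those layout assumptions, not a drop-in replacement for the class):

# Minimal sketch: fetch one (image, label) pair from an OCR LMDB store laid
# out the way the deleted LMDBDataSet expects. The index is 1-based.
import cv2
import lmdb
import numpy as np

def read_lmdb_sample(lmdb_dir, index):
    env = lmdb.open(lmdb_dir, max_readers=32, readonly=True, lock=False,
                    readahead=False, meminit=False)
    with env.begin(write=False) as txn:
        label = txn.get(b'label-%09d' % index).decode('utf-8')
        img_buf = txn.get(b'image-%09d' % index)
    # Decode the raw JPEG/PNG bytes into a BGR image, as the dataset does.
    img = cv2.imdecode(np.frombuffer(img_buf, dtype='uint8'), cv2.IMREAD_COLOR)
    return img, label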
-import numpy as np -import os -from paddle.io import Dataset -import lmdb -import cv2 - -from .imaug import transform, create_operators - - -class LMDBDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(LMDBDataSet, self).__init__() - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - batch_size = loader_config['batch_size_per_card'] - data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - - self.lmdb_sets = self.load_hierarchical_lmdb_dataset(data_dir) - logger.info("Initialize indexs of datasets:%s" % data_dir) - self.data_idx_order_list = self.dataset_traversal() - if self.do_shuffle: - np.random.shuffle(self.data_idx_order_list) - self.ops = create_operators(dataset_config['transforms'], global_config) - - ratio_list = dataset_config.get("ratio_list", [1.0]) - self.need_reset = True in [x < 1 for x in ratio_list] - - def load_hierarchical_lmdb_dataset(self, data_dir): - lmdb_sets = {} - dataset_idx = 0 - for dirpath, dirnames, filenames in os.walk(data_dir + '/'): - if not dirnames: - env = lmdb.open( - dirpath, - max_readers=32, - readonly=True, - lock=False, - readahead=False, - meminit=False) - txn = env.begin(write=False) - num_samples = int(txn.get('num-samples'.encode())) - lmdb_sets[dataset_idx] = {"dirpath":dirpath, "env":env, \ - "txn":txn, "num_samples":num_samples} - dataset_idx += 1 - return lmdb_sets - - def dataset_traversal(self): - lmdb_num = len(self.lmdb_sets) - total_sample_num = 0 - for lno in range(lmdb_num): - total_sample_num += self.lmdb_sets[lno]['num_samples'] - data_idx_order_list = np.zeros((total_sample_num, 2)) - beg_idx = 0 - for lno in range(lmdb_num): - tmp_sample_num = self.lmdb_sets[lno]['num_samples'] - end_idx = beg_idx + tmp_sample_num - data_idx_order_list[beg_idx:end_idx, 0] = lno - data_idx_order_list[beg_idx:end_idx, 1] \ - = list(range(tmp_sample_num)) - data_idx_order_list[beg_idx:end_idx, 1] += 1 - beg_idx = beg_idx + tmp_sample_num - return data_idx_order_list - - def get_img_data(self, value): - """get_img_data""" - if not value: - return None - imgdata = np.frombuffer(value, dtype='uint8') - if imgdata is None: - return None - imgori = cv2.imdecode(imgdata, 1) - if imgori is None: - return None - return imgori - - def get_lmdb_sample_info(self, txn, index): - label_key = 'label-%09d'.encode() % index - label = txn.get(label_key) - if label is None: - return None - label = label.decode('utf-8') - img_key = 'image-%09d'.encode() % index - imgbuf = txn.get(img_key) - return imgbuf, label - - def __getitem__(self, idx): - lmdb_idx, file_idx = self.data_idx_order_list[idx] - lmdb_idx = int(lmdb_idx) - file_idx = int(file_idx) - sample_info = self.get_lmdb_sample_info(self.lmdb_sets[lmdb_idx]['txn'], - file_idx) - if sample_info is None: - return self.__getitem__(np.random.randint(self.__len__())) - img, label = sample_info - data = {'image': img, 'label': label} - outs = transform(data, self.ops) - if outs is None: - return self.__getitem__(np.random.randint(self.__len__())) - return outs - - def __len__(self): - return self.data_idx_order_list.shape[0] diff --git a/backend/ppocr/data/pgnet_dataset.py b/backend/ppocr/data/pgnet_dataset.py deleted file mode 100644 index 6f80179c..00000000 --- a/backend/ppocr/data/pgnet_dataset.py +++ /dev/null @@ -1,106 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -from paddle.io import Dataset -from .imaug import transform, create_operators -import random - - -class PGDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(PGDataSet, self).__init__() - - self.logger = logger - self.seed = seed - self.mode = mode - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - - self.delimiter = dataset_config.get('delimiter', '\t') - label_file_list = dataset_config.pop('label_file_list') - data_source_num = len(label_file_list) - ratio_list = dataset_config.get("ratio_list", [1.0]) - if isinstance(ratio_list, (float, int)): - ratio_list = [float(ratio_list)] * int(data_source_num) - assert len( - ratio_list - ) == data_source_num, "The length of ratio_list should be the same as the file_list." - self.data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - - logger.info("Initialize indexs of datasets:%s" % label_file_list) - self.data_lines = self.get_image_info_list(label_file_list, ratio_list) - self.data_idx_order_list = list(range(len(self.data_lines))) - if mode.lower() == "train": - self.shuffle_data_random() - - self.ops = create_operators(dataset_config['transforms'], global_config) - - self.need_reset = True in [x < 1 for x in ratio_list] - - def shuffle_data_random(self): - if self.do_shuffle: - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def get_image_info_list(self, file_list, ratio_list): - if isinstance(file_list, str): - file_list = [file_list] - data_lines = [] - for idx, file in enumerate(file_list): - with open(file, "rb") as f: - lines = f.readlines() - if self.mode == "train" or ratio_list[idx] < 1.0: - random.seed(self.seed) - lines = random.sample(lines, - round(len(lines) * ratio_list[idx])) - data_lines.extend(lines) - return data_lines - - def __getitem__(self, idx): - file_idx = self.data_idx_order_list[idx] - data_line = self.data_lines[file_idx] - img_id = 0 - try: - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - if self.mode.lower() == 'eval': - try: - img_id = int(data_line.split(".")[0][7:]) - except: - img_id = 0 - data = {'img_path': img_path, 'label': label, 'img_id': img_id} - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - outs = transform(data, self.ops) - except Exception as e: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - self.data_idx_order_list[idx], e)) - outs = None - if outs is None: - return self.__getitem__(np.random.randint(self.__len__())) - return outs - - def __len__(self): - return len(self.data_idx_order_list) diff --git a/backend/ppocr/data/pubtab_dataset.py 
b/backend/ppocr/data/pubtab_dataset.py deleted file mode 100644 index 671cda76..00000000 --- a/backend/ppocr/data/pubtab_dataset.py +++ /dev/null @@ -1,114 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -import random -from paddle.io import Dataset -import json - -from .imaug import transform, create_operators - - -class PubTabDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(PubTabDataSet, self).__init__() - self.logger = logger - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - - label_file_path = dataset_config.pop('label_file_path') - - self.data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - self.do_hard_select = False - if 'hard_select' in loader_config: - self.do_hard_select = loader_config['hard_select'] - self.hard_prob = loader_config['hard_prob'] - if self.do_hard_select: - self.img_select_prob = self.load_hard_select_prob() - self.table_select_type = None - if 'table_select_type' in loader_config: - self.table_select_type = loader_config['table_select_type'] - self.table_select_prob = loader_config['table_select_prob'] - - self.seed = seed - logger.info("Initialize indexs of datasets:%s" % label_file_path) - with open(label_file_path, "rb") as f: - self.data_lines = f.readlines() - self.data_idx_order_list = list(range(len(self.data_lines))) - if mode.lower() == "train": - self.shuffle_data_random() - self.ops = create_operators(dataset_config['transforms'], global_config) - - ratio_list = dataset_config.get("ratio_list", [1.0]) - self.need_reset = True in [x < 1 for x in ratio_list] - - def shuffle_data_random(self): - if self.do_shuffle: - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def __getitem__(self, idx): - try: - data_line = self.data_lines[idx] - data_line = data_line.decode('utf-8').strip("\n") - info = json.loads(data_line) - file_name = info['filename'] - select_flag = True - if self.do_hard_select: - prob = self.img_select_prob[file_name] - if prob < random.uniform(0, 1): - select_flag = False - - if self.table_select_type: - structure = info['html']['structure']['tokens'].copy() - structure_str = ''.join(structure) - table_type = "simple" - if 'colspan' in structure_str or 'rowspan' in structure_str: - table_type = "complex" - if table_type == "complex": - if self.table_select_prob < random.uniform(0, 1): - select_flag = False - - if select_flag: - cells = info['html']['cells'].copy() - structure = info['html']['structure'].copy() - img_path = os.path.join(self.data_dir, file_name) - data = { - 'img_path': img_path, - 'cells': cells, - 'structure': structure - } - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - outs = transform(data, self.ops) - else: - outs = None - except Exception 
as e: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - data_line, e)) - outs = None - if outs is None: - return self.__getitem__(np.random.randint(self.__len__())) - return outs - - def __len__(self): - return len(self.data_idx_order_list) diff --git a/backend/ppocr/data/simple_dataset.py b/backend/ppocr/data/simple_dataset.py deleted file mode 100644 index b5da9b88..00000000 --- a/backend/ppocr/data/simple_dataset.py +++ /dev/null @@ -1,151 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -import json -import random -import traceback -from paddle.io import Dataset -from .imaug import transform, create_operators - - -class SimpleDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(SimpleDataSet, self).__init__() - self.logger = logger - self.mode = mode.lower() - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - - self.delimiter = dataset_config.get('delimiter', '\t') - label_file_list = dataset_config.pop('label_file_list') - data_source_num = len(label_file_list) - ratio_list = dataset_config.get("ratio_list", [1.0]) - if isinstance(ratio_list, (float, int)): - ratio_list = [float(ratio_list)] * int(data_source_num) - - assert len( - ratio_list - ) == data_source_num, "The length of ratio_list should be the same as the file_list." 
- self.data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - self.seed = seed - logger.info("Initialize indexs of datasets:%s" % label_file_list) - self.data_lines = self.get_image_info_list(label_file_list, ratio_list) - self.data_idx_order_list = list(range(len(self.data_lines))) - if self.mode == "train" and self.do_shuffle: - self.shuffle_data_random() - self.ops = create_operators(dataset_config['transforms'], global_config) - self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx", - 2) - self.need_reset = True in [x < 1 for x in ratio_list] - - def get_image_info_list(self, file_list, ratio_list): - if isinstance(file_list, str): - file_list = [file_list] - data_lines = [] - for idx, file in enumerate(file_list): - with open(file, "rb") as f: - lines = f.readlines() - if self.mode == "train" or ratio_list[idx] < 1.0: - random.seed(self.seed) - lines = random.sample(lines, - round(len(lines) * ratio_list[idx])) - data_lines.extend(lines) - return data_lines - - def shuffle_data_random(self): - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def _try_parse_filename_list(self, file_name): - # multiple images -> one gt label - if len(file_name) > 0 and file_name[0] == "[": - try: - info = json.loads(file_name) - file_name = random.choice(info) - except: - pass - return file_name - - def get_ext_data(self): - ext_data_num = 0 - for op in self.ops: - if hasattr(op, 'ext_data_num'): - ext_data_num = getattr(op, 'ext_data_num') - break - load_data_ops = self.ops[:self.ext_op_transform_idx] - ext_data = [] - - while len(ext_data) < ext_data_num: - file_idx = self.data_idx_order_list[np.random.randint(self.__len__( - ))] - data_line = self.data_lines[file_idx] - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - file_name = self._try_parse_filename_list(file_name) - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - data = {'img_path': img_path, 'label': label} - if not os.path.exists(img_path): - continue - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - data = transform(data, load_data_ops) - - if data is None: - continue - if 'polys' in data.keys(): - if data['polys'].shape[1] != 4: - continue - ext_data.append(data) - return ext_data - - def __getitem__(self, idx): - file_idx = self.data_idx_order_list[idx] - data_line = self.data_lines[file_idx] - try: - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - file_name = self._try_parse_filename_list(file_name) - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - data = {'img_path': img_path, 'label': label} - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - data['ext_data'] = self.get_ext_data() - outs = transform(data, self.ops) - except: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - data_line, traceback.format_exc())) - outs = None - if outs is None: - # during evaluation, we should fix the idx to get same results for many times of evaluation. 
- rnd_idx = np.random.randint(self.__len__( - )) if self.mode == "train" else (idx + 1) % self.__len__() - return self.__getitem__(rnd_idx) - return outs - - def __len__(self): - return len(self.data_idx_order_list) diff --git a/backend/ppocr/losses/__init__.py b/backend/ppocr/losses/__init__.py deleted file mode 100755 index de8419b7..00000000 --- a/backend/ppocr/losses/__init__.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import paddle -import paddle.nn as nn - -# basic_loss -from .basic_loss import LossFromOutput - -# det loss -from .det_db_loss import DBLoss -from .det_east_loss import EASTLoss -from .det_sast_loss import SASTLoss -from .det_pse_loss import PSELoss -from .det_fce_loss import FCELoss - -# rec loss -from .rec_ctc_loss import CTCLoss -from .rec_att_loss import AttentionLoss -from .rec_srn_loss import SRNLoss -from .rec_nrtr_loss import NRTRLoss -from .rec_sar_loss import SARLoss -from .rec_aster_loss import AsterLoss -from .rec_pren_loss import PRENLoss -from .rec_multi_loss import MultiLoss - -# cls loss -from .cls_loss import ClsLoss - -# e2e loss -from .e2e_pg_loss import PGLoss -from .kie_sdmgr_loss import SDMGRLoss - -# basic loss function -from .basic_loss import DistanceLoss - -# combined loss function -from .combined_loss import CombinedLoss - -# table loss -from .table_att_loss import TableAttentionLoss - -# vqa token loss -from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss - - -def build_loss(config): - support_dict = [ - 'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss', - 'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss', - 'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss', - 'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss' - ] - config = copy.deepcopy(config) - module_name = config.pop('name') - assert module_name in support_dict, Exception('loss only support {}'.format( - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/losses/ace_loss.py b/backend/ppocr/losses/ace_loss.py deleted file mode 100644 index 915b99e6..00000000 --- a/backend/ppocr/losses/ace_loss.py +++ /dev/null @@ -1,52 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# This code is refer from: https://github.com/viig99/LS-ACELoss - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn - - -class ACELoss(nn.Layer): - def __init__(self, **kwargs): - super().__init__() - self.loss_func = nn.CrossEntropyLoss( - weight=None, - ignore_index=0, - reduction='none', - soft_label=True, - axis=-1) - - def __call__(self, predicts, batch): - if isinstance(predicts, (list, tuple)): - predicts = predicts[-1] - - B, N = predicts.shape[:2] - div = paddle.to_tensor([N]).astype('float32') - - predicts = nn.functional.softmax(predicts, axis=-1) - aggregation_preds = paddle.sum(predicts, axis=1) - aggregation_preds = paddle.divide(aggregation_preds, div) - - length = batch[2].astype("float32") - batch = batch[3].astype("float32") - batch[:, 0] = paddle.subtract(div, length) - batch = paddle.divide(batch, div) - - loss = self.loss_func(aggregation_preds, batch) - return {"loss_ace": loss} diff --git a/backend/ppocr/losses/basic_loss.py b/backend/ppocr/losses/basic_loss.py deleted file mode 100644 index 2df96ea2..00000000 --- a/backend/ppocr/losses/basic_loss.py +++ /dev/null @@ -1,155 +0,0 @@ -#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from paddle.nn import L1Loss -from paddle.nn import MSELoss as L2Loss -from paddle.nn import SmoothL1Loss - - -class CELoss(nn.Layer): - def __init__(self, epsilon=None): - super().__init__() - if epsilon is not None and (epsilon <= 0 or epsilon >= 1): - epsilon = None - self.epsilon = epsilon - - def _labelsmoothing(self, target, class_num): - if target.shape[-1] != class_num: - one_hot_target = F.one_hot(target, class_num) - else: - one_hot_target = target - soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon) - soft_target = paddle.reshape(soft_target, shape=[-1, class_num]) - return soft_target - - def forward(self, x, label): - loss_dict = {} - if self.epsilon is not None: - class_num = x.shape[-1] - label = self._labelsmoothing(label, class_num) - x = -F.log_softmax(x, axis=-1) - loss = paddle.sum(x * label, axis=-1) - else: - if label.shape[-1] == x.shape[-1]: - label = F.softmax(label, axis=-1) - soft_label = True - else: - soft_label = False - loss = F.cross_entropy(x, label=label, soft_label=soft_label) - return loss - - -class KLJSLoss(object): - def __init__(self, mode='kl'): - assert mode in ['kl', 'js', 'KL', 'JS' - ], "mode can only be one of ['kl', 'js', 'KL', 'JS']" - self.mode = mode - - def __call__(self, p1, p2, reduction="mean"): - - loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5)) - - if self.mode.lower() == "js": - loss += paddle.multiply( - p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5)) - loss *= 0.5 - if reduction == "mean": - loss = paddle.mean(loss, axis=[1, 2]) - elif reduction == "none" or reduction is None: - return loss - else: - loss = paddle.sum(loss, axis=[1, 2]) - - return loss - - -class DMLLoss(nn.Layer): - """ - DMLLoss - """ - - def __init__(self, act=None, use_log=False): - super().__init__() - if act is not None: - assert act in ["softmax", "sigmoid"] - if act == "softmax": - self.act = nn.Softmax(axis=-1) - elif act == "sigmoid": - self.act = nn.Sigmoid() - else: - self.act = None - - self.use_log = use_log - self.jskl_loss = KLJSLoss(mode="js") - - def _kldiv(self, x, target): - eps = 1.0e-10 - loss = target * (paddle.log(target + eps) - x) - # batch mean loss - loss = paddle.sum(loss) / loss.shape[0] - return loss - - def forward(self, out1, out2): - if self.act is not None: - out1 = self.act(out1) + 1e-10 - out2 = self.act(out2) + 1e-10 - if self.use_log: - # for recognition distillation, log is needed for feature map - log_out1 = paddle.log(out1) - log_out2 = paddle.log(out2) - loss = ( - self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0 - else: - # for detection distillation log is not needed - loss = self.jskl_loss(out1, out2) - return loss - - -class DistanceLoss(nn.Layer): - """ - DistanceLoss: - mode: loss mode - """ - - def __init__(self, mode="l2", **kargs): - super().__init__() - assert mode in ["l1", "l2", "smooth_l1"] - if mode == "l1": - self.loss_func = nn.L1Loss(**kargs) - elif mode == "l2": - self.loss_func = nn.MSELoss(**kargs) - elif mode == "smooth_l1": - self.loss_func = nn.SmoothL1Loss(**kargs) - - def forward(self, x, y): - return self.loss_func(x, y) - - -class LossFromOutput(nn.Layer): - def __init__(self, key='loss', reduction='none'): - super().__init__() - self.key = key - self.reduction = reduction - - def forward(self, predicts, batch): - loss = predicts[self.key] - if self.reduction == 'mean': - loss = paddle.mean(loss) - elif self.reduction == 'sum': - loss = paddle.sum(loss) - return 
{'loss': loss} diff --git a/backend/ppocr/losses/center_loss.py b/backend/ppocr/losses/center_loss.py deleted file mode 100644 index f62b8af3..00000000 --- a/backend/ppocr/losses/center_loss.py +++ /dev/null @@ -1,88 +0,0 @@ -#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -# This code is refer from: https://github.com/KaiyangZhou/pytorch-center-loss - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import pickle - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - - -class CenterLoss(nn.Layer): - """ - Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. - """ - - def __init__(self, num_classes=6625, feat_dim=96, center_file_path=None): - super().__init__() - self.num_classes = num_classes - self.feat_dim = feat_dim - self.centers = paddle.randn( - shape=[self.num_classes, self.feat_dim]).astype("float64") - - if center_file_path is not None: - assert os.path.exists( - center_file_path - ), f"center path({center_file_path}) must exist when it is not None." - with open(center_file_path, 'rb') as f: - char_dict = pickle.load(f) - for key in char_dict.keys(): - self.centers[key] = paddle.to_tensor(char_dict[key]) - - def __call__(self, predicts, batch): - assert isinstance(predicts, (list, tuple)) - features, predicts = predicts - - feats_reshape = paddle.reshape( - features, [-1, features.shape[-1]]).astype("float64") - label = paddle.argmax(predicts, axis=2) - label = paddle.reshape(label, [label.shape[0] * label.shape[1]]) - - batch_size = feats_reshape.shape[0] - - #calc l2 distance between feats and centers - square_feat = paddle.sum(paddle.square(feats_reshape), - axis=1, - keepdim=True) - square_feat = paddle.expand(square_feat, [batch_size, self.num_classes]) - - square_center = paddle.sum(paddle.square(self.centers), - axis=1, - keepdim=True) - square_center = paddle.expand( - square_center, [self.num_classes, batch_size]).astype("float64") - square_center = paddle.transpose(square_center, [1, 0]) - - distmat = paddle.add(square_feat, square_center) - feat_dot_center = paddle.matmul(feats_reshape, - paddle.transpose(self.centers, [1, 0])) - distmat = distmat - 2.0 * feat_dot_center - - #generate the mask - classes = paddle.arange(self.num_classes).astype("int64") - label = paddle.expand( - paddle.unsqueeze(label, 1), (batch_size, self.num_classes)) - mask = paddle.equal( - paddle.expand(classes, [batch_size, self.num_classes]), - label).astype("float64") - dist = paddle.multiply(distmat, mask) - - loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size - return {'loss_center': loss} diff --git a/backend/ppocr/losses/cls_loss.py b/backend/ppocr/losses/cls_loss.py deleted file mode 100755 index abc5e5b7..00000000 --- a/backend/ppocr/losses/cls_loss.py +++ /dev/null @@ -1,30 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - - -class ClsLoss(nn.Layer): - def __init__(self, **kwargs): - super(ClsLoss, self).__init__() - self.loss_func = nn.CrossEntropyLoss(reduction='mean') - - def forward(self, predicts, batch): - label = batch[1].astype("int64") - loss = self.loss_func(input=predicts, label=label) - return {'loss': loss} diff --git a/backend/ppocr/losses/combined_loss.py b/backend/ppocr/losses/combined_loss.py deleted file mode 100644 index f4cdee8f..00000000 --- a/backend/ppocr/losses/combined_loss.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import paddle.nn as nn - -from .rec_ctc_loss import CTCLoss -from .center_loss import CenterLoss -from .ace_loss import ACELoss -from .rec_sar_loss import SARLoss - -from .distillation_loss import DistillationCTCLoss -from .distillation_loss import DistillationSARLoss -from .distillation_loss import DistillationDMLLoss -from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss - - -class CombinedLoss(nn.Layer): - """ - CombinedLoss: - a combionation of loss function - """ - - def __init__(self, loss_config_list=None): - super().__init__() - self.loss_func = [] - self.loss_weight = [] - assert isinstance(loss_config_list, list), ( - 'operator config should be a list') - for config in loss_config_list: - assert isinstance(config, - dict) and len(config) == 1, "yaml format error" - name = list(config)[0] - param = config[name] - assert "weight" in param, "weight must be in param, but param just contains {}".format( - param.keys()) - self.loss_weight.append(param.pop("weight")) - self.loss_func.append(eval(name)(**param)) - - def forward(self, input, batch, **kargs): - loss_dict = {} - loss_all = 0. 
- for idx, loss_func in enumerate(self.loss_func): - loss = loss_func(input, batch, **kargs) - if isinstance(loss, paddle.Tensor): - loss = {"loss_{}_{}".format(str(loss), idx): loss} - - weight = self.loss_weight[idx] - - loss = {key: loss[key] * weight for key in loss} - - if "loss" in loss: - loss_all += loss["loss"] - else: - loss_all += paddle.add_n(list(loss.values())) - loss_dict.update(loss) - loss_dict["loss"] = loss_all - return loss_dict diff --git a/backend/ppocr/losses/det_basic_loss.py b/backend/ppocr/losses/det_basic_loss.py deleted file mode 100644 index 61ea579b..00000000 --- a/backend/ppocr/losses/det_basic_loss.py +++ /dev/null @@ -1,153 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import paddle -from paddle import nn -import paddle.nn.functional as F - - -class BalanceLoss(nn.Layer): - def __init__(self, - balance_loss=True, - main_loss_type='DiceLoss', - negative_ratio=3, - return_origin=False, - eps=1e-6, - **kwargs): - """ - The BalanceLoss for Differentiable Binarization text detection - args: - balance_loss (bool): whether balance loss or not, default is True - main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss', - 'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'. - negative_ratio (int|float): float, default is 3. - return_origin (bool): whether return unbalanced loss or not, default is False. - eps (float): default is 1e-6. - """ - super(BalanceLoss, self).__init__() - self.balance_loss = balance_loss - self.main_loss_type = main_loss_type - self.negative_ratio = negative_ratio - self.return_origin = return_origin - self.eps = eps - - if self.main_loss_type == "CrossEntropy": - self.loss = nn.CrossEntropyLoss() - elif self.main_loss_type == "Euclidean": - self.loss = nn.MSELoss() - elif self.main_loss_type == "DiceLoss": - self.loss = DiceLoss(self.eps) - elif self.main_loss_type == "BCELoss": - self.loss = BCELoss(reduction='none') - elif self.main_loss_type == "MaskL1Loss": - self.loss = MaskL1Loss(self.eps) - else: - loss_type = [ - 'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss' - ] - raise Exception( - "main_loss_type in BalanceLoss() can only be one of {}".format( - loss_type)) - - def forward(self, pred, gt, mask=None): - """ - The BalanceLoss for Differentiable Binarization text detection - args: - pred (variable): predicted feature maps. - gt (variable): ground truth feature maps. - mask (variable): masked maps. 
- return: (variable) balanced loss - """ - positive = gt * mask - negative = (1 - gt) * mask - - positive_count = int(positive.sum()) - negative_count = int( - min(negative.sum(), positive_count * self.negative_ratio)) - loss = self.loss(pred, gt, mask=mask) - - if not self.balance_loss: - return loss - - positive_loss = positive * loss - negative_loss = negative * loss - negative_loss = paddle.reshape(negative_loss, shape=[-1]) - if negative_count > 0: - sort_loss = negative_loss.sort(descending=True) - negative_loss = sort_loss[:negative_count] - # negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int) - balance_loss = (positive_loss.sum() + negative_loss.sum()) / ( - positive_count + negative_count + self.eps) - else: - balance_loss = positive_loss.sum() / (positive_count + self.eps) - if self.return_origin: - return balance_loss, loss - - return balance_loss - - -class DiceLoss(nn.Layer): - def __init__(self, eps=1e-6): - super(DiceLoss, self).__init__() - self.eps = eps - - def forward(self, pred, gt, mask, weights=None): - """ - DiceLoss function. - """ - - assert pred.shape == gt.shape - assert pred.shape == mask.shape - if weights is not None: - assert weights.shape == mask.shape - mask = weights * mask - intersection = paddle.sum(pred * gt * mask) - - union = paddle.sum(pred * mask) + paddle.sum(gt * mask) + self.eps - loss = 1 - 2.0 * intersection / union - assert loss <= 1 - return loss - - -class MaskL1Loss(nn.Layer): - def __init__(self, eps=1e-6): - super(MaskL1Loss, self).__init__() - self.eps = eps - - def forward(self, pred, gt, mask): - """ - Mask L1 Loss - """ - loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps) - loss = paddle.mean(loss) - return loss - - -class BCELoss(nn.Layer): - def __init__(self, reduction='mean'): - super(BCELoss, self).__init__() - self.reduction = reduction - - def forward(self, input, label, mask=None, weight=None, name=None): - loss = F.binary_cross_entropy(input, label, reduction=self.reduction) - return loss diff --git a/backend/ppocr/losses/det_db_loss.py b/backend/ppocr/losses/det_db_loss.py deleted file mode 100755 index 708ffbdb..00000000 --- a/backend/ppocr/losses/det_db_loss.py +++ /dev/null @@ -1,76 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - -from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss - - -class DBLoss(nn.Layer): - """ - Differentiable Binarization (DB) Loss Function - args: - param (dict): the super paramter for DB Loss - """ - - def __init__(self, - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - **kwargs): - super(DBLoss, self).__init__() - self.alpha = alpha - self.beta = beta - self.dice_loss = DiceLoss(eps=eps) - self.l1_loss = MaskL1Loss(eps=eps) - self.bce_loss = BalanceLoss( - balance_loss=balance_loss, - main_loss_type=main_loss_type, - negative_ratio=ohem_ratio) - - def forward(self, predicts, labels): - predict_maps = predicts['maps'] - label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[ - 1:] - shrink_maps = predict_maps[:, 0, :, :] - threshold_maps = predict_maps[:, 1, :, :] - binary_maps = predict_maps[:, 2, :, :] - - loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map, - label_shrink_mask) - loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map, - label_threshold_mask) - loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map, - label_shrink_mask) - loss_shrink_maps = self.alpha * loss_shrink_maps - loss_threshold_maps = self.beta * loss_threshold_maps - - loss_all = loss_shrink_maps + loss_threshold_maps \ - + loss_binary_maps - losses = {'loss': loss_all, \ - "loss_shrink_maps": loss_shrink_maps, \ - "loss_threshold_maps": loss_threshold_maps, \ - "loss_binary_maps": loss_binary_maps} - return losses diff --git a/backend/ppocr/losses/det_east_loss.py b/backend/ppocr/losses/det_east_loss.py deleted file mode 100644 index bcf5372b..00000000 --- a/backend/ppocr/losses/det_east_loss.py +++ /dev/null @@ -1,63 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from .det_basic_loss import DiceLoss - - -class EASTLoss(nn.Layer): - """ - """ - - def __init__(self, - eps=1e-6, - **kwargs): - super(EASTLoss, self).__init__() - self.dice_loss = DiceLoss(eps=eps) - - def forward(self, predicts, labels): - l_score, l_geo, l_mask = labels[1:] - f_score = predicts['f_score'] - f_geo = predicts['f_geo'] - - dice_loss = self.dice_loss(f_score, l_score, l_mask) - - #smoooth_l1_loss - channels = 8 - l_geo_split = paddle.split( - l_geo, num_or_sections=channels + 1, axis=1) - f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1) - smooth_l1 = 0 - for i in range(0, channels): - geo_diff = l_geo_split[i] - f_geo_split[i] - abs_geo_diff = paddle.abs(geo_diff) - smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score) - smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32') - in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \ - (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign) - out_loss = l_geo_split[-1] / channels * in_loss * l_score - smooth_l1 += out_loss - smooth_l1_loss = paddle.mean(smooth_l1 * l_score) - - dice_loss = dice_loss * 0.01 - total_loss = dice_loss + smooth_l1_loss - losses = {"loss":total_loss, \ - "dice_loss":dice_loss,\ - "smooth_l1_loss":smooth_l1_loss} - return losses diff --git a/backend/ppocr/losses/det_fce_loss.py b/backend/ppocr/losses/det_fce_loss.py deleted file mode 100644 index d7dfb5aa..00000000 --- a/backend/ppocr/losses/det_fce_loss.py +++ /dev/null @@ -1,227 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/fce_loss.py -""" - -import numpy as np -from paddle import nn -import paddle -import paddle.nn.functional as F -from functools import partial - - -def multi_apply(func, *args, **kwargs): - pfunc = partial(func, **kwargs) if kwargs else func - map_results = map(pfunc, *args) - return tuple(map(list, zip(*map_results))) - - -class FCELoss(nn.Layer): - """The class for implementing FCENet loss - FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped - Text Detection - - [https://arxiv.org/abs/2104.10442] - - Args: - fourier_degree (int) : The maximum Fourier transform degree k. - num_sample (int) : The sampling points number of regression - loss. If it is too small, fcenet tends to be overfitting. - ohem_ratio (float): the negative/positive ratio in OHEM. 
- """ - - def __init__(self, fourier_degree, num_sample, ohem_ratio=3.): - super().__init__() - self.fourier_degree = fourier_degree - self.num_sample = num_sample - self.ohem_ratio = ohem_ratio - - def forward(self, preds, labels): - assert isinstance(preds, dict) - preds = preds['levels'] - - p3_maps, p4_maps, p5_maps = labels[1:] - assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\ - 'fourier degree not equal in FCEhead and FCEtarget' - - # to tensor - gts = [p3_maps, p4_maps, p5_maps] - for idx, maps in enumerate(gts): - gts[idx] = paddle.to_tensor(np.stack(maps)) - - losses = multi_apply(self.forward_single, preds, gts) - - loss_tr = paddle.to_tensor(0.).astype('float32') - loss_tcl = paddle.to_tensor(0.).astype('float32') - loss_reg_x = paddle.to_tensor(0.).astype('float32') - loss_reg_y = paddle.to_tensor(0.).astype('float32') - loss_all = paddle.to_tensor(0.).astype('float32') - - for idx, loss in enumerate(losses): - loss_all += sum(loss) - if idx == 0: - loss_tr += sum(loss) - elif idx == 1: - loss_tcl += sum(loss) - elif idx == 2: - loss_reg_x += sum(loss) - else: - loss_reg_y += sum(loss) - - results = dict( - loss=loss_all, - loss_text=loss_tr, - loss_center=loss_tcl, - loss_reg_x=loss_reg_x, - loss_reg_y=loss_reg_y, ) - return results - - def forward_single(self, pred, gt): - cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1)) - reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1)) - gt = paddle.transpose(gt, (0, 2, 3, 1)) - - k = 2 * self.fourier_degree + 1 - tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2)) - tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2)) - x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k)) - y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k)) - - tr_mask = gt[:, :, :, :1].reshape([-1]) - tcl_mask = gt[:, :, :, 1:2].reshape([-1]) - train_mask = gt[:, :, :, 2:3].reshape([-1]) - x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k)) - y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k)) - - tr_train_mask = (train_mask * tr_mask).astype('bool') - tr_train_mask2 = paddle.concat( - [tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1) - # tr loss - loss_tr = self.ohem(tr_pred, tr_mask, train_mask) - # tcl loss - loss_tcl = paddle.to_tensor(0.).astype('float32') - tr_neg_mask = tr_train_mask.logical_not() - tr_neg_mask2 = paddle.concat( - [tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1) - if tr_train_mask.sum().item() > 0: - loss_tcl_pos = F.cross_entropy( - tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]), - tcl_mask.masked_select(tr_train_mask).astype('int64')) - loss_tcl_neg = F.cross_entropy( - tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]), - tcl_mask.masked_select(tr_neg_mask).astype('int64')) - loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg - - # regression loss - loss_reg_x = paddle.to_tensor(0.).astype('float32') - loss_reg_y = paddle.to_tensor(0.).astype('float32') - if tr_train_mask.sum().item() > 0: - weight = (tr_mask.masked_select(tr_train_mask.astype('bool')) - .astype('float32') + tcl_mask.masked_select( - tr_train_mask.astype('bool')).astype('float32')) / 2 - weight = weight.reshape([-1, 1]) - - ft_x, ft_y = self.fourier2poly(x_map, y_map) - ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred) - - dim = ft_x.shape[1] - - tr_train_mask3 = paddle.concat( - [tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1) - - loss_reg_x = paddle.mean(weight * F.smooth_l1_loss( - ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]), - 
ft_x.masked_select(tr_train_mask3).reshape([-1, dim]), - reduction='none')) - loss_reg_y = paddle.mean(weight * F.smooth_l1_loss( - ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]), - ft_y.masked_select(tr_train_mask3).reshape([-1, dim]), - reduction='none')) - - return loss_tr, loss_tcl, loss_reg_x, loss_reg_y - - def ohem(self, predict, target, train_mask): - - pos = (target * train_mask).astype('bool') - neg = ((1 - target) * train_mask).astype('bool') - - pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1) - neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1) - - n_pos = pos.astype('float32').sum() - - if n_pos.item() > 0: - loss_pos = F.cross_entropy( - predict.masked_select(pos2).reshape([-1, 2]), - target.masked_select(pos).astype('int64'), - reduction='sum') - loss_neg = F.cross_entropy( - predict.masked_select(neg2).reshape([-1, 2]), - target.masked_select(neg).astype('int64'), - reduction='none') - n_neg = min( - int(neg.astype('float32').sum().item()), - int(self.ohem_ratio * n_pos.astype('float32'))) - else: - loss_pos = paddle.to_tensor(0.) - loss_neg = F.cross_entropy( - predict.masked_select(neg2).reshape([-1, 2]), - target.masked_select(neg).astype('int64'), - reduction='none') - n_neg = 100 - if len(loss_neg) > n_neg: - loss_neg, _ = paddle.topk(loss_neg, n_neg) - - return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32') - - def fourier2poly(self, real_maps, imag_maps): - """Transform Fourier coefficient maps to polygon maps. - - Args: - real_maps (tensor): A map composed of the real parts of the - Fourier coefficients, whose shape is (-1, 2k+1) - imag_maps (tensor):A map composed of the imag parts of the - Fourier coefficients, whose shape is (-1, 2k+1) - - Returns - x_maps (tensor): A map composed of the x value of the polygon - represented by n sample points (xn, yn), whose shape is (-1, n) - y_maps (tensor): A map composed of the y value of the polygon - represented by n sample points (xn, yn), whose shape is (-1, n) - """ - - k_vect = paddle.arange( - -self.fourier_degree, self.fourier_degree + 1, - dtype='float32').reshape([-1, 1]) - i_vect = paddle.arange( - 0, self.num_sample, dtype='float32').reshape([1, -1]) - - transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect, - i_vect) - - x1 = paddle.einsum('ak, kn-> an', real_maps, - paddle.cos(transform_matrix)) - x2 = paddle.einsum('ak, kn-> an', imag_maps, - paddle.sin(transform_matrix)) - y1 = paddle.einsum('ak, kn-> an', real_maps, - paddle.sin(transform_matrix)) - y2 = paddle.einsum('ak, kn-> an', imag_maps, - paddle.cos(transform_matrix)) - - x_maps = x1 - x2 - y_maps = y1 + y2 - - return x_maps, y_maps diff --git a/backend/ppocr/losses/det_pse_loss.py b/backend/ppocr/losses/det_pse_loss.py deleted file mode 100644 index 6b31343e..00000000 --- a/backend/ppocr/losses/det_pse_loss.py +++ /dev/null @@ -1,149 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py -""" - -import paddle -from paddle import nn -from paddle.nn import functional as F -import numpy as np -from ppocr.utils.iou import iou - - -class PSELoss(nn.Layer): - def __init__(self, - alpha, - ohem_ratio=3, - kernel_sample_mask='pred', - reduction='sum', - eps=1e-6, - **kwargs): - """Implement PSE Loss. - """ - super(PSELoss, self).__init__() - assert reduction in ['sum', 'mean', 'none'] - self.alpha = alpha - self.ohem_ratio = ohem_ratio - self.kernel_sample_mask = kernel_sample_mask - self.reduction = reduction - self.eps = eps - - def forward(self, outputs, labels): - predicts = outputs['maps'] - predicts = F.interpolate(predicts, scale_factor=4) - - texts = predicts[:, 0, :, :] - kernels = predicts[:, 1:, :, :] - gt_texts, gt_kernels, training_masks = labels[1:] - - # text loss - selected_masks = self.ohem_batch(texts, gt_texts, training_masks) - - loss_text = self.dice_loss(texts, gt_texts, selected_masks) - iou_text = iou((texts > 0).astype('int64'), - gt_texts, - training_masks, - reduce=False) - losses = dict(loss_text=loss_text, iou_text=iou_text) - - # kernel loss - loss_kernels = [] - if self.kernel_sample_mask == 'gt': - selected_masks = gt_texts * training_masks - elif self.kernel_sample_mask == 'pred': - selected_masks = ( - F.sigmoid(texts) > 0.5).astype('float32') * training_masks - - for i in range(kernels.shape[1]): - kernel_i = kernels[:, i, :, :] - gt_kernel_i = gt_kernels[:, i, :, :] - loss_kernel_i = self.dice_loss(kernel_i, gt_kernel_i, - selected_masks) - loss_kernels.append(loss_kernel_i) - loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1) - iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'), - gt_kernels[:, -1, :, :], - training_masks * gt_texts, - reduce=False) - losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel)) - loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels - losses['loss'] = loss - if self.reduction == 'sum': - losses = {x: paddle.sum(v) for x, v in losses.items()} - elif self.reduction == 'mean': - losses = {x: paddle.mean(v) for x, v in losses.items()} - return losses - - def dice_loss(self, input, target, mask): - input = F.sigmoid(input) - - input = input.reshape([input.shape[0], -1]) - target = target.reshape([target.shape[0], -1]) - mask = mask.reshape([mask.shape[0], -1]) - - input = input * mask - target = target * mask - - a = paddle.sum(input * target, 1) - b = paddle.sum(input * input, 1) + self.eps - c = paddle.sum(target * target, 1) + self.eps - d = (2 * a) / (b + c) - return 1 - d - - def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3): - pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int( - paddle.sum( - paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5)) - .astype('float32'))) - - if pos_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape( - [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( - 'float32') - return selected_mask - - neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32'))) - neg_num = int(min(pos_num * ohem_ratio, neg_num)) - - if neg_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape( - [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( - 'float32') - return selected_mask - - neg_score = paddle.masked_select(score, gt_text <= 0.5) - neg_score_sorted = paddle.sort(-neg_score) - threshold = -neg_score_sorted[neg_num - 1] - - 
selected_mask = paddle.logical_and( - paddle.logical_or((score >= threshold), (gt_text > 0.5)), - (training_mask > 0.5)) - selected_mask = selected_mask.reshape( - [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( - 'float32') - return selected_mask - - def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3): - selected_masks = [] - for i in range(scores.shape[0]): - selected_masks.append( - self.ohem_single(scores[i, :, :], gt_texts[i, :, :], - training_masks[i, :, :], ohem_ratio)) - - selected_masks = paddle.concat(selected_masks, 0).astype('float32') - return selected_masks diff --git a/backend/ppocr/losses/det_sast_loss.py b/backend/ppocr/losses/det_sast_loss.py deleted file mode 100644 index 2e0c756b..00000000 --- a/backend/ppocr/losses/det_sast_loss.py +++ /dev/null @@ -1,121 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from .det_basic_loss import DiceLoss -import numpy as np - - -class SASTLoss(nn.Layer): - """ - """ - - def __init__(self, eps=1e-6, **kwargs): - super(SASTLoss, self).__init__() - self.dice_loss = DiceLoss(eps=eps) - - def forward(self, predicts, labels): - """ - tcl_pos: N x 128 x 3 - tcl_mask: N x 128 x 1 - tcl_label: N x X list or LoDTensor - """ - - f_score = predicts['f_score'] - f_border = predicts['f_border'] - f_tvo = predicts['f_tvo'] - f_tco = predicts['f_tco'] - - l_score, l_border, l_mask, l_tvo, l_tco = labels[1:] - - #score_loss - intersection = paddle.sum(f_score * l_score * l_mask) - union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask) - score_loss = 1.0 - 2 * intersection / (union + 1e-5) - - #border loss - l_border_split, l_border_norm = paddle.split( - l_border, num_or_sections=[4, 1], axis=1) - f_border_split = f_border - border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1]) - l_border_norm_split = paddle.expand( - x=l_border_norm, shape=border_ex_shape) - l_border_score = paddle.expand(x=l_score, shape=border_ex_shape) - l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape) - - border_diff = l_border_split - f_border_split - abs_border_diff = paddle.abs(border_diff) - border_sign = abs_border_diff < 1.0 - border_sign = paddle.cast(border_sign, dtype='float32') - border_sign.stop_gradient = True - border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \ - (abs_border_diff - 0.5) * (1.0 - border_sign) - border_out_loss = l_border_norm_split * border_in_loss - border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \ - (paddle.sum(l_border_score * l_border_mask) + 1e-5) - - #tvo_loss - l_tvo_split, l_tvo_norm = paddle.split( - l_tvo, num_or_sections=[8, 1], axis=1) - f_tvo_split = f_tvo - tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1]) - l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape) - l_tvo_score = 
paddle.expand(x=l_score, shape=tvo_ex_shape) - l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape) - # - tvo_geo_diff = l_tvo_split - f_tvo_split - abs_tvo_geo_diff = paddle.abs(tvo_geo_diff) - tvo_sign = abs_tvo_geo_diff < 1.0 - tvo_sign = paddle.cast(tvo_sign, dtype='float32') - tvo_sign.stop_gradient = True - tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \ - (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign) - tvo_out_loss = l_tvo_norm_split * tvo_in_loss - tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \ - (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5) - - #tco_loss - l_tco_split, l_tco_norm = paddle.split( - l_tco, num_or_sections=[2, 1], axis=1) - f_tco_split = f_tco - tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1]) - l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape) - l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape) - l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape) - - tco_geo_diff = l_tco_split - f_tco_split - abs_tco_geo_diff = paddle.abs(tco_geo_diff) - tco_sign = abs_tco_geo_diff < 1.0 - tco_sign = paddle.cast(tco_sign, dtype='float32') - tco_sign.stop_gradient = True - tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \ - (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign) - tco_out_loss = l_tco_norm_split * tco_in_loss - tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \ - (paddle.sum(l_tco_score * l_tco_mask) + 1e-5) - - # total loss - tvo_lw, tco_lw = 1.5, 1.5 - score_lw, border_lw = 1.0, 1.0 - total_loss = score_loss * score_lw + border_loss * border_lw + \ - tvo_loss * tvo_lw + tco_loss * tco_lw - - losses = {'loss':total_loss, "score_loss":score_loss,\ - "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss} - return losses diff --git a/backend/ppocr/losses/distillation_loss.py b/backend/ppocr/losses/distillation_loss.py deleted file mode 100644 index 565b066d..00000000 --- a/backend/ppocr/losses/distillation_loss.py +++ /dev/null @@ -1,324 +0,0 @@ -#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import paddle -import paddle.nn as nn -import numpy as np -import cv2 - -from .rec_ctc_loss import CTCLoss -from .rec_sar_loss import SARLoss -from .basic_loss import DMLLoss -from .basic_loss import DistanceLoss -from .det_db_loss import DBLoss -from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss - - -def _sum_loss(loss_dict): - if "loss" in loss_dict.keys(): - return loss_dict - else: - loss_dict["loss"] = 0. 
- for k, value in loss_dict.items(): - if k == "loss": - continue - else: - loss_dict["loss"] += value - return loss_dict - - -class DistillationDMLLoss(DMLLoss): - """ - """ - - def __init__(self, - model_name_pairs=[], - act=None, - use_log=False, - key=None, - multi_head=False, - dis_head='ctc', - maps_name=None, - name="dml"): - super().__init__(act=act, use_log=use_log) - assert isinstance(model_name_pairs, list) - self.key = key - self.multi_head = multi_head - self.dis_head = dis_head - self.model_name_pairs = self._check_model_name_pairs(model_name_pairs) - self.name = name - self.maps_name = self._check_maps_name(maps_name) - - def _check_model_name_pairs(self, model_name_pairs): - if not isinstance(model_name_pairs, list): - return [] - elif isinstance(model_name_pairs[0], list) and isinstance( - model_name_pairs[0][0], str): - return model_name_pairs - else: - return [model_name_pairs] - - def _check_maps_name(self, maps_name): - if maps_name is None: - return None - elif type(maps_name) == str: - return [maps_name] - elif type(maps_name) == list: - return [maps_name] - else: - return None - - def _slice_out(self, outs): - new_outs = {} - for k in self.maps_name: - if k == "thrink_maps": - new_outs[k] = outs[:, 0, :, :] - elif k == "threshold_maps": - new_outs[k] = outs[:, 1, :, :] - elif k == "binary_maps": - new_outs[k] = outs[:, 2, :, :] - else: - continue - return new_outs - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, pair in enumerate(self.model_name_pairs): - out1 = predicts[pair[0]] - out2 = predicts[pair[1]] - if self.key is not None: - out1 = out1[self.key] - out2 = out2[self.key] - - if self.maps_name is None: - if self.multi_head: - loss = super().forward(out1[self.dis_head], - out2[self.dis_head]) - else: - loss = super().forward(out1, out2) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], - idx)] = loss[key] - else: - loss_dict["{}_{}".format(self.name, idx)] = loss - else: - outs1 = self._slice_out(out1) - outs2 = self._slice_out(out2) - for _c, k in enumerate(outs1.keys()): - loss = super().forward(outs1[k], outs2[k]) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}_{}_{}".format(key, pair[ - 0], pair[1], self.maps_name, idx)] = loss[key] - else: - loss_dict["{}_{}_{}".format(self.name, self.maps_name[ - _c], idx)] = loss - - loss_dict = _sum_loss(loss_dict) - - return loss_dict - - -class DistillationCTCLoss(CTCLoss): - def __init__(self, - model_name_list=[], - key=None, - multi_head=False, - name="loss_ctc"): - super().__init__() - self.model_name_list = model_name_list - self.key = key - self.name = name - self.multi_head = multi_head - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - if self.multi_head: - assert 'ctc' in out, 'multi head has multi out' - loss = super().forward(out['ctc'], batch[:2] + batch[3:]) - else: - loss = super().forward(out, batch) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}".format(self.name, model_name, - idx)] = loss[key] - else: - loss_dict["{}_{}".format(self.name, model_name)] = loss - return loss_dict - - -class DistillationSARLoss(SARLoss): - def __init__(self, - model_name_list=[], - key=None, - multi_head=False, - name="loss_sar", - **kwargs): - ignore_index = kwargs.get('ignore_index', 92) - super().__init__(ignore_index=ignore_index) - 
self.model_name_list = model_name_list - self.key = key - self.name = name - self.multi_head = multi_head - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - if self.multi_head: - assert 'sar' in out, 'multi head has multi out' - loss = super().forward(out['sar'], batch[:1] + batch[2:]) - else: - loss = super().forward(out, batch) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}".format(self.name, model_name, - idx)] = loss[key] - else: - loss_dict["{}_{}".format(self.name, model_name)] = loss - return loss_dict - - -class DistillationDBLoss(DBLoss): - def __init__(self, - model_name_list=[], - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - name="db", - **kwargs): - super().__init__() - self.model_name_list = model_name_list - self.name = name - self.key = None - - def forward(self, predicts, batch): - loss_dict = {} - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - loss = super().forward(out, batch) - - if isinstance(loss, dict): - for key in loss.keys(): - if key == "loss": - continue - name = "{}_{}_{}".format(self.name, model_name, key) - loss_dict[name] = loss[key] - else: - loss_dict["{}_{}".format(self.name, model_name)] = loss - - loss_dict = _sum_loss(loss_dict) - return loss_dict - - -class DistillationDilaDBLoss(DBLoss): - def __init__(self, - model_name_pairs=[], - key=None, - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - name="dila_dbloss"): - super().__init__() - self.model_name_pairs = model_name_pairs - self.name = name - self.key = key - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, pair in enumerate(self.model_name_pairs): - stu_outs = predicts[pair[0]] - tch_outs = predicts[pair[1]] - if self.key is not None: - stu_preds = stu_outs[self.key] - tch_preds = tch_outs[self.key] - - stu_shrink_maps = stu_preds[:, 0, :, :] - stu_binary_maps = stu_preds[:, 2, :, :] - - # dilation to teacher prediction - dilation_w = np.array([[1, 1], [1, 1]]) - th_shrink_maps = tch_preds[:, 0, :, :] - th_shrink_maps = th_shrink_maps.numpy() > 0.3 # thresh = 0.3 - dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32) - for i in range(th_shrink_maps.shape[0]): - dilate_maps[i] = cv2.dilate( - th_shrink_maps[i, :, :].astype(np.uint8), dilation_w) - th_shrink_maps = paddle.to_tensor(dilate_maps) - - label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[ - 1:] - - # calculate the shrink map loss - bce_loss = self.alpha * self.bce_loss( - stu_shrink_maps, th_shrink_maps, label_shrink_mask) - loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps, - label_shrink_mask) - - # k = f"{self.name}_{pair[0]}_{pair[1]}" - k = "{}_{}_{}".format(self.name, pair[0], pair[1]) - loss_dict[k] = bce_loss + loss_binary_maps - - loss_dict = _sum_loss(loss_dict) - return loss_dict - - -class DistillationDistanceLoss(DistanceLoss): - """ - """ - - def __init__(self, - mode="l2", - model_name_pairs=[], - key=None, - name="loss_distance", - **kargs): - super().__init__(mode=mode, **kargs) - assert isinstance(model_name_pairs, list) - self.key = key - self.model_name_pairs = model_name_pairs - self.name = name + "_l2" - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, 
pair in enumerate(self.model_name_pairs): - out1 = predicts[pair[0]] - out2 = predicts[pair[1]] - if self.key is not None: - out1 = out1[self.key] - out2 = out2[self.key] - loss = super().forward(out1, out2) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}".format(self.name, key, idx)] = loss[ - key] - else: - loss_dict["{}_{}_{}_{}".format(self.name, pair[0], pair[1], - idx)] = loss - return loss_dict diff --git a/backend/ppocr/losses/e2e_pg_loss.py b/backend/ppocr/losses/e2e_pg_loss.py deleted file mode 100644 index 10a8ed0a..00000000 --- a/backend/ppocr/losses/e2e_pg_loss.py +++ /dev/null @@ -1,140 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -import paddle - -from .det_basic_loss import DiceLoss -from ppocr.utils.e2e_utils.extract_batchsize import pre_process - - -class PGLoss(nn.Layer): - def __init__(self, - tcl_bs, - max_text_length, - max_text_nums, - pad_num, - eps=1e-6, - **kwargs): - super(PGLoss, self).__init__() - self.tcl_bs = tcl_bs - self.max_text_nums = max_text_nums - self.max_text_length = max_text_length - self.pad_num = pad_num - self.dice_loss = DiceLoss(eps=eps) - - def border_loss(self, f_border, l_border, l_score, l_mask): - l_border_split, l_border_norm = paddle.tensor.split( - l_border, num_or_sections=[4, 1], axis=1) - f_border_split = f_border - b, c, h, w = l_border_norm.shape - l_border_norm_split = paddle.expand( - x=l_border_norm, shape=[b, 4 * c, h, w]) - b, c, h, w = l_score.shape - l_border_score = paddle.expand(x=l_score, shape=[b, 4 * c, h, w]) - b, c, h, w = l_mask.shape - l_border_mask = paddle.expand(x=l_mask, shape=[b, 4 * c, h, w]) - border_diff = l_border_split - f_border_split - abs_border_diff = paddle.abs(border_diff) - border_sign = abs_border_diff < 1.0 - border_sign = paddle.cast(border_sign, dtype='float32') - border_sign.stop_gradient = True - border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \ - (abs_border_diff - 0.5) * (1.0 - border_sign) - border_out_loss = l_border_norm_split * border_in_loss - border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \ - (paddle.sum(l_border_score * l_border_mask) + 1e-5) - return border_loss - - def direction_loss(self, f_direction, l_direction, l_score, l_mask): - l_direction_split, l_direction_norm = paddle.tensor.split( - l_direction, num_or_sections=[2, 1], axis=1) - f_direction_split = f_direction - b, c, h, w = l_direction_norm.shape - l_direction_norm_split = paddle.expand( - x=l_direction_norm, shape=[b, 2 * c, h, w]) - b, c, h, w = l_score.shape - l_direction_score = paddle.expand(x=l_score, shape=[b, 2 * c, h, w]) - b, c, h, w = l_mask.shape - l_direction_mask = paddle.expand(x=l_mask, shape=[b, 2 * c, h, w]) - direction_diff = l_direction_split - f_direction_split - abs_direction_diff = paddle.abs(direction_diff) - 
direction_sign = abs_direction_diff < 1.0 - direction_sign = paddle.cast(direction_sign, dtype='float32') - direction_sign.stop_gradient = True - direction_in_loss = 0.5 * abs_direction_diff * abs_direction_diff * direction_sign + \ - (abs_direction_diff - 0.5) * (1.0 - direction_sign) - direction_out_loss = l_direction_norm_split * direction_in_loss - direction_loss = paddle.sum(direction_out_loss * l_direction_score * l_direction_mask) / \ - (paddle.sum(l_direction_score * l_direction_mask) + 1e-5) - return direction_loss - - def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t): - f_char = paddle.transpose(f_char, [0, 2, 3, 1]) - tcl_pos = paddle.reshape(tcl_pos, [-1, 3]) - tcl_pos = paddle.cast(tcl_pos, dtype=int) - f_tcl_char = paddle.gather_nd(f_char, tcl_pos) - f_tcl_char = paddle.reshape(f_tcl_char, - [-1, 64, 37]) # len(Lexicon_Table)+1 - f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2) - f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0 - b, c, l = tcl_mask.shape - tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l]) - tcl_mask_fg.stop_gradient = True - f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * ( - -20.0) - f_tcl_char_mask = paddle.concat([f_tcl_char_fg, f_tcl_char_bg], axis=2) - f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2)) - N, B, _ = f_tcl_char_ld.shape - input_lengths = paddle.to_tensor([N] * B, dtype='int64') - cost = paddle.nn.functional.ctc_loss( - log_probs=f_tcl_char_ld, - labels=tcl_label, - input_lengths=input_lengths, - label_lengths=label_t, - blank=self.pad_num, - reduction='none') - cost = cost.mean() - return cost - - def forward(self, predicts, labels): - images, tcl_maps, tcl_label_maps, border_maps \ - , direction_maps, training_masks, label_list, pos_list, pos_mask = labels - # for all the batch_size - pos_list, pos_mask, label_list, label_t = pre_process( - label_list, pos_list, pos_mask, self.max_text_length, - self.max_text_nums, self.pad_num, self.tcl_bs) - - f_score, f_border, f_direction, f_char = predicts['f_score'], predicts['f_border'], predicts['f_direction'], \ - predicts['f_char'] - score_loss = self.dice_loss(f_score, tcl_maps, training_masks) - border_loss = self.border_loss(f_border, border_maps, tcl_maps, - training_masks) - direction_loss = self.direction_loss(f_direction, direction_maps, - tcl_maps, training_masks) - ctc_loss = self.ctcloss(f_char, pos_list, pos_mask, label_list, label_t) - loss_all = score_loss + border_loss + direction_loss + 5 * ctc_loss - - losses = { - 'loss': loss_all, - "score_loss": score_loss, - "border_loss": border_loss, - "direction_loss": direction_loss, - "ctc_loss": ctc_loss - } - return losses diff --git a/backend/ppocr/losses/kie_sdmgr_loss.py b/backend/ppocr/losses/kie_sdmgr_loss.py deleted file mode 100644 index 745671f5..00000000 --- a/backend/ppocr/losses/kie_sdmgr_loss.py +++ /dev/null @@ -1,115 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/losses/sdmgr_loss.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -import paddle - - -class SDMGRLoss(nn.Layer): - def __init__(self, node_weight=1.0, edge_weight=1.0, ignore=0): - super().__init__() - self.loss_node = nn.CrossEntropyLoss(ignore_index=ignore) - self.loss_edge = nn.CrossEntropyLoss(ignore_index=-1) - self.node_weight = node_weight - self.edge_weight = edge_weight - self.ignore = ignore - - def pre_process(self, gts, tag): - gts, tag = gts.numpy(), tag.numpy().tolist() - temp_gts = [] - batch = len(tag) - for i in range(batch): - num, recoder_len = tag[i][0], tag[i][1] - temp_gts.append( - paddle.to_tensor( - gts[i, :num, :num + 1], dtype='int64')) - return temp_gts - - def accuracy(self, pred, target, topk=1, thresh=None): - """Calculate accuracy according to the prediction and target. - - Args: - pred (torch.Tensor): The model prediction, shape (N, num_class) - target (torch.Tensor): The target of each prediction, shape (N, ) - topk (int | tuple[int], optional): If the predictions in ``topk`` - matches the target, the predictions will be regarded as - correct ones. Defaults to 1. - thresh (float, optional): If not None, predictions with scores under - this threshold are considered incorrect. Default to None. - - Returns: - float | tuple[float]: If the input ``topk`` is a single integer, - the function will return a single float as accuracy. If - ``topk`` is a tuple containing multiple integers, the - function will return a tuple containing accuracies of - each ``topk`` number. - """ - assert isinstance(topk, (int, tuple)) - if isinstance(topk, int): - topk = (topk, ) - return_single = True - else: - return_single = False - - maxk = max(topk) - if pred.shape[0] == 0: - accu = [pred.new_tensor(0.) 
for i in range(len(topk))] - return accu[0] if return_single else accu - pred_value, pred_label = paddle.topk(pred, maxk, axis=1) - pred_label = pred_label.transpose( - [1, 0]) # transpose to shape (maxk, N) - correct = paddle.equal(pred_label, - (target.reshape([1, -1]).expand_as(pred_label))) - res = [] - for k in topk: - correct_k = paddle.sum(correct[:k].reshape([-1]).astype('float32'), - axis=0, - keepdim=True) - res.append( - paddle.multiply(correct_k, - paddle.to_tensor(100.0 / pred.shape[0]))) - return res[0] if return_single else res - - def forward(self, pred, batch): - node_preds, edge_preds = pred - gts, tag = batch[4], batch[5] - gts = self.pre_process(gts, tag) - node_gts, edge_gts = [], [] - for gt in gts: - node_gts.append(gt[:, 0]) - edge_gts.append(gt[:, 1:].reshape([-1])) - node_gts = paddle.concat(node_gts) - edge_gts = paddle.concat(edge_gts) - - node_valids = paddle.nonzero(node_gts != self.ignore).reshape([-1]) - edge_valids = paddle.nonzero(edge_gts != -1).reshape([-1]) - loss_node = self.loss_node(node_preds, node_gts) - loss_edge = self.loss_edge(edge_preds, edge_gts) - loss = self.node_weight * loss_node + self.edge_weight * loss_edge - return dict( - loss=loss, - loss_node=loss_node, - loss_edge=loss_edge, - acc_node=self.accuracy( - paddle.gather(node_preds, node_valids), - paddle.gather(node_gts, node_valids)), - acc_edge=self.accuracy( - paddle.gather(edge_preds, edge_valids), - paddle.gather(edge_gts, edge_valids))) diff --git a/backend/ppocr/losses/rec_aster_loss.py b/backend/ppocr/losses/rec_aster_loss.py deleted file mode 100644 index fbb99d29..00000000 --- a/backend/ppocr/losses/rec_aster_loss.py +++ /dev/null @@ -1,99 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class CosineEmbeddingLoss(nn.Layer): - def __init__(self, margin=0.): - super(CosineEmbeddingLoss, self).__init__() - self.margin = margin - self.epsilon = 1e-12 - - def forward(self, x1, x2, target): - similarity = paddle.fluid.layers.reduce_sum( - x1 * x2, dim=-1) / (paddle.norm( - x1, axis=-1) * paddle.norm( - x2, axis=-1) + self.epsilon) - one_list = paddle.full_like(target, fill_value=1) - out = paddle.fluid.layers.reduce_mean( - paddle.where( - paddle.equal(target, one_list), 1. 
- similarity, - paddle.maximum( - paddle.zeros_like(similarity), similarity - self.margin))) - - return out - - -class AsterLoss(nn.Layer): - def __init__(self, - weight=None, - size_average=True, - ignore_index=-100, - sequence_normalize=False, - sample_normalize=True, - **kwargs): - super(AsterLoss, self).__init__() - self.weight = weight - self.size_average = size_average - self.ignore_index = ignore_index - self.sequence_normalize = sequence_normalize - self.sample_normalize = sample_normalize - self.loss_sem = CosineEmbeddingLoss() - self.is_cosin_loss = True - self.loss_func_rec = nn.CrossEntropyLoss(weight=None, reduction='none') - - def forward(self, predicts, batch): - targets = batch[1].astype("int64") - label_lengths = batch[2].astype('int64') - sem_target = batch[3].astype('float32') - embedding_vectors = predicts['embedding_vectors'] - rec_pred = predicts['rec_pred'] - - if not self.is_cosin_loss: - sem_loss = paddle.sum(self.loss_sem(embedding_vectors, sem_target)) - else: - label_target = paddle.ones([embedding_vectors.shape[0]]) - sem_loss = paddle.sum( - self.loss_sem(embedding_vectors, sem_target, label_target)) - - # rec loss - batch_size, def_max_length = targets.shape[0], targets.shape[1] - - mask = paddle.zeros([batch_size, def_max_length]) - for i in range(batch_size): - mask[i, :label_lengths[i]] = 1 - mask = paddle.cast(mask, "float32") - max_length = max(label_lengths) - assert max_length == rec_pred.shape[1] - targets = targets[:, :max_length] - mask = mask[:, :max_length] - rec_pred = paddle.reshape(rec_pred, [-1, rec_pred.shape[2]]) - input = nn.functional.log_softmax(rec_pred, axis=1) - targets = paddle.reshape(targets, [-1, 1]) - mask = paddle.reshape(mask, [-1, 1]) - output = -paddle.index_sample(input, index=targets) * mask - output = paddle.sum(output) - if self.sequence_normalize: - output = output / paddle.sum(mask) - if self.sample_normalize: - output = output / batch_size - - loss = output + sem_loss * 0.1 - return {'loss': loss} diff --git a/backend/ppocr/losses/rec_att_loss.py b/backend/ppocr/losses/rec_att_loss.py deleted file mode 100644 index 6e2f6748..00000000 --- a/backend/ppocr/losses/rec_att_loss.py +++ /dev/null @@ -1,39 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
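For orientation while reading this deletion: the CosineEmbeddingLoss removed above is a masked cosine-similarity penalty (pairs with target 1 are pulled together, all others are pushed below a margin). A minimal NumPy sketch of the same arithmetic, as an illustrative re-implementation rather than code from this repository:

    import numpy as np

    def cosine_embedding_loss(x1, x2, target, margin=0.0, eps=1e-12):
        # cosine similarity per row
        sim = (x1 * x2).sum(-1) / (np.linalg.norm(x1, axis=-1) *
                                   np.linalg.norm(x2, axis=-1) + eps)
        # target == 1: penalize dissimilarity; else penalize similarity above the margin
        loss = np.where(target == 1, 1.0 - sim,
                        np.maximum(0.0, sim - margin))
        return loss.mean()
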
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddle import nn
-
-
-class AttentionLoss(nn.Layer):
-    def __init__(self, **kwargs):
-        super(AttentionLoss, self).__init__()
-        self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')
-
-    def forward(self, predicts, batch):
-        targets = batch[1].astype("int64")
-        label_lengths = batch[2].astype('int64')
-        batch_size, num_steps, num_classes = predicts.shape[0], predicts.shape[
-            1], predicts.shape[2]
-        assert len(targets.shape) == len(list(predicts.shape)) - 1, \
-            "The target's shape and inputs's shape is [N, d] and [N, num_steps]"
-
-        inputs = paddle.reshape(predicts, [-1, predicts.shape[-1]])
-        targets = paddle.reshape(targets, [-1])
-
-        return {'loss': paddle.sum(self.loss_func(inputs, targets))}
diff --git a/backend/ppocr/losses/rec_ctc_loss.py b/backend/ppocr/losses/rec_ctc_loss.py
deleted file mode 100755
index 502fc8c5..00000000
--- a/backend/ppocr/losses/rec_ctc_loss.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddle import nn
-
-
-class CTCLoss(nn.Layer):
-    def __init__(self, use_focal_loss=False, **kwargs):
-        super(CTCLoss, self).__init__()
-        self.loss_func = nn.CTCLoss(blank=0, reduction='none')
-        self.use_focal_loss = use_focal_loss
-
-    def forward(self, predicts, batch):
-        if isinstance(predicts, (list, tuple)):
-            predicts = predicts[-1]
-        predicts = predicts.transpose((1, 0, 2))
-        N, B, _ = predicts.shape
-        preds_lengths = paddle.to_tensor(
-            [N] * B, dtype='int64', place=paddle.CPUPlace())
-        labels = batch[1].astype("int32")
-        label_lengths = batch[2].astype('int64')
-        loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
-        if self.use_focal_loss:
-            weight = paddle.exp(-loss)
-            weight = paddle.subtract(paddle.to_tensor([1.0]), weight)
-            weight = paddle.square(weight)
-            loss = paddle.multiply(loss, weight)
-        loss = loss.mean()
-        return {'loss': loss}
diff --git a/backend/ppocr/losses/rec_enhanced_ctc_loss.py b/backend/ppocr/losses/rec_enhanced_ctc_loss.py
deleted file mode 100644
index b57be646..00000000
--- a/backend/ppocr/losses/rec_enhanced_ctc_loss.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddle import nn
-from .ace_loss import ACELoss
-from .center_loss import CenterLoss
-from .rec_ctc_loss import CTCLoss
-
-
-class EnhancedCTCLoss(nn.Layer):
-    def __init__(self,
-                 use_focal_loss=False,
-                 use_ace_loss=False,
-                 ace_loss_weight=0.1,
-                 use_center_loss=False,
-                 center_loss_weight=0.05,
-                 num_classes=6625,
-                 feat_dim=96,
-                 init_center=False,
-                 center_file_path=None,
-                 **kwargs):
-        super(EnhancedCTCLoss, self).__init__()
-        self.ctc_loss_func = CTCLoss(use_focal_loss=use_focal_loss)
-
-        self.use_ace_loss = False
-        if use_ace_loss:
-            self.use_ace_loss = use_ace_loss
-            self.ace_loss_func = ACELoss()
-            self.ace_loss_weight = ace_loss_weight
-
-        self.use_center_loss = False
-        if use_center_loss:
-            self.use_center_loss = use_center_loss
-            self.center_loss_func = CenterLoss(
-                num_classes=num_classes,
-                feat_dim=feat_dim,
-                init_center=init_center,
-                center_file_path=center_file_path)
-            self.center_loss_weight = center_loss_weight
-
-    def __call__(self, predicts, batch):
-        loss = self.ctc_loss_func(predicts, batch)["loss"]
-
-        if self.use_center_loss:
-            center_loss = self.center_loss_func(
-                predicts, batch)["loss_center"] * self.center_loss_weight
-            loss = loss + center_loss
-
-        if self.use_ace_loss:
-            ace_loss = self.ace_loss_func(
-                predicts, batch)["loss_ace"] * self.ace_loss_weight
-            loss = loss + ace_loss
-
-        return {'enhanced_ctc_loss': loss}
diff --git a/backend/ppocr/losses/rec_multi_loss.py b/backend/ppocr/losses/rec_multi_loss.py
deleted file mode 100644
index 09f007af..00000000
--- a/backend/ppocr/losses/rec_multi_loss.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
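A note on the use_focal_loss branch of the deleted CTCLoss and EnhancedCTCLoss above: each sample's CTC loss is rescaled by (1 - e^(-loss))^2 before averaging, which down-weights easy samples. A rough NumPy sketch of just that reweighting, assuming the per-sample CTC losses are already computed (illustrative only):

    import numpy as np

    def focal_reweight(per_sample_ctc):
        # small loss -> weight near 0, large loss -> weight near 1
        w = (1.0 - np.exp(-per_sample_ctc)) ** 2
        return (per_sample_ctc * w).mean()

    print(focal_reweight(np.array([0.1, 2.0, 5.0])))
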
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddle import nn
-
-from .rec_ctc_loss import CTCLoss
-from .rec_sar_loss import SARLoss
-
-
-class MultiLoss(nn.Layer):
-    def __init__(self, **kwargs):
-        super().__init__()
-        self.loss_funcs = {}
-        self.loss_list = kwargs.pop('loss_config_list')
-        self.weight_1 = kwargs.get('weight_1', 1.0)
-        self.weight_2 = kwargs.get('weight_2', 1.0)
-        self.gtc_loss = kwargs.get('gtc_loss', 'sar')
-        for loss_info in self.loss_list:
-            for name, param in loss_info.items():
-                if param is not None:
-                    kwargs.update(param)
-                loss = eval(name)(**kwargs)
-                self.loss_funcs[name] = loss
-
-    def forward(self, predicts, batch):
-        self.total_loss = {}
-        total_loss = 0.0
-        # batch [image, label_ctc, label_sar, length, valid_ratio]
-        for name, loss_func in self.loss_funcs.items():
-            if name == 'CTCLoss':
-                loss = loss_func(predicts['ctc'],
-                                 batch[:2] + batch[3:])['loss'] * self.weight_1
-            elif name == 'SARLoss':
-                loss = loss_func(predicts['sar'],
-                                 batch[:1] + batch[2:])['loss'] * self.weight_2
-            else:
-                raise NotImplementedError(
-                    '{} is not supported in MultiLoss yet'.format(name))
-            self.total_loss[name] = loss
-            total_loss += loss
-        self.total_loss['loss'] = total_loss
-        return self.total_loss
diff --git a/backend/ppocr/losses/rec_nrtr_loss.py b/backend/ppocr/losses/rec_nrtr_loss.py
deleted file mode 100644
index 200a6d04..00000000
--- a/backend/ppocr/losses/rec_nrtr_loss.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import paddle
-from paddle import nn
-import paddle.nn.functional as F
-
-
-class NRTRLoss(nn.Layer):
-    def __init__(self, smoothing=True, **kwargs):
-        super(NRTRLoss, self).__init__()
-        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
-        self.smoothing = smoothing
-
-    def forward(self, pred, batch):
-        pred = pred.reshape([-1, pred.shape[2]])
-        max_len = batch[2].max()
-        tgt = batch[1][:, 1:2 + max_len]
-        tgt = tgt.reshape([-1])
-        if self.smoothing:
-            eps = 0.1
-            n_class = pred.shape[1]
-            one_hot = F.one_hot(tgt, pred.shape[1])
-            one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
-            log_prb = F.log_softmax(pred, axis=1)
-            non_pad_mask = paddle.not_equal(
-                tgt, paddle.zeros(
-                    tgt.shape, dtype=tgt.dtype))
-            loss = -(one_hot * log_prb).sum(axis=1)
-            loss = loss.masked_select(non_pad_mask).mean()
-        else:
-            loss = self.loss_func(pred, tgt)
-        return {'loss': loss}
diff --git a/backend/ppocr/losses/rec_pren_loss.py b/backend/ppocr/losses/rec_pren_loss.py
deleted file mode 100644
index 7bc53d29..00000000
--- a/backend/ppocr/losses/rec_pren_loss.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
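The deleted NRTRLoss implements standard label smoothing: the one-hot target keeps 1 - eps on the true class and spreads eps uniformly over the remaining n_class - 1 classes, with padding positions (index 0) masked out afterwards. A small NumPy sketch of the smoothed-target construction (illustrative only):

    import numpy as np

    def smooth_targets(tgt, n_class, eps=0.1):
        one_hot = np.eye(n_class)[tgt]
        return one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)

    print(smooth_targets(np.array([2, 0, 1]), n_class=4))
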
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import nn
-
-
-class PRENLoss(nn.Layer):
-    def __init__(self, **kwargs):
-        super(PRENLoss, self).__init__()
-        # note: 0 is padding idx
-        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
-
-    def forward(self, predicts, batch):
-        loss = self.loss_func(predicts, batch[1].astype('int64'))
-        return {'loss': loss}
diff --git a/backend/ppocr/losses/rec_sar_loss.py b/backend/ppocr/losses/rec_sar_loss.py
deleted file mode 100644
index a4f83f03..00000000
--- a/backend/ppocr/losses/rec_sar_loss.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddle import nn
-
-
-class SARLoss(nn.Layer):
-    def __init__(self, **kwargs):
-        super(SARLoss, self).__init__()
-        ignore_index = kwargs.get('ignore_index', 92)  # 6626
-        self.loss_func = paddle.nn.loss.CrossEntropyLoss(
-            reduction="mean", ignore_index=ignore_index)
-
-    def forward(self, predicts, batch):
-        predict = predicts[:, :
-                           -1, :]  # ignore last index of outputs to be in same seq_len with targets
-        label = batch[1].astype(
-            "int64")[:, 1:]  # ignore first index of target in loss calculation
-        batch_size, num_steps, num_classes = predict.shape[0], predict.shape[
-            1], predict.shape[2]
-        assert len(label.shape) == len(list(predict.shape)) - 1, \
-            "The target's shape and inputs's shape is [N, d] and [N, num_steps]"
-
-        inputs = paddle.reshape(predict, [-1, num_classes])
-        targets = paddle.reshape(label, [-1])
-        loss = self.loss_func(inputs, targets)
-        return {'loss': loss}
diff --git a/backend/ppocr/losses/rec_srn_loss.py b/backend/ppocr/losses/rec_srn_loss.py
deleted file mode 100644
index 7d5b65eb..00000000
--- a/backend/ppocr/losses/rec_srn_loss.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
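The slicing in the deleted SARLoss aligns a decoder that is fed a start token: step t of the logits predicts target token t + 1, so the last logit step and the first target token are dropped before the cross-entropy. A sketch of that alignment on dummy arrays (illustrative only):

    import numpy as np

    logits = np.random.rand(2, 6, 10)           # [N, num_steps, num_classes]
    labels = np.random.randint(0, 10, (2, 6))   # first column is the BOS token

    pred = logits[:, :-1, :]   # drop the last output step
    tgt = labels[:, 1:]        # drop BOS from the targets
    assert pred.shape[:2] == tgt.shape
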
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class SRNLoss(nn.Layer): - def __init__(self, **kwargs): - super(SRNLoss, self).__init__() - self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="sum") - - def forward(self, predicts, batch): - predict = predicts['predict'] - word_predict = predicts['word_out'] - gsrm_predict = predicts['gsrm_out'] - label = batch[1] - - casted_label = paddle.cast(x=label, dtype='int64') - casted_label = paddle.reshape(x=casted_label, shape=[-1, 1]) - - cost_word = self.loss_func(word_predict, label=casted_label) - cost_gsrm = self.loss_func(gsrm_predict, label=casted_label) - cost_vsfd = self.loss_func(predict, label=casted_label) - - cost_word = paddle.reshape(x=paddle.sum(cost_word), shape=[1]) - cost_gsrm = paddle.reshape(x=paddle.sum(cost_gsrm), shape=[1]) - cost_vsfd = paddle.reshape(x=paddle.sum(cost_vsfd), shape=[1]) - - sum_cost = cost_word * 3.0 + cost_vsfd + cost_gsrm * 0.15 - - return {'loss': sum_cost, 'word_loss': cost_word, 'img_loss': cost_vsfd} diff --git a/backend/ppocr/losses/table_att_loss.py b/backend/ppocr/losses/table_att_loss.py deleted file mode 100644 index d7fd99e6..00000000 --- a/backend/ppocr/losses/table_att_loss.py +++ /dev/null @@ -1,109 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
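The SRNLoss deleted above sums three cross-entropy costs over the same labels with fixed weights (word 3.0, vsfd 1.0, gsrm 0.15). The combination expressed generically, as an illustrative sketch:

    def combine_srn_costs(cost_word, cost_vsfd, cost_gsrm):
        # weights as in the deleted SRNLoss.forward
        return cost_word * 3.0 + cost_vsfd + cost_gsrm * 0.15
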
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from paddle.nn import functional as F -from paddle import fluid - -class TableAttentionLoss(nn.Layer): - def __init__(self, structure_weight, loc_weight, use_giou=False, giou_weight=1.0, **kwargs): - super(TableAttentionLoss, self).__init__() - self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none') - self.structure_weight = structure_weight - self.loc_weight = loc_weight - self.use_giou = use_giou - self.giou_weight = giou_weight - - def giou_loss(self, preds, bbox, eps=1e-7, reduction='mean'): - ''' - :param preds:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,] - :param bbox:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,] - :return: loss - ''' - ix1 = fluid.layers.elementwise_max(preds[:, 0], bbox[:, 0]) - iy1 = fluid.layers.elementwise_max(preds[:, 1], bbox[:, 1]) - ix2 = fluid.layers.elementwise_min(preds[:, 2], bbox[:, 2]) - iy2 = fluid.layers.elementwise_min(preds[:, 3], bbox[:, 3]) - - iw = fluid.layers.clip(ix2 - ix1 + 1e-3, 0., 1e10) - ih = fluid.layers.clip(iy2 - iy1 + 1e-3, 0., 1e10) - - # overlap - inters = iw * ih - - # union - uni = (preds[:, 2] - preds[:, 0] + 1e-3) * (preds[:, 3] - preds[:, 1] + 1e-3 - ) + (bbox[:, 2] - bbox[:, 0] + 1e-3) * ( - bbox[:, 3] - bbox[:, 1] + 1e-3) - inters + eps - - # ious - ious = inters / uni - - ex1 = fluid.layers.elementwise_min(preds[:, 0], bbox[:, 0]) - ey1 = fluid.layers.elementwise_min(preds[:, 1], bbox[:, 1]) - ex2 = fluid.layers.elementwise_max(preds[:, 2], bbox[:, 2]) - ey2 = fluid.layers.elementwise_max(preds[:, 3], bbox[:, 3]) - ew = fluid.layers.clip(ex2 - ex1 + 1e-3, 0., 1e10) - eh = fluid.layers.clip(ey2 - ey1 + 1e-3, 0., 1e10) - - # enclose erea - enclose = ew * eh + eps - giou = ious - (enclose - uni) / enclose - - loss = 1 - giou - - if reduction == 'mean': - loss = paddle.mean(loss) - elif reduction == 'sum': - loss = paddle.sum(loss) - else: - raise NotImplementedError - return loss - - def forward(self, predicts, batch): - structure_probs = predicts['structure_probs'] - structure_targets = batch[1].astype("int64") - structure_targets = structure_targets[:, 1:] - if len(batch) == 6: - structure_mask = batch[5].astype("int64") - structure_mask = structure_mask[:, 1:] - structure_mask = paddle.reshape(structure_mask, [-1]) - structure_probs = paddle.reshape(structure_probs, [-1, structure_probs.shape[-1]]) - structure_targets = paddle.reshape(structure_targets, [-1]) - structure_loss = self.loss_func(structure_probs, structure_targets) - - if len(batch) == 6: - structure_loss = structure_loss * structure_mask - -# structure_loss = paddle.sum(structure_loss) * self.structure_weight - structure_loss = paddle.mean(structure_loss) * self.structure_weight - - loc_preds = predicts['loc_preds'] - loc_targets = batch[2].astype("float32") - loc_targets_mask = batch[4].astype("float32") - loc_targets = loc_targets[:, 1:, :] - loc_targets_mask = loc_targets_mask[:, 1:, :] - loc_loss = F.mse_loss(loc_preds * loc_targets_mask, loc_targets) * self.loc_weight - if self.use_giou: - loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask, loc_targets) * self.giou_weight - total_loss = structure_loss + loc_loss + loc_loss_giou - return {'loss':total_loss, "structure_loss":structure_loss, "loc_loss":loc_loss, "loc_loss_giou":loc_loss_giou} - else: - total_loss = structure_loss + loc_loss - return {'loss':total_loss, "structure_loss":structure_loss, "loc_loss":loc_loss} \ No newline at end of file diff 
--git a/backend/ppocr/losses/vqa_token_layoutlm_loss.py b/backend/ppocr/losses/vqa_token_layoutlm_loss.py deleted file mode 100755 index 244893d9..00000000 --- a/backend/ppocr/losses/vqa_token_layoutlm_loss.py +++ /dev/null @@ -1,42 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - - -class VQASerTokenLayoutLMLoss(nn.Layer): - def __init__(self, num_classes): - super().__init__() - self.loss_class = nn.CrossEntropyLoss() - self.num_classes = num_classes - self.ignore_index = self.loss_class.ignore_index - - def forward(self, predicts, batch): - labels = batch[1] - attention_mask = batch[4] - if attention_mask is not None: - active_loss = attention_mask.reshape([-1, ]) == 1 - active_outputs = predicts.reshape( - [-1, self.num_classes])[active_loss] - active_labels = labels.reshape([-1, ])[active_loss] - loss = self.loss_class(active_outputs, active_labels) - else: - loss = self.loss_class( - predicts.reshape([-1, self.num_classes]), - labels.reshape([-1, ])) - return {'loss': loss} diff --git a/backend/ppocr/metrics/__init__.py b/backend/ppocr/metrics/__init__.py deleted file mode 100644 index c244066c..00000000 --- a/backend/ppocr/metrics/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import copy
-
-__all__ = ["build_metric"]
-
-from .det_metric import DetMetric, DetFCEMetric
-from .rec_metric import RecMetric
-from .cls_metric import ClsMetric
-from .e2e_metric import E2EMetric
-from .distillation_metric import DistillationMetric
-from .table_metric import TableMetric
-from .kie_metric import KIEMetric
-from .vqa_token_ser_metric import VQASerTokenMetric
-from .vqa_token_re_metric import VQAReTokenMetric
-
-
-def build_metric(config):
-    support_dict = [
-        "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
-        "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
-        'VQAReTokenMetric'
-    ]
-
-    config = copy.deepcopy(config)
-    module_name = config.pop("name")
-    assert module_name in support_dict, Exception(
-        "metric only support {}".format(support_dict))
-    module_class = eval(module_name)(**config)
-    return module_class
diff --git a/backend/ppocr/metrics/cls_metric.py b/backend/ppocr/metrics/cls_metric.py
deleted file mode 100644
index 6c077518..00000000
--- a/backend/ppocr/metrics/cls_metric.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class ClsMetric(object):
-    def __init__(self, main_indicator='acc', **kwargs):
-        self.main_indicator = main_indicator
-        self.eps = 1e-5
-        self.reset()
-
-    def __call__(self, pred_label, *args, **kwargs):
-        preds, labels = pred_label
-        correct_num = 0
-        all_num = 0
-        for (pred, pred_conf), (target, _) in zip(preds, labels):
-            if pred == target:
-                correct_num += 1
-            all_num += 1
-        self.correct_num += correct_num
-        self.all_num += all_num
-        return {'acc': correct_num / (all_num + self.eps), }
-
-    def get_metric(self):
-        """
-        return metrics {
-                 'acc': 0
-            }
-        """
-        acc = self.correct_num / (self.all_num + self.eps)
-        self.reset()
-        return {'acc': acc}
-
-    def reset(self):
-        self.correct_num = 0
-        self.all_num = 0
diff --git a/backend/ppocr/metrics/det_metric.py b/backend/ppocr/metrics/det_metric.py
deleted file mode 100644
index dca94c09..00000000
--- a/backend/ppocr/metrics/det_metric.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
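The deleted build_metric pops "name" from the config and instantiates the class via eval. The same dispatch can be written without eval using an explicit registry; a hedged sketch of that variant (our own rewrite, not the deleted code; RecMetric in the usage line is a hypothetical registered class):

    def build_metric_safe(config, registry):
        config = dict(config)
        name = config.pop("name")
        if name not in registry:
            raise ValueError("metric only support {}".format(sorted(registry)))
        return registry[name](**config)

    # usage: build_metric_safe({"name": "RecMetric"}, {"RecMetric": RecMetric})
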
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -__all__ = ['DetMetric', 'DetFCEMetric'] - -from .eval_det_iou import DetectionIoUEvaluator - - -class DetMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.evaluator = DetectionIoUEvaluator() - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - ''' - batch: a list produced by dataloaders. - image: np.ndarray of shape (N, C, H, W). - ratio_list: np.ndarray of shape(N,2) - polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not. - preds: a list of dict produced by post process - points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ''' - gt_polyons_batch = batch[2] - ignore_tags_batch = batch[3] - for pred, gt_polyons, ignore_tags in zip(preds, gt_polyons_batch, - ignore_tags_batch): - # prepare gt - gt_info_list = [{ - 'points': gt_polyon, - 'text': '', - 'ignore': ignore_tag - } for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)] - # prepare det - det_info_list = [{ - 'points': det_polyon, - 'text': '' - } for det_polyon in pred['points']] - result = self.evaluator.evaluate_image(gt_info_list, det_info_list) - self.results.append(result) - - def get_metric(self): - """ - return metrics { - 'precision': 0, - 'recall': 0, - 'hmean': 0 - } - """ - - metrics = self.evaluator.combine_results(self.results) - self.reset() - return metrics - - def reset(self): - self.results = [] # clear results - - -class DetFCEMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.evaluator = DetectionIoUEvaluator() - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - ''' - batch: a list produced by dataloaders. - image: np.ndarray of shape (N, C, H, W). - ratio_list: np.ndarray of shape(N,2) - polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not. - preds: a list of dict produced by post process - points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. 
- ''' - gt_polyons_batch = batch[2] - ignore_tags_batch = batch[3] - - for pred, gt_polyons, ignore_tags in zip(preds, gt_polyons_batch, - ignore_tags_batch): - # prepare gt - gt_info_list = [{ - 'points': gt_polyon, - 'text': '', - 'ignore': ignore_tag - } for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)] - # prepare det - det_info_list = [{ - 'points': det_polyon, - 'text': '', - 'score': score - } for det_polyon, score in zip(pred['points'], pred['scores'])] - - for score_thr in self.results.keys(): - det_info_list_thr = [ - det_info for det_info in det_info_list - if det_info['score'] >= score_thr - ] - result = self.evaluator.evaluate_image(gt_info_list, - det_info_list_thr) - self.results[score_thr].append(result) - - def get_metric(self): - """ - return metrics {'heman':0, - 'thr 0.3':'precision: 0 recall: 0 hmean: 0', - 'thr 0.4':'precision: 0 recall: 0 hmean: 0', - 'thr 0.5':'precision: 0 recall: 0 hmean: 0', - 'thr 0.6':'precision: 0 recall: 0 hmean: 0', - 'thr 0.7':'precision: 0 recall: 0 hmean: 0', - 'thr 0.8':'precision: 0 recall: 0 hmean: 0', - 'thr 0.9':'precision: 0 recall: 0 hmean: 0', - } - """ - metrics = {} - hmean = 0 - for score_thr in self.results.keys(): - metric = self.evaluator.combine_results(self.results[score_thr]) - # for key, value in metric.items(): - # metrics['{}_{}'.format(key, score_thr)] = value - metric_str = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format( - metric['precision'], metric['recall'], metric['hmean']) - metrics['thr {}'.format(score_thr)] = metric_str - hmean = max(hmean, metric['hmean']) - metrics['hmean'] = hmean - - self.reset() - return metrics - - def reset(self): - self.results = { - 0.3: [], - 0.4: [], - 0.5: [], - 0.6: [], - 0.7: [], - 0.8: [], - 0.9: [] - } # clear results diff --git a/backend/ppocr/metrics/distillation_metric.py b/backend/ppocr/metrics/distillation_metric.py deleted file mode 100644 index c440cebd..00000000 --- a/backend/ppocr/metrics/distillation_metric.py +++ /dev/null @@ -1,73 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
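DetFCEMetric above evaluates the same predictions at score thresholds 0.3 through 0.9 and reports the best hmean over the sweep. The sweep in isolation, as an illustrative sketch in which evaluate is a stand-in for the IoU evaluator:

    def sweep_thresholds(dets, gts, evaluate,
                         thrs=(0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)):
        results = {}
        for thr in thrs:
            kept = [d for d in dets if d['score'] >= thr]
            results[thr] = evaluate(gts, kept)
        return max(r['hmean'] for r in results.values()), results
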
- -import importlib -import copy - -from .rec_metric import RecMetric -from .det_metric import DetMetric -from .e2e_metric import E2EMetric -from .cls_metric import ClsMetric - - -class DistillationMetric(object): - def __init__(self, - key=None, - base_metric_name=None, - main_indicator=None, - **kwargs): - self.main_indicator = main_indicator - self.key = key - self.main_indicator = main_indicator - self.base_metric_name = base_metric_name - self.kwargs = kwargs - self.metrics = None - - def _init_metrcis(self, preds): - self.metrics = dict() - mod = importlib.import_module(__name__) - for key in preds: - self.metrics[key] = getattr(mod, self.base_metric_name)( - main_indicator=self.main_indicator, **self.kwargs) - self.metrics[key].reset() - - def __call__(self, preds, batch, **kwargs): - assert isinstance(preds, dict) - if self.metrics is None: - self._init_metrcis(preds) - output = dict() - for key in preds: - self.metrics[key].__call__(preds[key], batch, **kwargs) - - def get_metric(self): - """ - return metrics { - 'acc': 0, - 'norm_edit_dis': 0, - } - """ - output = dict() - for key in self.metrics: - metric = self.metrics[key].get_metric() - # main indicator - if key == self.key: - output.update(metric) - else: - for sub_key in metric: - output["{}_{}".format(key, sub_key)] = metric[sub_key] - return output - - def reset(self): - for key in self.metrics: - self.metrics[key].reset() diff --git a/backend/ppocr/metrics/e2e_metric.py b/backend/ppocr/metrics/e2e_metric.py deleted file mode 100644 index 2f8ba3b2..00000000 --- a/backend/ppocr/metrics/e2e_metric.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
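DistillationMetric above builds one base metric per prediction branch the first time it sees the prediction dict, then updates every branch independently. A minimal sketch of that lazy per-key pattern (illustrative; factory and the callable metric objects are assumptions):

    class PerBranchMetric:
        def __init__(self, factory):
            self.factory = factory
            self.metrics = None

        def update(self, preds, batch):
            if self.metrics is None:  # lazy init, one metric per branch
                self.metrics = {k: self.factory() for k in preds}
            for k, p in preds.items():
                self.metrics[k](p, batch)
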
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -__all__ = ['E2EMetric'] - -from ppocr.utils.e2e_metric.Deteval import get_socre_A, get_socre_B, combine_results -from ppocr.utils.e2e_utils.extract_textpoint_slow import get_dict - - -class E2EMetric(object): - def __init__(self, - mode, - gt_mat_dir, - character_dict_path, - main_indicator='f_score_e2e', - **kwargs): - self.mode = mode - self.gt_mat_dir = gt_mat_dir - self.label_list = get_dict(character_dict_path) - self.max_index = len(self.label_list) - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - if self.mode == 'A': - gt_polyons_batch = batch[2] - temp_gt_strs_batch = batch[3][0] - ignore_tags_batch = batch[4] - gt_strs_batch = [] - - for temp_list in temp_gt_strs_batch: - t = "" - for index in temp_list: - if index < self.max_index: - t += self.label_list[index] - gt_strs_batch.append(t) - - for pred, gt_polyons, gt_strs, ignore_tags in zip( - [preds], gt_polyons_batch, [gt_strs_batch], ignore_tags_batch): - # prepare gt - gt_info_list = [{ - 'points': gt_polyon, - 'text': gt_str, - 'ignore': ignore_tag - } for gt_polyon, gt_str, ignore_tag in - zip(gt_polyons, gt_strs, ignore_tags)] - # prepare det - e2e_info_list = [{ - 'points': det_polyon, - 'texts': pred_str - } for det_polyon, pred_str in - zip(pred['points'], pred['texts'])] - - result = get_socre_A(gt_info_list, e2e_info_list) - self.results.append(result) - else: - img_id = batch[5][0] - e2e_info_list = [{ - 'points': det_polyon, - 'texts': pred_str - } for det_polyon, pred_str in zip(preds['points'], preds['texts'])] - result = get_socre_B(self.gt_mat_dir, img_id, e2e_info_list) - self.results.append(result) - - def get_metric(self): - metrics = combine_results(self.results) - self.reset() - return metrics - - def reset(self): - self.results = [] # clear results diff --git a/backend/ppocr/metrics/eval_det_iou.py b/backend/ppocr/metrics/eval_det_iou.py deleted file mode 100644 index bc05e7df..00000000 --- a/backend/ppocr/metrics/eval_det_iou.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from collections import namedtuple -import numpy as np -from shapely.geometry import Polygon -""" -reference from : -https://github.com/MhLiao/DB/blob/3c32b808d4412680310d3d28eeb6a2d5bf1566c5/concern/icdar2015_eval/detection/iou.py#L8 -""" - - -class DetectionIoUEvaluator(object): - def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5): - self.iou_constraint = iou_constraint - self.area_precision_constraint = area_precision_constraint - - def evaluate_image(self, gt, pred): - def get_union(pD, pG): - return Polygon(pD).union(Polygon(pG)).area - - def get_intersection_over_union(pD, pG): - return get_intersection(pD, pG) / get_union(pD, pG) - - def get_intersection(pD, pG): - return Polygon(pD).intersection(Polygon(pG)).area - - def compute_ap(confList, matchList, numGtCare): - correct = 0 - AP = 0 - if len(confList) > 0: - confList = np.array(confList) - matchList = np.array(matchList) - sorted_ind = np.argsort(-confList) - confList = confList[sorted_ind] - matchList = matchList[sorted_ind] - for n in range(len(confList)): - match = matchList[n] - if match: - correct += 1 - AP += float(correct) / (n + 1) - - if numGtCare > 0: - AP /= numGtCare - - return AP - - perSampleMetrics = {} - - matchedSum = 0 - - Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') - - numGlobalCareGt = 0 - numGlobalCareDet = 0 - - 
arrGlobalConfidences = [] - arrGlobalMatches = [] - - recall = 0 - precision = 0 - hmean = 0 - - detMatched = 0 - - iouMat = np.empty([1, 1]) - - gtPols = [] - detPols = [] - - gtPolPoints = [] - detPolPoints = [] - - # Array of Ground Truth Polygons' keys marked as don't Care - gtDontCarePolsNum = [] - # Array of Detected Polygons' matched with a don't Care GT - detDontCarePolsNum = [] - - pairs = [] - detMatchedNums = [] - - arrSampleConfidences = [] - arrSampleMatch = [] - - evaluationLog = "" - - # print(len(gt)) - for n in range(len(gt)): - points = gt[n]['points'] - # transcription = gt[n]['text'] - dontCare = gt[n]['ignore'] - # points = Polygon(points) - # points = points.buffer(0) - if not Polygon(points).is_valid or not Polygon(points).is_simple: - continue - - gtPol = points - gtPols.append(gtPol) - gtPolPoints.append(points) - if dontCare: - gtDontCarePolsNum.append(len(gtPols) - 1) - - evaluationLog += "GT polygons: " + str(len(gtPols)) + ( - " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" - if len(gtDontCarePolsNum) > 0 else "\n") - - for n in range(len(pred)): - points = pred[n]['points'] - # points = Polygon(points) - # points = points.buffer(0) - if not Polygon(points).is_valid or not Polygon(points).is_simple: - continue - - detPol = points - detPols.append(detPol) - detPolPoints.append(points) - if len(gtDontCarePolsNum) > 0: - for dontCarePol in gtDontCarePolsNum: - dontCarePol = gtPols[dontCarePol] - intersected_area = get_intersection(dontCarePol, detPol) - pdDimensions = Polygon(detPol).area - precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions - if (precision > self.area_precision_constraint): - detDontCarePolsNum.append(len(detPols) - 1) - break - - evaluationLog += "DET polygons: " + str(len(detPols)) + ( - " (" + str(len(detDontCarePolsNum)) + " don't care)\n" - if len(detDontCarePolsNum) > 0 else "\n") - - if len(gtPols) > 0 and len(detPols) > 0: - # Calculate IoU and precision matrixs - outputShape = [len(gtPols), len(detPols)] - iouMat = np.empty(outputShape) - gtRectMat = np.zeros(len(gtPols), np.int8) - detRectMat = np.zeros(len(detPols), np.int8) - for gtNum in range(len(gtPols)): - for detNum in range(len(detPols)): - pG = gtPols[gtNum] - pD = detPols[detNum] - iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) - - for gtNum in range(len(gtPols)): - for detNum in range(len(detPols)): - if gtRectMat[gtNum] == 0 and detRectMat[ - detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: - if iouMat[gtNum, detNum] > self.iou_constraint: - gtRectMat[gtNum] = 1 - detRectMat[detNum] = 1 - detMatched += 1 - pairs.append({'gt': gtNum, 'det': detNum}) - detMatchedNums.append(detNum) - evaluationLog += "Match GT #" + \ - str(gtNum) + " with Det #" + str(detNum) + "\n" - - numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) - numDetCare = (len(detPols) - len(detDontCarePolsNum)) - if numGtCare == 0: - recall = float(1) - precision = float(0) if numDetCare > 0 else float(1) - else: - recall = float(detMatched) / numGtCare - precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare - - hmean = 0 if (precision + recall) == 0 else 2.0 * \ - precision * recall / (precision + recall) - - matchedSum += detMatched - numGlobalCareGt += numGtCare - numGlobalCareDet += numDetCare - - perSampleMetrics = { - 'gtCare': numGtCare, - 'detCare': numDetCare, - 'detMatched': detMatched, - } - return perSampleMetrics - - def combine_results(self, results): - numGlobalCareGt = 0 - numGlobalCareDet = 0 - matchedSum = 
0 - for result in results: - numGlobalCareGt += result['gtCare'] - numGlobalCareDet += result['detCare'] - matchedSum += result['detMatched'] - - methodRecall = 0 if numGlobalCareGt == 0 else float( - matchedSum) / numGlobalCareGt - methodPrecision = 0 if numGlobalCareDet == 0 else float( - matchedSum) / numGlobalCareDet - methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \ - methodRecall * methodPrecision / ( - methodRecall + methodPrecision) - # print(methodRecall, methodPrecision, methodHmean) - # sys.exit(-1) - methodMetrics = { - 'precision': methodPrecision, - 'recall': methodRecall, - 'hmean': methodHmean - } - - return methodMetrics - - -if __name__ == '__main__': - evaluator = DetectionIoUEvaluator() - gts = [[{ - 'points': [(0, 0), (1, 0), (1, 1), (0, 1)], - 'text': 1234, - 'ignore': False, - }, { - 'points': [(2, 2), (3, 2), (3, 3), (2, 3)], - 'text': 5678, - 'ignore': False, - }]] - preds = [[{ - 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)], - 'text': 123, - 'ignore': False, - }]] - results = [] - for gt, pred in zip(gts, preds): - results.append(evaluator.evaluate_image(gt, pred)) - metrics = evaluator.combine_results(results) - print(metrics) diff --git a/backend/ppocr/metrics/kie_metric.py b/backend/ppocr/metrics/kie_metric.py deleted file mode 100644 index 28ab22b8..00000000 --- a/backend/ppocr/metrics/kie_metric.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
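combine_results above micro-averages: matched counts and cared counts are summed over all images before precision, recall, and hmean are computed once. The arithmetic in isolation (illustrative sketch):

    def micro_prf(results):
        matched = sum(r['detMatched'] for r in results)
        n_gt = sum(r['gtCare'] for r in results)
        n_det = sum(r['detCare'] for r in results)
        recall = 0 if n_gt == 0 else matched / n_gt
        precision = 0 if n_det == 0 else matched / n_det
        hmean = 0 if precision + recall == 0 else \
            2 * precision * recall / (precision + recall)
        return {'precision': precision, 'recall': recall, 'hmean': hmean}
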
-# The code is refer from: https://github.com/open-mmlab/mmocr/blob/main/mmocr/core/evaluation/kie_metric.py
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import paddle
-
-__all__ = ['KIEMetric']
-
-
-class KIEMetric(object):
-    def __init__(self, main_indicator='hmean', **kwargs):
-        self.main_indicator = main_indicator
-        self.reset()
-        self.node = []
-        self.gt = []
-
-    def __call__(self, preds, batch, **kwargs):
-        nodes, _ = preds
-        gts, tag = batch[4].squeeze(0), batch[5].tolist()[0]
-        gts = gts[:tag[0], :1].reshape([-1])
-        self.node.append(nodes.numpy())
-        self.gt.append(gts)
-        # result = self.compute_f1_score(nodes, gts)
-        # self.results.append(result)
-
-    def compute_f1_score(self, preds, gts):
-        ignores = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]
-        C = preds.shape[1]
-        classes = np.array(sorted(set(range(C)) - set(ignores)))
-        hist = np.bincount(
-            (gts * C).astype('int64') + preds.argmax(1),
-            minlength=C**2).reshape([C, C]).astype('float32')
-        diag = np.diag(hist)
-        recalls = diag / hist.sum(1).clip(min=1)
-        precisions = diag / hist.sum(0).clip(min=1)
-        f1 = 2 * recalls * precisions / (recalls + precisions).clip(min=1e-8)
-        return f1[classes]
-
-    def combine_results(self, results):
-        node = np.concatenate(self.node, 0)
-        gts = np.concatenate(self.gt, 0)
-        results = self.compute_f1_score(node, gts)
-        data = {'hmean': results.mean()}
-        return data
-
-    def get_metric(self):
-
-        metrics = self.combine_results(self.results)
-        self.reset()
-        return metrics
-
-    def reset(self):
-        self.results = []  # clear results
-        self.node = []
-        self.gt = []
diff --git a/backend/ppocr/metrics/rec_metric.py b/backend/ppocr/metrics/rec_metric.py
deleted file mode 100644
index 515b9372..00000000
--- a/backend/ppocr/metrics/rec_metric.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
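compute_f1_score above builds a C x C confusion matrix in a single np.bincount call by flattening each (gt, pred) pair to the index gt * C + pred. A sketch of the trick (illustrative re-implementation):

    import numpy as np

    def confusion_f1(gts, pred_ids, C):
        hist = np.bincount(gts * C + pred_ids, minlength=C**2).reshape([C, C])
        diag = np.diag(hist).astype('float32')
        recall = diag / hist.sum(1).clip(min=1)
        precision = diag / hist.sum(0).clip(min=1)
        return 2 * recall * precision / (recall + precision).clip(min=1e-8)

    print(confusion_f1(np.array([0, 1, 1]), np.array([0, 1, 0]), C=2))
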
-
-import Levenshtein
-import string
-
-
-class RecMetric(object):
-    def __init__(self,
-                 main_indicator='acc',
-                 is_filter=False,
-                 ignore_space=True,
-                 **kwargs):
-        self.main_indicator = main_indicator
-        self.is_filter = is_filter
-        self.ignore_space = ignore_space
-        self.eps = 1e-5
-        self.reset()
-
-    def _normalize_text(self, text):
-        text = ''.join(
-            filter(lambda x: x in (string.digits + string.ascii_letters), text))
-        return text.lower()
-
-    def __call__(self, pred_label, *args, **kwargs):
-        preds, labels = pred_label
-        correct_num = 0
-        all_num = 0
-        norm_edit_dis = 0.0
-        for (pred, pred_conf), (target, _) in zip(preds, labels):
-            if self.ignore_space:
-                pred = pred.replace(" ", "")
-                target = target.replace(" ", "")
-            if self.is_filter:
-                pred = self._normalize_text(pred)
-                target = self._normalize_text(target)
-            norm_edit_dis += Levenshtein.distance(pred, target) / max(
-                len(pred), len(target), 1)
-            if pred == target:
-                correct_num += 1
-            all_num += 1
-        self.correct_num += correct_num
-        self.all_num += all_num
-        self.norm_edit_dis += norm_edit_dis
-        return {
-            'acc': correct_num / (all_num + self.eps),
-            'norm_edit_dis': 1 - norm_edit_dis / (all_num + self.eps)
-        }
-
-    def get_metric(self):
-        """
-        return metrics {
-                 'acc': 0,
-                 'norm_edit_dis': 0,
-            }
-        """
-        acc = 1.0 * self.correct_num / (self.all_num + self.eps)
-        norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps)
-        self.reset()
-        return {'acc': acc, 'norm_edit_dis': norm_edit_dis}
-
-    def reset(self):
-        self.correct_num = 0
-        self.all_num = 0
-        self.norm_edit_dis = 0
diff --git a/backend/ppocr/metrics/table_metric.py b/backend/ppocr/metrics/table_metric.py
deleted file mode 100644
index ca4d6474..00000000
--- a/backend/ppocr/metrics/table_metric.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
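The norm_edit_dis that RecMetric reports is the Levenshtein distance divided by the longer string length, averaged over samples and flipped into a similarity (1 - mean). A sketch using the same python-Levenshtein dependency the deleted file imports (illustrative only):

    import Levenshtein

    def norm_edit_similarity(pairs):  # pairs of (pred, target) strings
        d = sum(Levenshtein.distance(p, t) / max(len(p), len(t), 1)
                for p, t in pairs)
        return 1 - d / (len(pairs) + 1e-5)

    print(norm_edit_similarity([("hello", "helo"), ("ocr", "ocr")]))
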
-import numpy as np - - -class TableMetric(object): - def __init__(self, main_indicator='acc', **kwargs): - self.main_indicator = main_indicator - self.eps = 1e-5 - self.reset() - - def __call__(self, pred, batch, *args, **kwargs): - structure_probs = pred['structure_probs'].numpy() - structure_labels = batch[1] - correct_num = 0 - all_num = 0 - structure_probs = np.argmax(structure_probs, axis=2) - structure_labels = structure_labels[:, 1:] - batch_size = structure_probs.shape[0] - for bno in range(batch_size): - all_num += 1 - if (structure_probs[bno] == structure_labels[bno]).all(): - correct_num += 1 - self.correct_num += correct_num - self.all_num += all_num - return {'acc': correct_num * 1.0 / (all_num + self.eps), } - - def get_metric(self): - """ - return metrics { - 'acc': 0, - } - """ - acc = 1.0 * self.correct_num / (self.all_num + self.eps) - self.reset() - return {'acc': acc} - - def reset(self): - self.correct_num = 0 - self.all_num = 0 diff --git a/backend/ppocr/metrics/vqa_token_re_metric.py b/backend/ppocr/metrics/vqa_token_re_metric.py deleted file mode 100644 index 8a13bc08..00000000 --- a/backend/ppocr/metrics/vqa_token_re_metric.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
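# Illustration (not part of the deleted file): a worked example of the
# "boundaries" scoring mode used by re_score below. A predicted relation
# matches a ground-truth relation when the (head, tail) spans agree,
# regardless of entity type; micro precision/recall/F1 then follow from set
# intersections. The spans are made up for the example.

pred = {((0, 2), (5, 7)), ((3, 4), (8, 9))}    # predicted (head, tail) spans
gold = {((0, 2), (5, 7)), ((3, 4), (10, 12))}  # ground-truth spans

tp, fp, fn = len(pred & gold), len(pred - gold), len(gold - pred)  # 1, 1, 1
precision = tp / (tp + fp)                          # 0.5
recall = tp / (tp + fn)                             # 0.5
f1 = 2 * precision * recall / (precision + recall)  # 0.5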
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle - -__all__ = ['KIEMetric'] - - -class VQAReTokenMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - pred_relations, relations, entities = preds - self.pred_relations_list.extend(pred_relations) - self.relations_list.extend(relations) - self.entities_list.extend(entities) - - def get_metric(self): - gt_relations = [] - for b in range(len(self.relations_list)): - rel_sent = [] - for head, tail in zip(self.relations_list[b]["head"], - self.relations_list[b]["tail"]): - rel = {} - rel["head_id"] = head - rel["head"] = (self.entities_list[b]["start"][rel["head_id"]], - self.entities_list[b]["end"][rel["head_id"]]) - rel["head_type"] = self.entities_list[b]["label"][rel[ - "head_id"]] - - rel["tail_id"] = tail - rel["tail"] = (self.entities_list[b]["start"][rel["tail_id"]], - self.entities_list[b]["end"][rel["tail_id"]]) - rel["tail_type"] = self.entities_list[b]["label"][rel[ - "tail_id"]] - - rel["type"] = 1 - rel_sent.append(rel) - gt_relations.append(rel_sent) - re_metrics = self.re_score( - self.pred_relations_list, gt_relations, mode="boundaries") - metrics = { - "precision": re_metrics["ALL"]["p"], - "recall": re_metrics["ALL"]["r"], - "hmean": re_metrics["ALL"]["f1"], - } - self.reset() - return metrics - - def reset(self): - self.pred_relations_list = [] - self.relations_list = [] - self.entities_list = [] - - def re_score(self, pred_relations, gt_relations, mode="strict"): - """Evaluate RE predictions - - Args: - pred_relations (list) : list of list of predicted relations (several relations in each sentence) - gt_relations (list) : list of list of ground truth relations - - rel = { "head": (start_idx (inclusive), end_idx (exclusive)), - "tail": (start_idx (inclusive), end_idx (exclusive)), - "head_type": ent_type, - "tail_type": ent_type, - "type": rel_type} - - vocab (Vocab) : dataset vocabulary - mode (str) : in 'strict' or 'boundaries'""" - - assert mode in ["strict", "boundaries"] - - relation_types = [v for v in [0, 1] if not v == 0] - scores = { - rel: { - "tp": 0, - "fp": 0, - "fn": 0 - } - for rel in relation_types + ["ALL"] - } - - # Count GT relations and Predicted relations - n_sents = len(gt_relations) - n_rels = sum([len([rel for rel in sent]) for sent in gt_relations]) - n_found = sum([len([rel for rel in sent]) for sent in pred_relations]) - - # Count TP, FP and FN per type - for pred_sent, gt_sent in zip(pred_relations, gt_relations): - for rel_type in relation_types: - # strict mode takes argument types into account - if mode == "strict": - pred_rels = {(rel["head"], rel["head_type"], rel["tail"], - rel["tail_type"]) - for rel in pred_sent - if rel["type"] == rel_type} - gt_rels = {(rel["head"], rel["head_type"], rel["tail"], - rel["tail_type"]) - for rel in gt_sent if rel["type"] == rel_type} - - # boundaries mode only takes argument spans into account - elif mode == "boundaries": - pred_rels = {(rel["head"], rel["tail"]) - for rel in pred_sent - if rel["type"] == rel_type} - gt_rels = {(rel["head"], rel["tail"]) - for rel in gt_sent if rel["type"] == rel_type} - - scores[rel_type]["tp"] += len(pred_rels & gt_rels) - scores[rel_type]["fp"] += len(pred_rels - gt_rels) - scores[rel_type]["fn"] += len(gt_rels - pred_rels) - - # Compute per entity Precision / Recall / F1 - for rel_type in 
scores.keys(): - if scores[rel_type]["tp"]: - scores[rel_type]["p"] = scores[rel_type]["tp"] / ( - scores[rel_type]["fp"] + scores[rel_type]["tp"]) - scores[rel_type]["r"] = scores[rel_type]["tp"] / ( - scores[rel_type]["fn"] + scores[rel_type]["tp"]) - else: - scores[rel_type]["p"], scores[rel_type]["r"] = 0, 0 - - if not scores[rel_type]["p"] + scores[rel_type]["r"] == 0: - scores[rel_type]["f1"] = ( - 2 * scores[rel_type]["p"] * scores[rel_type]["r"] / - (scores[rel_type]["p"] + scores[rel_type]["r"])) - else: - scores[rel_type]["f1"] = 0 - - # Compute micro F1 Scores - tp = sum([scores[rel_type]["tp"] for rel_type in relation_types]) - fp = sum([scores[rel_type]["fp"] for rel_type in relation_types]) - fn = sum([scores[rel_type]["fn"] for rel_type in relation_types]) - - if tp: - precision = tp / (tp + fp) - recall = tp / (tp + fn) - f1 = 2 * precision * recall / (precision + recall) - - else: - precision, recall, f1 = 0, 0, 0 - - scores["ALL"]["p"] = precision - scores["ALL"]["r"] = recall - scores["ALL"]["f1"] = f1 - scores["ALL"]["tp"] = tp - scores["ALL"]["fp"] = fp - scores["ALL"]["fn"] = fn - - # Compute Macro F1 Scores - scores["ALL"]["Macro_f1"] = np.mean( - [scores[ent_type]["f1"] for ent_type in relation_types]) - scores["ALL"]["Macro_p"] = np.mean( - [scores[ent_type]["p"] for ent_type in relation_types]) - scores["ALL"]["Macro_r"] = np.mean( - [scores[ent_type]["r"] for ent_type in relation_types]) - - return scores diff --git a/backend/ppocr/metrics/vqa_token_ser_metric.py b/backend/ppocr/metrics/vqa_token_ser_metric.py deleted file mode 100644 index 286d8add..00000000 --- a/backend/ppocr/metrics/vqa_token_ser_metric.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle - -__all__ = ['KIEMetric'] - - -class VQASerTokenMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - preds, labels = preds - self.pred_list.extend(preds) - self.gt_list.extend(labels) - - def get_metric(self): - from seqeval.metrics import f1_score, precision_score, recall_score - metrics = { - "precision": precision_score(self.gt_list, self.pred_list), - "recall": recall_score(self.gt_list, self.pred_list), - "hmean": f1_score(self.gt_list, self.pred_list), - } - self.reset() - return metrics - - def reset(self): - self.pred_list = [] - self.gt_list = [] diff --git a/backend/ppocr/modeling/architectures/__init__.py b/backend/ppocr/modeling/architectures/__init__.py deleted file mode 100755 index e9a01cf0..00000000 --- a/backend/ppocr/modeling/architectures/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import importlib - -from .base_model import BaseModel -from .distillation_model import DistillationModel - -__all__ = ['build_model'] - - -def build_model(config): - config = copy.deepcopy(config) - if not "name" in config: - arch = BaseModel(config) - else: - name = config.pop("name") - mod = importlib.import_module(__name__) - arch = getattr(mod, name)(config) - return arch diff --git a/backend/ppocr/modeling/architectures/base_model.py b/backend/ppocr/modeling/architectures/base_model.py deleted file mode 100644 index c6b50d48..00000000 --- a/backend/ppocr/modeling/architectures/base_model.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from paddle import nn -from ppocr.modeling.transforms import build_transform -from ppocr.modeling.backbones import build_backbone -from ppocr.modeling.necks import build_neck -from ppocr.modeling.heads import build_head - -__all__ = ['BaseModel'] - - -class BaseModel(nn.Layer): - def __init__(self, config): - """ - the module for OCR. - args: - config (dict): the super parameters for module. - """ - super(BaseModel, self).__init__() - in_channels = config.get('in_channels', 3) - model_type = config['model_type'] - # build transfrom, - # for rec, transfrom can be TPS,None - # for det and cls, transfrom shoule to be None, - # if you make model differently, you can use transfrom in det and cls - if 'Transform' not in config or config['Transform'] is None: - self.use_transform = False - else: - self.use_transform = True - config['Transform']['in_channels'] = in_channels - self.transform = build_transform(config['Transform']) - in_channels = self.transform.out_channels - - # build backbone, backbone is need for del, rec and cls - config["Backbone"]['in_channels'] = in_channels - self.backbone = build_backbone(config["Backbone"], model_type) - in_channels = self.backbone.out_channels - - # build neck - # for rec, neck can be cnn,rnn or reshape(None) - # for det, neck can be FPN, BIFPN and so on. 
- # for cls, neck should be none - if 'Neck' not in config or config['Neck'] is None: - self.use_neck = False - else: - self.use_neck = True - config['Neck']['in_channels'] = in_channels - self.neck = build_neck(config['Neck']) - in_channels = self.neck.out_channels - - # # build head, head is need for det, rec and cls - if 'Head' not in config or config['Head'] is None: - self.use_head = False - else: - self.use_head = True - config["Head"]['in_channels'] = in_channels - self.head = build_head(config["Head"]) - - self.return_all_feats = config.get("return_all_feats", False) - - def forward(self, x, data=None): - y = dict() - if self.use_transform: - x = self.transform(x) - x = self.backbone(x) - y["backbone_out"] = x - if self.use_neck: - x = self.neck(x) - y["neck_out"] = x - if self.use_head: - x = self.head(x, targets=data) - # for multi head, save ctc neck out for udml - if isinstance(x, dict) and 'ctc_neck' in x.keys(): - y["neck_out"] = x["ctc_neck"] - y["head_out"] = x - elif isinstance(x, dict): - y.update(x) - else: - y["head_out"] = x - if self.return_all_feats: - if self.training: - return y - else: - return {"head_out": y["head_out"]} - else: - return x diff --git a/backend/ppocr/modeling/architectures/distillation_model.py b/backend/ppocr/modeling/architectures/distillation_model.py deleted file mode 100644 index cce8fd31..00000000 --- a/backend/ppocr/modeling/architectures/distillation_model.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -from ppocr.modeling.transforms import build_transform -from ppocr.modeling.backbones import build_backbone -from ppocr.modeling.necks import build_neck -from ppocr.modeling.heads import build_head -from .base_model import BaseModel -from ppocr.utils.save_load import load_pretrained_params - -__all__ = ['DistillationModel'] - - -class DistillationModel(nn.Layer): - def __init__(self, config): - """ - the module for OCR distillation. - args: - config (dict): the super parameters for module. 
- """ - super().__init__() - self.model_list = [] - self.model_name_list = [] - for key in config["Models"]: - model_config = config["Models"][key] - freeze_params = False - pretrained = None - if "freeze_params" in model_config: - freeze_params = model_config.pop("freeze_params") - if "pretrained" in model_config: - pretrained = model_config.pop("pretrained") - model = BaseModel(model_config) - if pretrained is not None: - load_pretrained_params(model, pretrained) - if freeze_params: - for param in model.parameters(): - param.trainable = False - self.model_list.append(self.add_sublayer(key, model)) - self.model_name_list.append(key) - - def forward(self, x, data=None): - result_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - result_dict[model_name] = self.model_list[idx](x, data) - return result_dict diff --git a/backend/ppocr/modeling/backbones/__init__.py b/backend/ppocr/modeling/backbones/__init__.py deleted file mode 100755 index 072d6e0f..00000000 --- a/backend/ppocr/modeling/backbones/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ["build_backbone"] - - -def build_backbone(config, model_type): - if model_type == "det" or model_type == "table": - from .det_mobilenet_v3 import MobileNetV3 - from .det_resnet_vd import ResNet - from .det_resnet_vd_sast import ResNet_SAST - support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"] - elif model_type == "rec" or model_type == "cls": - from .rec_mobilenet_v3 import MobileNetV3 - from .rec_resnet_vd import ResNet - from .rec_resnet_fpn import ResNetFPN - from .rec_mv1_enhance import MobileNetV1Enhance - from .rec_nrtr_mtb import MTB - from .rec_resnet_31 import ResNet31 - from .rec_resnet_aster import ResNet_ASTER - from .rec_micronet import MicroNet - from .rec_efficientb3_pren import EfficientNetb3_PREN - from .rec_svtrnet import SVTRNet - support_dict = [ - 'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB', - "ResNet31", "ResNet_ASTER", 'MicroNet', 'EfficientNetb3_PREN', - 'SVTRNet' - ] - elif model_type == "e2e": - from .e2e_resnet_vd_pg import ResNet - support_dict = ['ResNet'] - elif model_type == 'kie': - from .kie_unet_sdmgr import Kie_backbone - support_dict = ['Kie_backbone'] - elif model_type == "table": - from .table_resnet_vd import ResNet - from .table_mobilenet_v3 import MobileNetV3 - support_dict = ["ResNet", "MobileNetV3"] - elif model_type == 'vqa': - from .vqa_layoutlm import LayoutLMForSer, LayoutLMv2ForSer, LayoutLMv2ForRe, LayoutXLMForSer, LayoutXLMForRe - support_dict = [ - "LayoutLMForSer", "LayoutLMv2ForSer", 'LayoutLMv2ForRe', - "LayoutXLMForSer", 'LayoutXLMForRe' - ] - else: - raise NotImplementedError - - module_name = config.pop("name") - assert module_name in support_dict, Exception( - "when model typs is {}, backbone only support {}".format(model_type, - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git 
a/backend/ppocr/modeling/backbones/det_mobilenet_v3.py b/backend/ppocr/modeling/backbones/det_mobilenet_v3.py deleted file mode 100755 index 05113ea8..00000000 --- a/backend/ppocr/modeling/backbones/det_mobilenet_v3.py +++ /dev/null @@ -1,268 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - -__all__ = ['MobileNetV3'] - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class MobileNetV3(nn.Layer): - def __init__(self, - in_channels=3, - model_name='large', - scale=0.5, - disable_se=False, - **kwargs): - """ - the MobilenetV3 backbone network for detection module. - Args: - params(dict): the super parameters for build network - """ - super(MobileNetV3, self).__init__() - - self.disable_se = disable_se - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', 1], - [3, 64, 24, False, 'relu', 2], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', 2], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hardswish', 2], - [3, 200, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 480, 112, True, 'hardswish', 1], - [3, 672, 112, True, 'hardswish', 1], - [5, 672, 160, True, 'hardswish', 2], - [5, 960, 160, True, 'hardswish', 1], - [5, 960, 160, True, 'hardswish', 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', 2], - [3, 72, 24, False, 'relu', 2], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hardswish', 2], - [5, 240, 40, True, 'hardswish', 1], - [5, 240, 40, True, 'hardswish', 1], - [5, 120, 48, True, 'hardswish', 1], - [5, 144, 48, True, 'hardswish', 1], - [5, 288, 96, True, 'hardswish', 2], - [5, 576, 96, True, 'hardswish', 1], - [5, 576, 96, True, 'hardswish', 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError("mode[" + model_name + - "_model] is not implemented!") - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert scale in supported_scale, \ - "supported scale are {} but input scale is {}".format(supported_scale, scale) - inplanes = 16 - # conv1 - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act='hardswish') - - self.stages = [] - self.out_channels = [] - block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in cfg: - se = se and not self.disable_se - start_idx = 2 if model_name == 'large' else 0 - if s == 2 and i > 
start_idx: - self.out_channels.append(inplanes) - self.stages.append(nn.Sequential(*block_list)) - block_list = [] - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl)) - inplanes = make_divisible(scale * c) - i += 1 - block_list.append( - ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act='hardswish')) - self.stages.append(nn.Sequential(*block_list)) - self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) - for i, stage in enumerate(self.stages): - self.add_sublayer(sublayer=stage, name="stage{}".format(i)) - - def forward(self, x): - x = self.conv(x) - out_list = [] - for stage in self.stages: - x = stage(x) - out_list.append(x) - return out_list - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=False) - - self.bn = nn.BatchNorm(num_channels=out_channels, act=None) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = F.hardswish(x) - else: - print("The activation function({}) is selected incorrectly.". - format(self.act)) - exit() - return x - - -class ResidualUnit(nn.Layer): - def __init__(self, - in_channels, - mid_channels, - out_channels, - kernel_size, - stride, - use_se, - act=None): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_channels == out_channels - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=True, - act=act) - self.bottleneck_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - padding=int((kernel_size - 1) // 2), - groups=mid_channels, - if_act=True, - act=act) - if self.if_se: - self.mid_se = SEModule(mid_channels) - self.linear_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None) - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.add(inputs, x) - return x - - -class SEModule(nn.Layer): - def __init__(self, in_channels, reduction=4): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels // reduction, - kernel_size=1, - stride=1, - padding=0) - self.conv2 = nn.Conv2D( - in_channels=in_channels // reduction, - out_channels=in_channels, - kernel_size=1, - stride=1, - padding=0) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5) - return inputs * outputs diff --git a/backend/ppocr/modeling/backbones/det_resnet_vd.py 
b/backend/ppocr/modeling/backbones/det_resnet_vd.py deleted file mode 100644 index 8c955a4a..00000000 --- a/backend/ppocr/modeling/backbones/det_resnet_vd.py +++ /dev/null @@ -1,351 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -from paddle.vision.ops import DeformConv2D -from paddle.regularizer import L2Decay -from paddle.nn.initializer import Normal, Constant, XavierUniform - -__all__ = ["ResNet"] - - -class DeformableConvV2(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - weight_attr=None, - bias_attr=None, - lr_scale=1, - regularizer=None, - skip_quant=False, - dcn_bias_regularizer=L2Decay(0.), - dcn_bias_lr_scale=2.): - super(DeformableConvV2, self).__init__() - self.offset_channel = 2 * kernel_size**2 * groups - self.mask_channel = kernel_size**2 * groups - - if bias_attr: - # in FCOS-DCN head, specifically need learning_rate and regularizer - dcn_bias_attr = ParamAttr( - initializer=Constant(value=0), - regularizer=dcn_bias_regularizer, - learning_rate=dcn_bias_lr_scale) - else: - # in ResNet backbone, do not need bias - dcn_bias_attr = False - self.conv_dcn = DeformConv2D( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2 * dilation, - dilation=dilation, - deformable_groups=groups, - weight_attr=weight_attr, - bias_attr=dcn_bias_attr) - - if lr_scale == 1 and regularizer is None: - offset_bias_attr = ParamAttr(initializer=Constant(0.)) - else: - offset_bias_attr = ParamAttr( - initializer=Constant(0.), - learning_rate=lr_scale, - regularizer=regularizer) - self.conv_offset = nn.Conv2D( - in_channels, - groups * 3 * kernel_size**2, - kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - weight_attr=ParamAttr(initializer=Constant(0.0)), - bias_attr=offset_bias_attr) - if skip_quant: - self.conv_offset.skip_quant = True - - def forward(self, x): - offset_mask = self.conv_offset(x) - offset, mask = paddle.split( - offset_mask, - num_or_sections=[self.offset_channel, self.mask_channel], - axis=1) - mask = F.sigmoid(mask) - y = self.conv_dcn(x, offset, mask=mask) - return y - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - is_dcn=False): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - if not is_dcn: - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - bias_attr=False) - else: - self._conv = DeformableConvV2( - 
in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=2, #groups, - bias_attr=False) - self._batch_norm = nn.BatchNorm(out_channels, act=act) - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - is_dcn=False, ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu') - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - is_dcn=is_dcn) - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None) - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, ): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu') - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None) - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, - in_channels=3, - layers=50, - dcn_stage=None, - out_indices=None, - **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, - 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.dcn_stage = dcn_stage if dcn_stage is not None else [ - False, False, False, False - ] - self.out_indices = out_indices if out_indices is not None else [ - 0, 1, 2, 3 - ] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=2, - act='relu') - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu') - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu') - self.pool2d_max = 
nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - is_dcn = self.dcn_stage[block] - for i in range(depth[block]): - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - is_dcn=is_dcn)) - shortcut = True - block_list.append(bottleneck_block) - if block in self.out_indices: - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - # is_dcn = self.dcn_stage[block] - for i in range(depth[block]): - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0)) - shortcut = True - block_list.append(basic_block) - if block in self.out_indices: - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - out = [] - for i, block in enumerate(self.stages): - y = block(y) - if i in self.out_indices: - out.append(y) - return out diff --git a/backend/ppocr/modeling/backbones/det_resnet_vd_sast.py b/backend/ppocr/modeling/backbones/det_resnet_vd_sast.py deleted file mode 100644 index c9376a8d..00000000 --- a/backend/ppocr/modeling/backbones/det_resnet_vd_sast.py +++ /dev/null @@ -1,285 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet_SAST"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet_SAST(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet_SAST, self).__init__() - - self.layers = layers - supported_layers = 
[18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - # depth = [3, 4, 6, 3] - depth = [3, 4, 6, 3, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - # num_channels = [64, 256, 512, - # 1024] if layers >= 50 else [64, 64, 128, 256] - # num_filters = [64, 128, 256, 512] - num_channels = [64, 256, 512, - 1024, 2048] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=2, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [3, 64] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(bottleneck_block) - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(basic_block) - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - out = [inputs] - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - out.append(y) - y = self.pool2d_max(y) - for block in self.stages: - y = block(y) - out.append(y) - return out \ No newline at end of file diff --git a/backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py b/backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py deleted file mode 100644 index 97afd346..00000000 --- a/backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +++ /dev/null @@ -1,265 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=stride, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = 
self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - # depth = [3, 4, 6, 3] - depth = [3, 4, 6, 3, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, 1024, - 2048] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=7, - stride=2, - act='relu', - name="conv1_1") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [3, 64] - # num_filters = [64, 128, 256, 512, 512] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(bottleneck_block) - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(basic_block) - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - out = [inputs] - y = self.conv1_1(inputs) - out.append(y) - y = self.pool2d_max(y) - for block in self.stages: - y = block(y) - out.append(y) - return out diff --git a/backend/ppocr/modeling/backbones/kie_unet_sdmgr.py b/backend/ppocr/modeling/backbones/kie_unet_sdmgr.py deleted file mode 100644 index 545e4e75..00000000 --- a/backend/ppocr/modeling/backbones/kie_unet_sdmgr.py +++ /dev/null @@ -1,186 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import numpy as np -import cv2 - -__all__ = ["Kie_backbone"] - - -class Encoder(nn.Layer): - def __init__(self, num_channels, num_filters): - super(Encoder, self).__init__() - self.conv1 = nn.Conv2D( - num_channels, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn1 = nn.BatchNorm(num_filters, act='relu') - - self.conv2 = nn.Conv2D( - num_filters, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn2 = nn.BatchNorm(num_filters, act='relu') - - self.pool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs): - x = self.conv1(inputs) - x = self.bn1(x) - x = self.conv2(x) - x = self.bn2(x) - x_pooled = self.pool(x) - return x, x_pooled - - -class Decoder(nn.Layer): - def __init__(self, num_channels, num_filters): - super(Decoder, self).__init__() - - self.conv1 = nn.Conv2D( - num_channels, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn1 = nn.BatchNorm(num_filters, act='relu') - - self.conv2 = nn.Conv2D( - num_filters, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn2 = nn.BatchNorm(num_filters, act='relu') - - self.conv0 = nn.Conv2D( - num_channels, - num_filters, - kernel_size=1, - stride=1, - padding=0, - bias_attr=False) - self.bn0 = nn.BatchNorm(num_filters, act='relu') - - def forward(self, inputs_prev, inputs): - x = self.conv0(inputs) - x = self.bn0(x) - x = paddle.nn.functional.interpolate( - x, scale_factor=2, mode='bilinear', align_corners=False) - x = paddle.concat([inputs_prev, x], axis=1) - x = self.conv1(x) - x = self.bn1(x) - x = self.conv2(x) - x = self.bn2(x) - return x - - -class UNet(nn.Layer): - def __init__(self): - super(UNet, self).__init__() - self.down1 = Encoder(num_channels=3, num_filters=16) - self.down2 = Encoder(num_channels=16, num_filters=32) - self.down3 = Encoder(num_channels=32, num_filters=64) - self.down4 = Encoder(num_channels=64, num_filters=128) - self.down5 = Encoder(num_channels=128, num_filters=256) - - self.up1 = Decoder(32, 16) - self.up2 = Decoder(64, 32) - self.up3 = Decoder(128, 64) - self.up4 = Decoder(256, 128) - self.out_channels = 16 - - def forward(self, inputs): - x1, _ = self.down1(inputs) - _, x2 = self.down2(x1) - _, x3 = self.down3(x2) - _, x4 = self.down4(x3) - _, x5 = self.down5(x4) - - x = self.up4(x4, x5) - x = self.up3(x3, x) - x = self.up2(x2, x) - x = self.up1(x1, x) - return x - - -class Kie_backbone(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(Kie_backbone, self).__init__() - self.out_channels = 16 - self.img_feat = UNet() - self.maxpool = nn.MaxPool2D(kernel_size=7) - - def bbox2roi(self, bbox_list): - rois_list = [] - rois_num = [] - for img_id, bboxes in enumerate(bbox_list): - rois_num.append(bboxes.shape[0]) - rois_list.append(bboxes) - rois = paddle.concat(rois_list, 0) - rois_num = paddle.to_tensor(rois_num, dtype='int32') - return rois, rois_num - - def pre_process(self, 
img, relations, texts, gt_bboxes, tag, img_size): - img, relations, texts, gt_bboxes, tag, img_size = img.numpy( - ), relations.numpy(), texts.numpy(), gt_bboxes.numpy(), tag.numpy( - ).tolist(), img_size.numpy() - temp_relations, temp_texts, temp_gt_bboxes = [], [], [] - h, w = int(np.max(img_size[:, 0])), int(np.max(img_size[:, 1])) - img = paddle.to_tensor(img[:, :, :h, :w]) - batch = len(tag) - for i in range(batch): - num, recoder_len = tag[i][0], tag[i][1] - temp_relations.append( - paddle.to_tensor( - relations[i, :num, :num, :], dtype='float32')) - temp_texts.append( - paddle.to_tensor( - texts[i, :num, :recoder_len], dtype='float32')) - temp_gt_bboxes.append( - paddle.to_tensor( - gt_bboxes[i, :num, ...], dtype='float32')) - return img, temp_relations, temp_texts, temp_gt_bboxes - - def forward(self, inputs): - img = inputs[0] - relations, texts, gt_bboxes, tag, img_size = inputs[1], inputs[ - 2], inputs[3], inputs[5], inputs[-1] - img, relations, texts, gt_bboxes = self.pre_process( - img, relations, texts, gt_bboxes, tag, img_size) - x = self.img_feat(img) - boxes, rois_num = self.bbox2roi(gt_bboxes) - feats = paddle.fluid.layers.roi_align( - x, - boxes, - spatial_scale=1.0, - pooled_height=7, - pooled_width=7, - rois_num=rois_num) - feats = self.maxpool(feats).squeeze(-1).squeeze(-1) - return [relations, texts, feats] diff --git a/backend/ppocr/modeling/backbones/rec_efficientb3_pren.py b/backend/ppocr/modeling/backbones/rec_efficientb3_pren.py deleted file mode 100644 index 57eef178..00000000 --- a/backend/ppocr/modeling/backbones/rec_efficientb3_pren.py +++ /dev/null @@ -1,228 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Code is refer from: -https://github.com/RuijieJ/pren/blob/main/Nets/EfficientNet.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import namedtuple -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ['EfficientNetb3'] - - -class EffB3Params: - @staticmethod - def get_global_params(): - """ - The fllowing are efficientnetb3's arch superparams, but to fit for scene - text recognition task, the resolution(image_size) here is changed - from 300 to 64. 
- """ - GlobalParams = namedtuple('GlobalParams', [ - 'drop_connect_rate', 'width_coefficient', 'depth_coefficient', - 'depth_divisor', 'image_size' - ]) - global_params = GlobalParams( - drop_connect_rate=0.3, - width_coefficient=1.2, - depth_coefficient=1.4, - depth_divisor=8, - image_size=64) - return global_params - - @staticmethod - def get_block_params(): - BlockParams = namedtuple('BlockParams', [ - 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', - 'expand_ratio', 'id_skip', 'se_ratio', 'stride' - ]) - block_params = [ - BlockParams(3, 1, 32, 16, 1, True, 0.25, 1), - BlockParams(3, 2, 16, 24, 6, True, 0.25, 2), - BlockParams(5, 2, 24, 40, 6, True, 0.25, 2), - BlockParams(3, 3, 40, 80, 6, True, 0.25, 2), - BlockParams(5, 3, 80, 112, 6, True, 0.25, 1), - BlockParams(5, 4, 112, 192, 6, True, 0.25, 2), - BlockParams(3, 1, 192, 320, 6, True, 0.25, 1) - ] - return block_params - - -class EffUtils: - @staticmethod - def round_filters(filters, global_params): - """Calculate and round number of filters based on depth multiplier.""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - filters *= multiplier - new_filters = int(filters + divisor / 2) // divisor * divisor - if new_filters < 0.9 * filters: - new_filters += divisor - return int(new_filters) - - @staticmethod - def round_repeats(repeats, global_params): - """Round number of filters based on depth multiplier.""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class ConvBlock(nn.Layer): - def __init__(self, block_params): - super(ConvBlock, self).__init__() - self.block_args = block_params - self.has_se = (self.block_args.se_ratio is not None) and \ - (0 < self.block_args.se_ratio <= 1) - self.id_skip = block_params.id_skip - - # expansion phase - self.input_filters = self.block_args.input_filters - output_filters = \ - self.block_args.input_filters * self.block_args.expand_ratio - if self.block_args.expand_ratio != 1: - self.expand_conv = nn.Conv2D( - self.input_filters, output_filters, 1, bias_attr=False) - self.bn0 = nn.BatchNorm(output_filters) - - # depthwise conv phase - k = self.block_args.kernel_size - s = self.block_args.stride - self.depthwise_conv = nn.Conv2D( - output_filters, - output_filters, - groups=output_filters, - kernel_size=k, - stride=s, - padding='same', - bias_attr=False) - self.bn1 = nn.BatchNorm(output_filters) - - # squeeze and excitation layer, if desired - if self.has_se: - num_squeezed_channels = max(1, - int(self.block_args.input_filters * - self.block_args.se_ratio)) - self.se_reduce = nn.Conv2D(output_filters, num_squeezed_channels, 1) - self.se_expand = nn.Conv2D(num_squeezed_channels, output_filters, 1) - - # output phase - self.final_oup = self.block_args.output_filters - self.project_conv = nn.Conv2D( - output_filters, self.final_oup, 1, bias_attr=False) - self.bn2 = nn.BatchNorm(self.final_oup) - self.swish = nn.Swish() - - def drop_connect(self, inputs, p, training): - if not training: - return inputs - - batch_size = inputs.shape[0] - keep_prob = 1 - p - random_tensor = keep_prob - random_tensor += paddle.rand([batch_size, 1, 1, 1], dtype=inputs.dtype) - random_tensor = paddle.to_tensor(random_tensor, place=inputs.place) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - def forward(self, inputs, drop_connect_rate=None): - # expansion and depthwise conv - x = 
- if self.block_args.expand_ratio != 1: - x = self.swish(self.bn0(self.expand_conv(inputs))) - x = self.swish(self.bn1(self.depthwise_conv(x))) - - # squeeze and excitation - if self.has_se: - x_squeezed = F.adaptive_avg_pool2d(x, 1) - x_squeezed = self.se_expand(self.swish(self.se_reduce(x_squeezed))) - x = F.sigmoid(x_squeezed) * x - x = self.bn2(self.project_conv(x)) - - # skip connection and drop connect - if self.id_skip and self.block_args.stride == 1 and \ - self.input_filters == self.final_oup: - if drop_connect_rate: - x = self.drop_connect( - x, p=drop_connect_rate, training=self.training) - x = x + inputs - return x - - -class EfficientNetb3_PREN(nn.Layer): - def __init__(self, in_channels): - super(EfficientNetb3_PREN, self).__init__() - self.blocks_params = EffB3Params.get_block_params() - self.global_params = EffB3Params.get_global_params() - self.out_channels = [] - # stem - stem_channels = EffUtils.round_filters(32, self.global_params) - self.conv_stem = nn.Conv2D( - in_channels, stem_channels, 3, 2, padding='same', bias_attr=False) - self.bn0 = nn.BatchNorm(stem_channels) - - self.blocks = [] - # to extract three feature maps for fpn based on efficientnetb3 backbone - self.concerned_block_idxes = [7, 17, 25] - concerned_idx = 0 - for i, block_params in enumerate(self.blocks_params): - block_params = block_params._replace( - input_filters=EffUtils.round_filters(block_params.input_filters, - self.global_params), - output_filters=EffUtils.round_filters( - block_params.output_filters, self.global_params), - num_repeat=EffUtils.round_repeats(block_params.num_repeat, - self.global_params)) - self.blocks.append( - self.add_sublayer("{}-0".format(i), ConvBlock(block_params))) - concerned_idx += 1 - if concerned_idx in self.concerned_block_idxes: - self.out_channels.append(block_params.output_filters) - if block_params.num_repeat > 1: - block_params = block_params._replace( - input_filters=block_params.output_filters, stride=1) - for j in range(block_params.num_repeat - 1): - self.blocks.append( - self.add_sublayer('{}-{}'.format(i, j + 1), - ConvBlock(block_params))) - concerned_idx += 1 - if concerned_idx in self.concerned_block_idxes: - self.out_channels.append(block_params.output_filters) - - self.swish = nn.Swish() - - def forward(self, inputs): - outs = [] - - x = self.swish(self.bn0(self.conv_stem(inputs))) - for idx, block in enumerate(self.blocks): - drop_connect_rate = self.global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / len(self.blocks) - x = block(x, drop_connect_rate=drop_connect_rate) - if idx in self.concerned_block_idxes: - outs.append(x) - return outs diff --git a/backend/ppocr/modeling/backbones/rec_micronet.py b/backend/ppocr/modeling/backbones/rec_micronet.py deleted file mode 100644 index b0ae5a14..00000000 --- a/backend/ppocr/modeling/backbones/rec_micronet.py +++ /dev/null @@ -1,528 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
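For reference, a minimal standalone sketch of the width-scaling rule that EffUtils.round_filters above applies (constants taken from EffB3Params: width_coefficient=1.2, depth_divisor=8; the free function here is illustrative, not part of the patch):

def round_filters(filters, width_coefficient=1.2, depth_divisor=8):
    # scale by the width multiplier, then snap to the nearest multiple of
    # depth_divisor, never rounding down by more than 10%
    filters *= width_coefficient
    new_filters = int(filters + depth_divisor / 2) // depth_divisor * depth_divisor
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)

assert round_filters(32) == 40  # the 32-channel stem becomes 40 channels under B3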
-""" -This code is refer from: -https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py -https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn - -from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible - -M0_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1], - [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1], - [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1], - [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1], - [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1], - [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2], - [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2], -] -M1_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4 - [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1], - [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1], - [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1], - [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1], - [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1], - [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2], - [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2], -] -M2_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4 - [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1], - [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1], - [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1], - [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1], - [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2], - [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2], - [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2], - [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2], - [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2], -] -M3_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4 - [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1], - [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1], - [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1], - [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1], - [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2], - [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2], - [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2], - [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2], - [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2], - [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2], - [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2], - [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2], -] - - -def get_micronet_config(mode): - return eval(mode + '_cfgs') - - -class MaxGroupPooling(nn.Layer): - def __init__(self, channel_per_group=2): - super(MaxGroupPooling, self).__init__() - self.channel_per_group = channel_per_group - - def forward(self, x): - if self.channel_per_group == 1: - return x - # max op - b, c, h, w = x.shape - - # reshape - y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w]) - out = paddle.max(y, axis=2) - return out - - -class SpatialSepConvSF(nn.Layer): - def __init__(self, inp, oups, kernel_size, stride): - super(SpatialSepConvSF, self).__init__() - - oup1, oup2 = oups - self.conv = nn.Sequential( - nn.Conv2D( - inp, - oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0), - bias_attr=False, - groups=1), - nn.BatchNorm2D(oup1), - nn.Conv2D( - oup1, - oup1 * oup2, (1, kernel_size), (1, stride), - (0, kernel_size // 2), - bias_attr=False, - groups=oup1), - nn.BatchNorm2D(oup1 * oup2), - ChannelShuffle(oup1), ) - - def forward(self, x): - out = self.conv(x) - return out - - -class 
ChannelShuffle(nn.Layer): - def __init__(self, groups): - super(ChannelShuffle, self).__init__() - self.groups = groups - - def forward(self, x): - b, c, h, w = x.shape - - channels_per_group = c // self.groups - - # reshape - x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w]) - - x = paddle.transpose(x, (0, 2, 1, 3, 4)) - out = paddle.reshape(x, [b, -1, h, w]) - - return out - - -class StemLayer(nn.Layer): - def __init__(self, inp, oup, stride, groups=(4, 4)): - super(StemLayer, self).__init__() - - g1, g2 = groups - self.stem = nn.Sequential( - SpatialSepConvSF(inp, groups, 3, stride), - MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6()) - - def forward(self, x): - out = self.stem(x) - return out - - -class DepthSpatialSepConv(nn.Layer): - def __init__(self, inp, expand, kernel_size, stride): - super(DepthSpatialSepConv, self).__init__() - - exp1, exp2 = expand - - hidden_dim = inp * exp1 - oup = inp * exp1 * exp2 - - self.conv = nn.Sequential( - nn.Conv2D( - inp, - inp * exp1, (kernel_size, 1), (stride, 1), - (kernel_size // 2, 0), - bias_attr=False, - groups=inp), - nn.BatchNorm2D(inp * exp1), - nn.Conv2D( - hidden_dim, - oup, (1, kernel_size), - 1, (0, kernel_size // 2), - bias_attr=False, - groups=hidden_dim), - nn.BatchNorm2D(oup)) - - def forward(self, x): - x = self.conv(x) - return x - - -class GroupConv(nn.Layer): - def __init__(self, inp, oup, groups=2): - super(GroupConv, self).__init__() - self.inp = inp - self.oup = oup - self.groups = groups - self.conv = nn.Sequential( - nn.Conv2D( - inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]), - nn.BatchNorm2D(oup)) - - def forward(self, x): - x = self.conv(x) - return x - - -class DepthConv(nn.Layer): - def __init__(self, inp, oup, kernel_size, stride): - super(DepthConv, self).__init__() - self.conv = nn.Sequential( - nn.Conv2D( - inp, - oup, - kernel_size, - stride, - kernel_size // 2, - bias_attr=False, - groups=inp), - nn.BatchNorm2D(oup)) - - def forward(self, x): - out = self.conv(x) - return out - - -class DYShiftMax(nn.Layer): - def __init__(self, - inp, - oup, - reduction=4, - act_max=1.0, - act_relu=True, - init_a=[0.0, 0.0], - init_b=[0.0, 0.0], - relu_before_pool=False, - g=None, - expansion=False): - super(DYShiftMax, self).__init__() - self.oup = oup - self.act_max = act_max * 2 - self.act_relu = act_relu - self.avg_pool = nn.Sequential(nn.ReLU() if relu_before_pool == True else - nn.Sequential(), nn.AdaptiveAvgPool2D(1)) - - self.exp = 4 if act_relu else 2 - self.init_a = init_a - self.init_b = init_b - - # determine squeeze - squeeze = make_divisible(inp // reduction, 4) - if squeeze < 4: - squeeze = 4 - - self.fc = nn.Sequential( - nn.Linear(inp, squeeze), - nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid()) - - if g is None: - g = 1 - self.g = g[1] - if self.g != 1 and expansion: - self.g = inp // self.g - - self.gc = inp // self.g - index = paddle.to_tensor([range(inp)]) - index = paddle.reshape(index, [1, inp, 1, 1]) - index = paddle.reshape(index, [1, self.g, self.gc, 1, 1]) - indexgs = paddle.split(index, [1, self.g - 1], axis=1) - indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1) - indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2) - indexs = paddle.concat((indexs[1], indexs[0]), axis=2) - self.index = paddle.reshape(indexs, [inp]) - self.expansion = expansion - - def forward(self, x): - x_in = x - x_out = x - - b, c, _, _ = x_in.shape - y = self.avg_pool(x_in) - y = paddle.reshape(y, [b, c]) - y = self.fc(y) - y = paddle.reshape(y, [b, self.oup * 
self.exp, 1, 1]) - y = (y - 0.5) * self.act_max - - n2, c2, h2, w2 = x_out.shape - x2 = paddle.to_tensor(x_out.numpy()[:, self.index.numpy(), :, :]) - - if self.exp == 4: - temp = y.shape - a1, b1, a2, b2 = paddle.split(y, temp[1] // self.oup, axis=1) - - a1 = a1 + self.init_a[0] - a2 = a2 + self.init_a[1] - - b1 = b1 + self.init_b[0] - b2 = b2 + self.init_b[1] - - z1 = x_out * a1 + x2 * b1 - z2 = x_out * a2 + x2 * b2 - - out = paddle.maximum(z1, z2) - - elif self.exp == 2: - temp = y.shape - a1, b1 = paddle.split(y, temp[1] // self.oup, axis=1) - a1 = a1 + self.init_a[0] - b1 = b1 + self.init_b[0] - out = x_out * a1 + x2 * b1 - - return out - - -class DYMicroBlock(nn.Layer): - def __init__(self, - inp, - oup, - kernel_size=3, - stride=1, - ch_exp=(2, 2), - ch_per_group=4, - groups_1x1=(1, 1), - depthsep=True, - shuffle=False, - activation_cfg=None): - super(DYMicroBlock, self).__init__() - - self.identity = stride == 1 and inp == oup - - y1, y2, y3 = activation_cfg['dy'] - act_reduction = 8 * activation_cfg['ratio'] - init_a = activation_cfg['init_a'] - init_b = activation_cfg['init_b'] - - t1 = ch_exp - gs1 = ch_per_group - hidden_fft, g1, g2 = groups_1x1 - hidden_dim2 = inp * t1[0] * t1[1] - - if gs1[0] == 0: - self.layers = nn.Sequential( - DepthSpatialSepConv(inp, t1, kernel_size, stride), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=True if y2 == 2 else False, - init_a=init_a, - reduction=act_reduction, - init_b=init_b, - g=gs1, - expansion=False) if y2 > 0 else nn.ReLU6(), - ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(), - ChannelShuffle(hidden_dim2 // 2) - if shuffle and y2 != 0 else nn.Sequential(), - GroupConv(hidden_dim2, oup, (g1, g2)), - DYShiftMax( - oup, - oup, - act_max=2.0, - act_relu=False, - init_a=[1.0, 0.0], - reduction=act_reduction // 2, - init_b=[0.0, 0.0], - g=(g1, g2), - expansion=False) if y3 > 0 else nn.Sequential(), - ChannelShuffle(g2) if shuffle else nn.Sequential(), - ChannelShuffle(oup // 2) - if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), ) - elif g2 == 0: - self.layers = nn.Sequential( - GroupConv(inp, hidden_dim2, gs1), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=False, - init_a=[1.0, 0.0], - reduction=act_reduction, - init_b=[0.0, 0.0], - g=gs1, - expansion=False) if y3 > 0 else nn.Sequential(), ) - else: - self.layers = nn.Sequential( - GroupConv(inp, hidden_dim2, gs1), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=True if y1 == 2 else False, - init_a=init_a, - reduction=act_reduction, - init_b=init_b, - g=gs1, - expansion=False) if y1 > 0 else nn.ReLU6(), - ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(), - DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride) - if depthsep else - DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride), - nn.Sequential(), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=True if y2 == 2 else False, - init_a=init_a, - reduction=act_reduction, - init_b=init_b, - g=gs1, - expansion=True) if y2 > 0 else nn.ReLU6(), - ChannelShuffle(hidden_dim2 // 4) - if shuffle and y1 != 0 and y2 != 0 else nn.Sequential() - if y1 == 0 and y2 == 0 else ChannelShuffle(hidden_dim2 // 2), - GroupConv(hidden_dim2, oup, (g1, g2)), - DYShiftMax( - oup, - oup, - act_max=2.0, - act_relu=False, - init_a=[1.0, 0.0], - reduction=act_reduction // 2 - if oup < hidden_dim2 else act_reduction, - init_b=[0.0, 0.0], - g=(g1, g2), - expansion=False) if y3 > 0 else nn.Sequential(), - ChannelShuffle(g2) if shuffle else 
nn.Sequential(), - ChannelShuffle(oup // 2) - if shuffle and y3 != 0 else nn.Sequential(), ) - - def forward(self, x): - identity = x - out = self.layers(x) - - if self.identity: - out = out + identity - - return out - - -class MicroNet(nn.Layer): - """ - the MicroNet backbone network for recognition module. - Args: - mode(str): {'M0', 'M1', 'M2', 'M3'} - Four models are proposed based on four different computational costs (4M, 6M, 12M, 21M MAdds) - Default: 'M3'. - """ - - def __init__(self, mode='M3', **kwargs): - super(MicroNet, self).__init__() - - self.cfgs = get_micronet_config(mode) - - activation_cfg = {} - if mode == 'M0': - input_channel = 4 - stem_groups = 2, 2 - out_ch = 384 - activation_cfg['init_a'] = 1.0, 1.0 - activation_cfg['init_b'] = 0.0, 0.0 - elif mode == 'M1': - input_channel = 6 - stem_groups = 3, 2 - out_ch = 576 - activation_cfg['init_a'] = 1.0, 1.0 - activation_cfg['init_b'] = 0.0, 0.0 - elif mode == 'M2': - input_channel = 8 - stem_groups = 4, 2 - out_ch = 768 - activation_cfg['init_a'] = 1.0, 1.0 - activation_cfg['init_b'] = 0.0, 0.0 - elif mode == 'M3': - input_channel = 12 - stem_groups = 4, 3 - out_ch = 432 - activation_cfg['init_a'] = 1.0, 0.5 - activation_cfg['init_b'] = 0.0, 0.5 - else: - raise NotImplementedError("mode[" + mode + - "_model] is not implemented!") - - layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)] - - for idx, val in enumerate(self.cfgs): - s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val - - t1 = (c1, c2) - gs1 = (g1, g2) - gs2 = (c3, g3, g4) - activation_cfg['dy'] = [y1, y2, y3] - activation_cfg['ratio'] = r - - output_channel = c - layers.append( - DYMicroBlock( - input_channel, - output_channel, - kernel_size=ks, - stride=s, - ch_exp=t1, - ch_per_group=gs1, - groups_1x1=gs2, - depthsep=True, - shuffle=True, - activation_cfg=activation_cfg, )) - input_channel = output_channel - for i in range(1, n): - layers.append( - DYMicroBlock( - input_channel, - output_channel, - kernel_size=ks, - stride=1, - ch_exp=t1, - ch_per_group=gs1, - groups_1x1=gs2, - depthsep=True, - shuffle=True, - activation_cfg=activation_cfg, )) - input_channel = output_channel - self.features = nn.Sequential(*layers) - - self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - - self.out_channels = make_divisible(out_ch) - - def forward(self, x): - x = self.features(x) - x = self.pool(x) - return x diff --git a/backend/ppocr/modeling/backbones/rec_mobilenet_v3.py b/backend/ppocr/modeling/backbones/rec_mobilenet_v3.py deleted file mode 100644 index 917e000d..00000000 --- a/backend/ppocr/modeling/backbones/rec_mobilenet_v3.py +++ /dev/null @@ -1,138 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
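For reference, a minimal sketch of the reshape-transpose-reshape trick behind MicroNet's ChannelShuffle above (toy values assumed: one 4-channel sample, 2 groups):

import paddle

x = paddle.arange(4).reshape([1, 4, 1, 1])       # channels [0, 1, 2, 3]
b, c, h, w = x.shape
groups = 2
y = paddle.reshape(x, [b, groups, c // groups, h, w])
y = paddle.transpose(y, (0, 2, 1, 3, 4))         # interleave the two groups
y = paddle.reshape(y, [b, -1, h, w])
print(y.flatten().tolist())                      # [0, 2, 1, 3]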
- -from paddle import nn - -from ppocr.modeling.backbones.det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible - -__all__ = ['MobileNetV3'] - - -class MobileNetV3(nn.Layer): - def __init__(self, - in_channels=3, - model_name='small', - scale=0.5, - large_stride=None, - small_stride=None, - disable_se=False, - **kwargs): - super(MobileNetV3, self).__init__() - self.disable_se = disable_se - if small_stride is None: - small_stride = [2, 2, 2, 2] - if large_stride is None: - large_stride = [1, 2, 2, 2] - - assert isinstance(large_stride, list), "large_stride type must " \ - "be list but got {}".format(type(large_stride)) - assert isinstance(small_stride, list), "small_stride type must " \ - "be list but got {}".format(type(small_stride)) - assert len(large_stride) == 4, "large_stride length must be " \ - "4 but got {}".format(len(large_stride)) - assert len(small_stride) == 4, "small_stride length must be " \ - "4 but got {}".format(len(small_stride)) - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', large_stride[0]], - [3, 64, 24, False, 'relu', (large_stride[1], 1)], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', (large_stride[2], 1)], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hardswish', 1], - [3, 200, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 480, 112, True, 'hardswish', 1], - [3, 672, 112, True, 'hardswish', 1], - [5, 672, 160, True, 'hardswish', (large_stride[3], 1)], - [5, 960, 160, True, 'hardswish', 1], - [5, 960, 160, True, 'hardswish', 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', (small_stride[0], 1)], - [3, 72, 24, False, 'relu', (small_stride[1], 1)], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hardswish', (small_stride[2], 1)], - [5, 240, 40, True, 'hardswish', 1], - [5, 240, 40, True, 'hardswish', 1], - [5, 120, 48, True, 'hardswish', 1], - [5, 144, 48, True, 'hardswish', 1], - [5, 288, 96, True, 'hardswish', (small_stride[3], 1)], - [5, 576, 96, True, 'hardswish', 1], - [5, 576, 96, True, 'hardswish', 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError("mode[" + model_name + - "_model] is not implemented!") - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert scale in supported_scale, \ - "supported scales are {} but input scale is {}".format(supported_scale, scale) - - inplanes = 16 - # conv1 - self.conv1 = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act='hardswish') - i = 0 - block_list = [] - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in cfg: - se = se and not self.disable_se - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl)) - inplanes = make_divisible(scale * c) - i += 1 - self.blocks = nn.Sequential(*block_list) - - self.conv2 = ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act='hardswish') - - self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - self.out_channels = make_divisible(scale * cls_ch_squeeze) - - def forward(self, x): - x = self.conv1(x) - x = 
self.blocks(x) - x = self.conv2(x) - x = self.pool(x) - return x diff --git a/backend/ppocr/modeling/backbones/rec_mv1_enhance.py b/backend/ppocr/modeling/backbones/rec_mv1_enhance.py deleted file mode 100644 index bb6af5e8..00000000 --- a/backend/ppocr/modeling/backbones/rec_mv1_enhance.py +++ /dev/null @@ -1,256 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This code is refer from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import numpy as np -import paddle -from paddle import ParamAttr, reshape, transpose -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import KaimingNormal -from paddle.regularizer import L2Decay -from paddle.nn.functional import hardswish, hardsigmoid - - -class ConvBNLayer(nn.Layer): - def __init__(self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act='hard_swish'): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=KaimingNormal()), - bias_attr=False) - - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(regularizer=L2Decay(0.0))) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DepthwiseSeparable(nn.Layer): - def __init__(self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - dw_size=3, - padding=1, - use_se=False): - super(DepthwiseSeparable, self).__init__() - self.use_se = use_se - self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=dw_size, - stride=stride, - padding=padding, - num_groups=int(num_groups * scale)) - if use_se: - self._se = SEModule(int(num_filters1 * scale)) - self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0) - - def forward(self, inputs): - y = self._depthwise_conv(inputs) - if self.use_se: - y = self._se(y) - y = self._pointwise_conv(y) - return y - - -class MobileNetV1Enhance(nn.Layer): - def __init__(self, - in_channels=3, - scale=0.5, - last_conv_stride=1, - last_pool_type='max', - **kwargs): - super().__init__() - self.scale = scale - self.block_list = [] - - self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1) - - conv2_1 = DepthwiseSeparable( - num_channels=int(32 
* scale), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale) - self.block_list.append(conv2_1) - - conv2_2 = DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=1, - scale=scale) - self.block_list.append(conv2_2) - - conv3_1 = DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale) - self.block_list.append(conv3_1) - - conv3_2 = DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=(2, 1), - scale=scale) - self.block_list.append(conv3_2) - - conv4_1 = DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale) - self.block_list.append(conv4_1) - - conv4_2 = DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=(2, 1), - scale=scale) - self.block_list.append(conv4_2) - - for _ in range(5): - conv5 = DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - dw_size=5, - padding=2, - scale=scale, - use_se=False) - self.block_list.append(conv5) - - conv5_6 = DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=(2, 1), - dw_size=5, - padding=2, - scale=scale, - use_se=True) - self.block_list.append(conv5_6) - - conv6 = DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=last_conv_stride, - dw_size=5, - padding=2, - use_se=True, - scale=scale) - self.block_list.append(conv6) - - self.block_list = nn.Sequential(*self.block_list) - if last_pool_type == 'avg': - self.pool = nn.AvgPool2D(kernel_size=2, stride=2, padding=0) - else: - self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - self.out_channels = int(1024 * scale) - - def forward(self, inputs): - y = self.conv1(inputs) - y = self.block_list(y) - y = self.pool(y) - return y - - -class SEModule(nn.Layer): - def __init__(self, channel, reduction=4): - super(SEModule, self).__init__() - self.avg_pool = AdaptiveAvgPool2D(1) - self.conv1 = Conv2D( - in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) - self.conv2 = Conv2D( - in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = hardsigmoid(outputs) - return paddle.multiply(x=inputs, y=outputs) diff --git a/backend/ppocr/modeling/backbones/rec_nrtr_mtb.py b/backend/ppocr/modeling/backbones/rec_nrtr_mtb.py deleted file mode 100644 index 22e02a63..00000000 --- a/backend/ppocr/modeling/backbones/rec_nrtr_mtb.py +++ /dev/null @@ -1,48 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle import nn -import paddle - - -class MTB(nn.Layer): - def __init__(self, cnn_num, in_channels): - super(MTB, self).__init__() - self.block = nn.Sequential() - self.out_channels = in_channels - self.cnn_num = cnn_num - if self.cnn_num == 2: - for i in range(self.cnn_num): - self.block.add_sublayer( - 'conv_{}'.format(i), - nn.Conv2D( - in_channels=in_channels - if i == 0 else 32 * (2**(i - 1)), - out_channels=32 * (2**i), - kernel_size=3, - stride=2, - padding=1)) - self.block.add_sublayer('relu_{}'.format(i), nn.ReLU()) - self.block.add_sublayer('bn_{}'.format(i), - nn.BatchNorm2D(32 * (2**i))) - - def forward(self, images): - x = self.block(images) - if self.cnn_num == 2: - # (b, w, h, c) - x = paddle.transpose(x, [0, 3, 2, 1]) - x_shape = paddle.shape(x) - x = paddle.reshape( - x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]]) - return x diff --git a/backend/ppocr/modeling/backbones/rec_resnet_31.py b/backend/ppocr/modeling/backbones/rec_resnet_31.py deleted file mode 100644 index 96517013..00000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_31.py +++ /dev/null @@ -1,210 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
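For reference, a short sketch of the feature-map-to-sequence flattening done at the end of MTB.forward above, with toy dimensions assumed (B=2, C=64, H=8, W=25):

import paddle

feat = paddle.rand([2, 64, 8, 25])               # B, C, H, W
seq = paddle.transpose(feat, [0, 3, 2, 1])       # B, W, H, C
seq = paddle.reshape(seq, [2, 25, 8 * 64])       # one feature vector per image column
print(seq.shape)                                 # [2, 25, 512]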
-""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -import numpy as np - -__all__ = ["ResNet31"] - - -def conv3x3(in_channel, out_channel, stride=1): - return nn.Conv2D( - in_channel, - out_channel, - kernel_size=3, - stride=stride, - padding=1, - bias_attr=False) - - -class BasicBlock(nn.Layer): - expansion = 1 - - def __init__(self, in_channels, channels, stride=1, downsample=False): - super().__init__() - self.conv1 = conv3x3(in_channels, channels, stride) - self.bn1 = nn.BatchNorm2D(channels) - self.relu = nn.ReLU() - self.conv2 = conv3x3(channels, channels) - self.bn2 = nn.BatchNorm2D(channels) - self.downsample = downsample - if downsample: - self.downsample = nn.Sequential( - nn.Conv2D( - in_channels, - channels * self.expansion, - 1, - stride, - bias_attr=False), - nn.BatchNorm2D(channels * self.expansion), ) - else: - self.downsample = nn.Sequential() - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet31(nn.Layer): - ''' - Args: - in_channels (int): Number of channels of input image tensor. - layers (list[int]): List of BasicBlock number for each stage. - channels (list[int]): List of out_channels of Conv2d layer. - out_indices (None | Sequence[int]): Indices of output stages. - last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage. 
- ''' - - def __init__(self, - in_channels=3, - layers=[1, 2, 5, 3], - channels=[64, 128, 256, 256, 512, 512, 512], - out_indices=None, - last_stage_pool=False): - super(ResNet31, self).__init__() - assert isinstance(in_channels, int) - assert isinstance(last_stage_pool, bool) - - self.out_indices = out_indices - self.last_stage_pool = last_stage_pool - - # conv 1 (Conv Conv) - self.conv1_1 = nn.Conv2D( - in_channels, channels[0], kernel_size=3, stride=1, padding=1) - self.bn1_1 = nn.BatchNorm2D(channels[0]) - self.relu1_1 = nn.ReLU() - - self.conv1_2 = nn.Conv2D( - channels[0], channels[1], kernel_size=3, stride=1, padding=1) - self.bn1_2 = nn.BatchNorm2D(channels[1]) - self.relu1_2 = nn.ReLU() - - # conv 2 (Max-pooling, Residual block, Conv) - self.pool2 = nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self.block2 = self._make_layer(channels[1], channels[2], layers[0]) - self.conv2 = nn.Conv2D( - channels[2], channels[2], kernel_size=3, stride=1, padding=1) - self.bn2 = nn.BatchNorm2D(channels[2]) - self.relu2 = nn.ReLU() - - # conv 3 (Max-pooling, Residual block, Conv) - self.pool3 = nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self.block3 = self._make_layer(channels[2], channels[3], layers[1]) - self.conv3 = nn.Conv2D( - channels[3], channels[3], kernel_size=3, stride=1, padding=1) - self.bn3 = nn.BatchNorm2D(channels[3]) - self.relu3 = nn.ReLU() - - # conv 4 (Max-pooling, Residual block, Conv) - self.pool4 = nn.MaxPool2D( - kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True) - self.block4 = self._make_layer(channels[3], channels[4], layers[2]) - self.conv4 = nn.Conv2D( - channels[4], channels[4], kernel_size=3, stride=1, padding=1) - self.bn4 = nn.BatchNorm2D(channels[4]) - self.relu4 = nn.ReLU() - - # conv 5 ((Max-pooling), Residual block, Conv) - self.pool5 = None - if self.last_stage_pool: - self.pool5 = nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self.block5 = self._make_layer(channels[4], channels[5], layers[3]) - self.conv5 = nn.Conv2D( - channels[5], channels[5], kernel_size=3, stride=1, padding=1) - self.bn5 = nn.BatchNorm2D(channels[5]) - self.relu5 = nn.ReLU() - - self.out_channels = channels[-1] - - def _make_layer(self, input_channels, output_channels, blocks): - layers = [] - for _ in range(blocks): - downsample = None - if input_channels != output_channels: - downsample = nn.Sequential( - nn.Conv2D( - input_channels, - output_channels, - kernel_size=1, - stride=1, - bias_attr=False), - nn.BatchNorm2D(output_channels), ) - - layers.append( - BasicBlock( - input_channels, output_channels, downsample=downsample)) - input_channels = output_channels - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1_1(x) - x = self.bn1_1(x) - x = self.relu1_1(x) - - x = self.conv1_2(x) - x = self.bn1_2(x) - x = self.relu1_2(x) - - outs = [] - for i in range(4): - layer_index = i + 2 - pool_layer = getattr(self, f'pool{layer_index}') - block_layer = getattr(self, f'block{layer_index}') - conv_layer = getattr(self, f'conv{layer_index}') - bn_layer = getattr(self, f'bn{layer_index}') - relu_layer = getattr(self, f'relu{layer_index}') - - if pool_layer is not None: - x = pool_layer(x) - x = block_layer(x) - x = conv_layer(x) - x = bn_layer(x) - x = relu_layer(x) - - outs.append(x) - - if self.out_indices is not None: - return tuple([outs[i] for i in self.out_indices]) - - return x diff --git a/backend/ppocr/modeling/backbones/rec_resnet_aster.py 
b/backend/ppocr/modeling/backbones/rec_resnet_aster.py deleted file mode 100644 index 6a2710df..00000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_aster.py +++ /dev/null @@ -1,143 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/resnet_aster.py -""" -import paddle -import paddle.nn as nn - -import sys -import math - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2D( - in_planes, - out_planes, - kernel_size=3, - stride=stride, - padding=1, - bias_attr=False) - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - return nn.Conv2D( - in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False) - - -def get_sinusoid_encoding(n_position, feat_dim, wave_length=10000): - # [n_position] - positions = paddle.arange(0, n_position) - # [feat_dim] - dim_range = paddle.arange(0, feat_dim) - dim_range = paddle.pow(wave_length, 2 * (dim_range // 2) / feat_dim) - # [n_position, feat_dim] - angles = paddle.unsqueeze( - positions, axis=1) / paddle.unsqueeze( - dim_range, axis=0) - angles = paddle.cast(angles, "float32") - angles[:, 0::2] = paddle.sin(angles[:, 0::2]) - angles[:, 1::2] = paddle.cos(angles[:, 1::2]) - return angles - - -class AsterBlock(nn.Layer): - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(AsterBlock, self).__init__() - self.conv1 = conv1x1(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2D(planes) - self.relu = nn.ReLU() - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2D(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - out += residual - out = self.relu(out) - return out - - -class ResNet_ASTER(nn.Layer): - """For aster or crnn""" - - def __init__(self, with_lstm=True, n_group=1, in_channels=3): - super(ResNet_ASTER, self).__init__() - self.with_lstm = with_lstm - self.n_group = n_group - - self.layer0 = nn.Sequential( - nn.Conv2D( - in_channels, - 32, - kernel_size=(3, 3), - stride=1, - padding=1, - bias_attr=False), - nn.BatchNorm2D(32), - nn.ReLU()) - - self.inplanes = 32 - self.layer1 = self._make_layer(32, 3, [2, 2]) # [16, 50] - self.layer2 = self._make_layer(64, 4, [2, 2]) # [8, 25] - self.layer3 = self._make_layer(128, 6, [2, 1]) # [4, 25] - self.layer4 = self._make_layer(256, 6, [2, 1]) # [2, 25] - self.layer5 = self._make_layer(512, 3, [2, 1]) # [1, 25] - - if with_lstm: - self.rnn = nn.LSTM(512, 256, direction="bidirect", num_layers=2) - self.out_channels = 2 * 256 - else: - self.out_channels = 512 - - def _make_layer(self, planes, blocks, stride): - downsample = None - if stride != [1, 1] or self.inplanes != planes: - downsample = nn.Sequential( 
- conv1x1(self.inplanes, planes, stride), nn.BatchNorm2D(planes)) - - layers = [] - layers.append(AsterBlock(self.inplanes, planes, stride, downsample)) - self.inplanes = planes - for _ in range(1, blocks): - layers.append(AsterBlock(self.inplanes, planes)) - return nn.Sequential(*layers) - - def forward(self, x): - x0 = self.layer0(x) - x1 = self.layer1(x0) - x2 = self.layer2(x1) - x3 = self.layer3(x2) - x4 = self.layer4(x3) - x5 = self.layer5(x4) - - cnn_feat = x5.squeeze(2) # [N, c, w] - cnn_feat = paddle.transpose(cnn_feat, perm=[0, 2, 1]) - if self.with_lstm: - rnn_feat, _ = self.rnn(cnn_feat) - return rnn_feat - else: - return cnn_feat diff --git a/backend/ppocr/modeling/backbones/rec_resnet_fpn.py b/backend/ppocr/modeling/backbones/rec_resnet_fpn.py deleted file mode 100644 index a7e876a2..00000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_fpn.py +++ /dev/null @@ -1,307 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import paddle.fluid as fluid -import paddle -import numpy as np - -__all__ = ["ResNetFPN"] - - -class ResNetFPN(nn.Layer): - def __init__(self, in_channels=1, layers=50, **kwargs): - super(ResNetFPN, self).__init__() - supported_layers = { - 18: { - 'depth': [2, 2, 2, 2], - 'block_class': BasicBlock - }, - 34: { - 'depth': [3, 4, 6, 3], - 'block_class': BasicBlock - }, - 50: { - 'depth': [3, 4, 6, 3], - 'block_class': BottleneckBlock - }, - 101: { - 'depth': [3, 4, 23, 3], - 'block_class': BottleneckBlock - }, - 152: { - 'depth': [3, 8, 36, 3], - 'block_class': BottleneckBlock - } - } - stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)] - num_filters = [64, 128, 256, 512] - self.depth = supported_layers[layers]['depth'] - self.F = [] - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=7, - stride=2, - act="relu", - name="conv1") - self.block_list = [] - in_ch = 64 - if layers >= 50: - for block in range(len(self.depth)): - for i in range(self.depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - block_list = self.add_sublayer( - "bottleneckBlock_{}_{}".format(block, i), - BottleneckBlock( - in_channels=in_ch, - out_channels=num_filters[block], - stride=stride_list[block] if i == 0 else 1, - name=conv_name)) - in_ch = num_filters[block] * 4 - self.block_list.append(block_list) - self.F.append(block_list) - else: - for block in range(len(self.depth)): - for i in range(self.depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - if i == 0 and block != 0: - stride = (2, 1) - else: - stride = (1, 1) - basic_block = self.add_sublayer( - conv_name, - BasicBlock( - in_channels=in_ch, - 
out_channels=num_filters[block], - stride=stride_list[block] if i == 0 else 1, - is_first=block == i == 0, - name=conv_name)) - in_ch = basic_block.out_channels - self.block_list.append(basic_block) - out_ch_list = [in_ch // 4, in_ch // 2, in_ch] - self.base_block = [] - self.conv_trans = [] - self.bn_block = [] - for i in [-2, -3]: - in_channels = out_ch_list[i + 1] + out_ch_list[i] - - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_0".format(i), - nn.Conv2D( - in_channels=in_channels, - out_channels=out_ch_list[i], - kernel_size=1, - weight_attr=ParamAttr(trainable=True), - bias_attr=ParamAttr(trainable=True)))) - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_1".format(i), - nn.Conv2D( - in_channels=out_ch_list[i], - out_channels=out_ch_list[i], - kernel_size=3, - padding=1, - weight_attr=ParamAttr(trainable=True), - bias_attr=ParamAttr(trainable=True)))) - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_2".format(i), - nn.BatchNorm( - num_channels=out_ch_list[i], - act="relu", - param_attr=ParamAttr(trainable=True), - bias_attr=ParamAttr(trainable=True)))) - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_3".format(i), - nn.Conv2D( - in_channels=out_ch_list[i], - out_channels=512, - kernel_size=1, - bias_attr=ParamAttr(trainable=True), - weight_attr=ParamAttr(trainable=True)))) - self.out_channels = 512 - - def __call__(self, x): - x = self.conv(x) - fpn_list = [] - F = [] - for i in range(len(self.depth)): - fpn_list.append(np.sum(self.depth[:i + 1])) - - for i, block in enumerate(self.block_list): - x = block(x) - for number in fpn_list: - if i + 1 == number: - F.append(x) - base = F[-1] - - j = 0 - for i, block in enumerate(self.base_block): - if i % 3 == 0 and i < 6: - j = j + 1 - b, c, w, h = F[-j - 1].shape - if [w, h] == list(base.shape[2:]): - base = base - else: - base = self.conv_trans[j - 1](base) - base = self.bn_block[j - 1](base) - base = paddle.concat([base, F[-j - 1]], axis=1) - base = block(base) - return base - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=2 if stride == (1, 1) else kernel_size, - dilation=2 if stride == (1, 1) else 1, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '.conv2d.output.1.w_0'), - bias_attr=False, ) - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name=name + '.output.1.w_0'), - bias_attr=ParamAttr(name=name + '.output.1.b_0'), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def __call__(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class ShortCut(nn.Layer): - def __init__(self, in_channels, out_channels, stride, name, is_first=False): - super(ShortCut, self).__init__() - self.use_conv = True - - if in_channels != out_channels or stride != 1 or is_first == True: - if stride == (1, 1): - self.conv = ConvBNLayer( - in_channels, out_channels, 1, 1, name=name) - else: # stride==(2,2) - self.conv = ConvBNLayer( - in_channels, out_channels, 1, stride, name=name) - else: - self.use_conv = False - - def forward(self, x): - if self.use_conv: - x = self.conv(x) - return x - - -class BottleneckBlock(nn.Layer): - def 
__init__(self, in_channels, out_channels, stride, name): - super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - self.short = ShortCut( - in_channels=in_channels, - out_channels=out_channels * 4, - stride=stride, - is_first=False, - name=name + "_branch1") - self.out_channels = out_channels * 4 - - def forward(self, x): - y = self.conv0(x) - y = self.conv1(y) - y = self.conv2(y) - y = y + self.short(x) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, in_channels, out_channels, stride, name, is_first): - super(BasicBlock, self).__init__() - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - self.short = ShortCut( - in_channels=in_channels, - out_channels=out_channels, - stride=stride, - is_first=is_first, - name=name + "_branch1") - self.out_channels = out_channels - - def forward(self, x): - y = self.conv0(x) - y = self.conv1(y) - y = y + self.short(x) - return F.relu(y) diff --git a/backend/ppocr/modeling/backbones/rec_resnet_vd.py b/backend/ppocr/modeling/backbones/rec_resnet_vd.py deleted file mode 100644 index 0187deb9..00000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_vd.py +++ /dev/null @@ -1,286 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
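For reference, a minimal sketch of the ResNet-vd shortcut used by ConvBNLayer below when is_vd_mode is set: the stride is moved out of the 1x1 convolution into a 2x2 average pool, so the projection sees every input activation instead of skipping three quarters of them (channel counts and input size assumed):

import paddle
import paddle.nn as nn

pool = nn.AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
conv = nn.Conv2D(64, 256, kernel_size=1, stride=1, bias_attr=False)

x = paddle.rand([1, 64, 32, 32])
y = conv(pool(x))                # pool first, then a stride-1 1x1 conv
print(y.shape)                   # [1, 256, 16, 16]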
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=stride, stride=stride, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=1 if is_vd_mode else stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=stride, - is_vd_mode=not if_first and stride[0] != 1, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=stride, - is_vd_mode=not if_first and stride[0] != 1, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet, self).__init__() - - 
self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, - 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - if layers >= 50: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - if i == 0 and block != 0: - stride = (2, 1) - else: - stride = (1, 1) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=stride, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - self.block_list.append(bottleneck_block) - self.out_channels = num_filters[block] * 4 - else: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - if i == 0 and block != 0: - stride = (2, 1) - else: - stride = (1, 1) - - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=stride, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - self.block_list.append(basic_block) - self.out_channels = num_filters[block] - self.out_pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - - def forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.out_pool(y) - return y diff --git a/backend/ppocr/modeling/backbones/rec_svtrnet.py b/backend/ppocr/modeling/backbones/rec_svtrnet.py deleted file mode 100644 index c57bf463..00000000 --- a/backend/ppocr/modeling/backbones/rec_svtrnet.py +++ /dev/null @@ -1,584 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
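For reference, a numeric sketch of the stochastic-depth rule implemented by drop_path below: each sample in the batch is either zeroed out or rescaled by 1/keep_prob, so the output matches the input in expectation (shapes and drop_prob assumed):

import paddle

drop_prob = 0.2
keep_prob = 1 - drop_prob
x = paddle.ones([4, 3, 8, 25])
mask = paddle.floor(keep_prob + paddle.rand([4, 1, 1, 1]))  # 0 or 1 per sample
out = x / keep_prob * mask
print(out[:, 0, 0, 0].tolist())  # e.g. [1.25, 0.0, 1.25, 1.25]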
- -from paddle import ParamAttr -from paddle.nn.initializer import KaimingNormal -import numpy as np -import paddle -import paddle.nn as nn -from paddle.nn.initializer import TruncatedNormal, Constant, Normal - -trunc_normal_ = TruncatedNormal(std=.02) -normal_ = Normal -zeros_ = Constant(value=0.) -ones_ = Constant(value=1.) - - -def drop_path(x, drop_prob=0., training=False): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... - See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... - """ - if drop_prob == 0. or not training: - return x - keep_prob = paddle.to_tensor(1 - drop_prob) - shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) - random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) - random_tensor = paddle.floor(random_tensor) # binarize - output = x.divide(keep_prob) * random_tensor - return output - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=0, - bias_attr=False, - groups=1, - act=nn.GELU): - super().__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=paddle.ParamAttr( - initializer=nn.initializer.KaimingUniform()), - bias_attr=bias_attr) - self.norm = nn.BatchNorm2D(out_channels) - self.act = act() - - def forward(self, inputs): - out = self.conv(inputs) - out = self.norm(out) - out = self.act(out) - return out - - -class DropPath(nn.Layer): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - -class Identity(nn.Layer): - def __init__(self): - super(Identity, self).__init__() - - def forward(self, input): - return input - - -class Mlp(nn.Layer): - def __init__(self, - in_features, - hidden_features=None, - out_features=None, - act_layer=nn.GELU, - drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class ConvMixer(nn.Layer): - def __init__( - self, - dim, - num_heads=8, - HW=[8, 25], - local_k=[3, 3], ): - super().__init__() - self.HW = HW - self.dim = dim - self.local_mixer = nn.Conv2D( - dim, - dim, - local_k, - 1, [local_k[0] // 2, local_k[1] // 2], - groups=num_heads, - weight_attr=ParamAttr(initializer=KaimingNormal())) - - def forward(self, x): - h = self.HW[0] - w = self.HW[1] - x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w]) - x = self.local_mixer(x) - x = x.flatten(2).transpose([0, 2, 1]) - return x - - -class Attention(nn.Layer): - def __init__(self, - dim, - num_heads=8, - mixer='Global', - HW=[8, 25], - local_k=[7, 11], - qkv_bias=False, - qk_scale=None, - attn_drop=0., - proj_drop=0.): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim**-0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) - 
self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - self.HW = HW - if HW is not None: - H = HW[0] - W = HW[1] - self.N = H * W - self.C = dim - if mixer == 'Local' and HW is not None: - hk = local_k[0] - wk = local_k[1] - mask = paddle.ones([H * W, H + hk - 1, W + wk - 1], dtype='float32') - for h in range(0, H): - for w in range(0, W): - mask[h * W + w, h:h + hk, w:w + wk] = 0. - mask_paddle = mask[:, hk // 2:H + hk // 2, wk // 2:W + wk // - 2].flatten(1) - mask_inf = paddle.full([H * W, H * W], '-inf', dtype='float32') - mask = paddle.where(mask_paddle < 1, mask_paddle, mask_inf) - self.mask = mask.unsqueeze([0, 1]) - self.mixer = mixer - - def forward(self, x): - if self.HW is not None: - N = self.N - C = self.C - else: - _, N, C = x.shape - qkv = self.qkv(x).reshape((0, N, 3, self.num_heads, C // - self.num_heads)).transpose((2, 0, 3, 1, 4)) - q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] - - attn = (q.matmul(k.transpose((0, 1, 3, 2)))) - if self.mixer == 'Local': - attn += self.mask - attn = nn.functional.softmax(attn, axis=-1) - attn = self.attn_drop(attn) - - x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((0, N, C)) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Layer): - def __init__(self, - dim, - num_heads, - mixer='Global', - local_mixer=[7, 11], - HW=[8, 25], - mlp_ratio=4., - qkv_bias=False, - qk_scale=None, - drop=0., - attn_drop=0., - drop_path=0., - act_layer=nn.GELU, - norm_layer='nn.LayerNorm', - epsilon=1e-6, - prenorm=True): - super().__init__() - if isinstance(norm_layer, str): - self.norm1 = eval(norm_layer)(dim, epsilon=epsilon) - else: - self.norm1 = norm_layer(dim) - if mixer == 'Global' or mixer == 'Local': - self.mixer = Attention( - dim, - num_heads=num_heads, - mixer=mixer, - HW=HW, - local_k=local_mixer, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=drop) - elif mixer == 'Conv': - self.mixer = ConvMixer( - dim, num_heads=num_heads, HW=HW, local_k=local_mixer) - else: - raise TypeError("The mixer must be one of [Global, Local, Conv]") - - self.drop_path = DropPath(drop_path) if drop_path > 0. 
else Identity() - if isinstance(norm_layer, str): - self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) - else: - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp_ratio = mlp_ratio - self.mlp = Mlp(in_features=dim, - hidden_features=mlp_hidden_dim, - act_layer=act_layer, - drop=drop) - self.prenorm = prenorm - - def forward(self, x): - if self.prenorm: - x = self.norm1(x + self.drop_path(self.mixer(x))) - x = self.norm2(x + self.drop_path(self.mlp(x))) - else: - x = x + self.drop_path(self.mixer(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Layer): - """ Image to Patch Embedding - """ - - def __init__(self, - img_size=[32, 100], - in_channels=3, - embed_dim=768, - sub_num=2): - super().__init__() - num_patches = (img_size[1] // (2 ** sub_num)) * \ - (img_size[0] // (2 ** sub_num)) - self.img_size = img_size - self.num_patches = num_patches - self.embed_dim = embed_dim - self.norm = None - if sub_num == 2: - self.proj = nn.Sequential( - ConvBNLayer( - in_channels=in_channels, - out_channels=embed_dim // 2, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None), - ConvBNLayer( - in_channels=embed_dim // 2, - out_channels=embed_dim, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None)) - if sub_num == 3: - self.proj = nn.Sequential( - ConvBNLayer( - in_channels=in_channels, - out_channels=embed_dim // 4, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None), - ConvBNLayer( - in_channels=embed_dim // 4, - out_channels=embed_dim // 2, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None), - ConvBNLayer( - in_channels=embed_dim // 2, - out_channels=embed_dim, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None)) - - def forward(self, x): - B, C, H, W = x.shape - assert H == self.img_size[0] and W == self.img_size[1], \ - f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
- x = self.proj(x).flatten(2).transpose((0, 2, 1)) - return x - - -class SubSample(nn.Layer): - def __init__(self, - in_channels, - out_channels, - types='Pool', - stride=[2, 1], - sub_norm='nn.LayerNorm', - act=None): - super().__init__() - self.types = types - if types == 'Pool': - self.avgpool = nn.AvgPool2D( - kernel_size=[3, 5], stride=stride, padding=[1, 2]) - self.maxpool = nn.MaxPool2D( - kernel_size=[3, 5], stride=stride, padding=[1, 2]) - self.proj = nn.Linear(in_channels, out_channels) - else: - self.conv = nn.Conv2D( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - weight_attr=ParamAttr(initializer=KaimingNormal())) - self.norm = eval(sub_norm)(out_channels) - if act is not None: - self.act = act() - else: - self.act = None - - def forward(self, x): - - if self.types == 'Pool': - x1 = self.avgpool(x) - x2 = self.maxpool(x) - x = (x1 + x2) * 0.5 - out = self.proj(x.flatten(2).transpose((0, 2, 1))) - else: - x = self.conv(x) - out = x.flatten(2).transpose((0, 2, 1)) - out = self.norm(out) - if self.act is not None: - out = self.act(out) - - return out - - -class SVTRNet(nn.Layer): - def __init__( - self, - img_size=[32, 100], - in_channels=3, - embed_dim=[64, 128, 256], - depth=[3, 6, 3], - num_heads=[2, 4, 8], - mixer=['Local'] * 6 + ['Global'] * - 6, # Local atten, Global atten, Conv - local_mixer=[[7, 11], [7, 11], [7, 11]], - patch_merging='Conv', # Conv, Pool, None - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0., - last_drop=0.1, - attn_drop_rate=0., - drop_path_rate=0.1, - norm_layer='nn.LayerNorm', - sub_norm='nn.LayerNorm', - epsilon=1e-6, - out_channels=192, - out_char_num=25, - block_unit='Block', - act='nn.GELU', - last_stage=True, - sub_num=2, - prenorm=True, - use_lenhead=False, - **kwargs): - super().__init__() - self.img_size = img_size - self.embed_dim = embed_dim - self.out_channels = out_channels - self.prenorm = prenorm - patch_merging = None if patch_merging != 'Conv' and patch_merging != 'Pool' else patch_merging - self.patch_embed = PatchEmbed( - img_size=img_size, - in_channels=in_channels, - embed_dim=embed_dim[0], - sub_num=sub_num) - num_patches = self.patch_embed.num_patches - self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)] - self.pos_embed = self.create_parameter( - shape=[1, num_patches, embed_dim[0]], default_initializer=zeros_) - self.add_parameter("pos_embed", self.pos_embed) - self.pos_drop = nn.Dropout(p=drop_rate) - Block_unit = eval(block_unit) - - dpr = np.linspace(0, drop_path_rate, sum(depth)) - self.blocks1 = nn.LayerList([ - Block_unit( - dim=embed_dim[0], - num_heads=num_heads[0], - mixer=mixer[0:depth[0]][i], - HW=self.HW, - local_mixer=local_mixer[0], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=eval(act), - attn_drop=attn_drop_rate, - drop_path=dpr[0:depth[0]][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm) for i in range(depth[0]) - ]) - if patch_merging is not None: - self.sub_sample1 = SubSample( - embed_dim[0], - embed_dim[1], - sub_norm=sub_norm, - stride=[2, 1], - types=patch_merging) - HW = [self.HW[0] // 2, self.HW[1]] - else: - HW = self.HW - self.patch_merging = patch_merging - self.blocks2 = nn.LayerList([ - Block_unit( - dim=embed_dim[1], - num_heads=num_heads[1], - mixer=mixer[depth[0]:depth[0] + depth[1]][i], - HW=HW, - local_mixer=local_mixer[1], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=eval(act), - attn_drop=attn_drop_rate, - 
drop_path=dpr[depth[0]:depth[0] + depth[1]][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm) for i in range(depth[1]) - ]) - if patch_merging is not None: - self.sub_sample2 = SubSample( - embed_dim[1], - embed_dim[2], - sub_norm=sub_norm, - stride=[2, 1], - types=patch_merging) - HW = [self.HW[0] // 4, self.HW[1]] - else: - HW = self.HW - self.blocks3 = nn.LayerList([ - Block_unit( - dim=embed_dim[2], - num_heads=num_heads[2], - mixer=mixer[depth[0] + depth[1]:][i], - HW=HW, - local_mixer=local_mixer[2], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=eval(act), - attn_drop=attn_drop_rate, - drop_path=dpr[depth[0] + depth[1]:][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm) for i in range(depth[2]) - ]) - self.last_stage = last_stage - if last_stage: - self.avg_pool = nn.AdaptiveAvgPool2D([1, out_char_num]) - self.last_conv = nn.Conv2D( - in_channels=embed_dim[2], - out_channels=self.out_channels, - kernel_size=1, - stride=1, - padding=0, - bias_attr=False) - self.hardswish = nn.Hardswish() - self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer") - if not prenorm: - self.norm = eval(norm_layer)(embed_dim[-1], epsilon=epsilon) - self.use_lenhead = use_lenhead - if use_lenhead: - self.len_conv = nn.Linear(embed_dim[2], self.out_channels) - self.hardswish_len = nn.Hardswish() - self.dropout_len = nn.Dropout( - p=last_drop, mode="downscale_in_infer") - - trunc_normal_(self.pos_embed) - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight) - if isinstance(m, nn.Linear) and m.bias is not None: - zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - zeros_(m.bias) - ones_(m.weight) - - def forward_features(self, x): - x = self.patch_embed(x) - x = x + self.pos_embed - x = self.pos_drop(x) - for blk in self.blocks1: - x = blk(x) - if self.patch_merging is not None: - x = self.sub_sample1( - x.transpose([0, 2, 1]).reshape( - [0, self.embed_dim[0], self.HW[0], self.HW[1]])) - for blk in self.blocks2: - x = blk(x) - if self.patch_merging is not None: - x = self.sub_sample2( - x.transpose([0, 2, 1]).reshape( - [0, self.embed_dim[1], self.HW[0] // 2, self.HW[1]])) - for blk in self.blocks3: - x = blk(x) - if not self.prenorm: - x = self.norm(x) - return x - - def forward(self, x): - x = self.forward_features(x) - if self.use_lenhead: - len_x = self.len_conv(x.mean(1)) - len_x = self.dropout_len(self.hardswish_len(len_x)) - if self.last_stage: - if self.patch_merging is not None: - h = self.HW[0] // 4 - else: - h = self.HW[0] - x = self.avg_pool( - x.transpose([0, 2, 1]).reshape( - [0, self.embed_dim[2], h, self.HW[1]])) - x = self.last_conv(x) - x = self.hardswish(x) - x = self.dropout(x) - if self.use_lenhead: - return x, len_x - return x diff --git a/backend/ppocr/modeling/backbones/vqa_layoutlm.py b/backend/ppocr/modeling/backbones/vqa_layoutlm.py deleted file mode 100644 index ede5b7a3..00000000 --- a/backend/ppocr/modeling/backbones/vqa_layoutlm.py +++ /dev/null @@ -1,172 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from paddle import nn - -from paddlenlp.transformers import LayoutXLMModel, LayoutXLMForTokenClassification, LayoutXLMForRelationExtraction -from paddlenlp.transformers import LayoutLMModel, LayoutLMForTokenClassification -from paddlenlp.transformers import LayoutLMv2Model, LayoutLMv2ForTokenClassification, LayoutLMv2ForRelationExtraction - -__all__ = ["LayoutXLMForSer", 'LayoutLMForSer'] - -pretrained_model_dict = { - LayoutXLMModel: 'layoutxlm-base-uncased', - LayoutLMModel: 'layoutlm-base-uncased', - LayoutLMv2Model: 'layoutlmv2-base-uncased' -} - - -class NLPBaseModel(nn.Layer): - def __init__(self, - base_model_class, - model_class, - type='ser', - pretrained=True, - checkpoints=None, - **kwargs): - super(NLPBaseModel, self).__init__() - if checkpoints is not None: - self.model = model_class.from_pretrained(checkpoints) - else: - pretrained_model_name = pretrained_model_dict[base_model_class] - if pretrained: - base_model = base_model_class.from_pretrained( - pretrained_model_name) - else: - base_model = base_model_class( - **base_model_class.pretrained_init_configuration[ - pretrained_model_name]) - if type == 'ser': - self.model = model_class( - base_model, num_classes=kwargs['num_classes'], dropout=None) - else: - self.model = model_class(base_model, dropout=None) - self.out_channels = 1 - - -class LayoutLMForSer(NLPBaseModel): - def __init__(self, num_classes, pretrained=True, checkpoints=None, - **kwargs): - super(LayoutLMForSer, self).__init__( - LayoutLMModel, - LayoutLMForTokenClassification, - 'ser', - pretrained, - checkpoints, - num_classes=num_classes) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[2], - attention_mask=x[4], - token_type_ids=x[5], - position_ids=None, - output_hidden_states=False) - return x - - -class LayoutLMv2ForSer(NLPBaseModel): - def __init__(self, num_classes, pretrained=True, checkpoints=None, - **kwargs): - super(LayoutLMv2ForSer, self).__init__( - LayoutLMv2Model, - LayoutLMv2ForTokenClassification, - 'ser', - pretrained, - checkpoints, - num_classes=num_classes) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[2], - image=x[3], - attention_mask=x[4], - token_type_ids=x[5], - position_ids=None, - head_mask=None, - labels=None) - return x[0] - - -class LayoutXLMForSer(NLPBaseModel): - def __init__(self, num_classes, pretrained=True, checkpoints=None, - **kwargs): - super(LayoutXLMForSer, self).__init__( - LayoutXLMModel, - LayoutXLMForTokenClassification, - 'ser', - pretrained, - checkpoints, - num_classes=num_classes) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[2], - image=x[3], - attention_mask=x[4], - token_type_ids=x[5], - position_ids=None, - head_mask=None, - labels=None) - return x[0] - - -class LayoutLMv2ForRe(NLPBaseModel): - def __init__(self, pretrained=True, checkpoints=None, **kwargs): - super(LayoutLMv2ForRe, self).__init__(LayoutLMv2Model, - LayoutLMv2ForRelationExtraction, - 're', pretrained, checkpoints) - - def forward(self, 
x): - x = self.model( - input_ids=x[0], - bbox=x[1], - labels=None, - image=x[2], - attention_mask=x[3], - token_type_ids=x[4], - position_ids=None, - head_mask=None, - entities=x[5], - relations=x[6]) - return x - - -class LayoutXLMForRe(NLPBaseModel): - def __init__(self, pretrained=True, checkpoints=None, **kwargs): - super(LayoutXLMForRe, self).__init__(LayoutXLMModel, - LayoutXLMForRelationExtraction, - 're', pretrained, checkpoints) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[1], - labels=None, - image=x[2], - attention_mask=x[3], - token_type_ids=x[4], - position_ids=None, - head_mask=None, - entities=x[5], - relations=x[6]) - return x diff --git a/backend/ppocr/modeling/heads/__init__.py b/backend/ppocr/modeling/heads/__init__.py deleted file mode 100755 index 1670ea38..00000000 --- a/backend/ppocr/modeling/heads/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['build_head'] - - -def build_head(config): - # det head - from .det_db_head import DBHead - from .det_east_head import EASTHead - from .det_sast_head import SASTHead - from .det_pse_head import PSEHead - from .det_fce_head import FCEHead - from .e2e_pg_head import PGHead - - # rec head - from .rec_ctc_head import CTCHead - from .rec_att_head import AttentionHead - from .rec_srn_head import SRNHead - from .rec_nrtr_head import Transformer - from .rec_sar_head import SARHead - from .rec_aster_head import AsterHead - from .rec_pren_head import PRENHead - from .rec_multi_head import MultiHead - - # cls head - from .cls_head import ClsHead - - #kie head - from .kie_sdmgr_head import SDMGRHead - - from .table_att_head import TableAttentionHead - - support_dict = [ - 'DBHead', 'PSEHead', 'FCEHead', 'EASTHead', 'SASTHead', 'CTCHead', - 'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer', - 'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead', - 'MultiHead' - ] - - #table head - - module_name = config.pop('name') - assert module_name in support_dict, Exception('head only support {}'.format( - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/heads/cls_head.py b/backend/ppocr/modeling/heads/cls_head.py deleted file mode 100644 index 91bfa615..00000000 --- a/backend/ppocr/modeling/heads/cls_head.py +++ /dev/null @@ -1,52 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -import paddle.nn.functional as F - - -class ClsHead(nn.Layer): - """ - Class orientation - - Args: - - params(dict): super parameters for build Class network - """ - - def __init__(self, in_channels, class_dim, **kwargs): - super(ClsHead, self).__init__() - self.pool = nn.AdaptiveAvgPool2D(1) - stdv = 1.0 / math.sqrt(in_channels * 1.0) - self.fc = nn.Linear( - in_channels, - class_dim, - weight_attr=ParamAttr( - name="fc_0.w_0", - initializer=nn.initializer.Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_0.b_0"), ) - - def forward(self, x, targets=None): - x = self.pool(x) - x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) - x = self.fc(x) - if not self.training: - x = F.softmax(x, axis=1) - return x diff --git a/backend/ppocr/modeling/heads/det_db_head.py b/backend/ppocr/modeling/heads/det_db_head.py deleted file mode 100644 index a686ae5a..00000000 --- a/backend/ppocr/modeling/heads/det_db_head.py +++ /dev/null @@ -1,118 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
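[editor's note] The ClsHead deleted above is a plain global-average-pool plus linear classifier used for text-direction classification; softmax is applied only at inference. A minimal smoke test, assuming illustrative sizes (in_channels=64, class_dim=2 for 0/180-degree orientation; neither value comes from this patch):

import paddle

# Hypothetical usage of the ClsHead removed above; all sizes are illustrative.
head = ClsHead(in_channels=64, class_dim=2)
head.eval()                          # the softmax branch runs only when not training
feat = paddle.rand([4, 64, 8, 32])   # NCHW feature map from a backbone
probs = head(feat)
print(probs.shape)                   # [4, 2]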
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -def get_bias_attr(k): - stdv = 1.0 / math.sqrt(k * 1.0) - initializer = paddle.nn.initializer.Uniform(-stdv, stdv) - bias_attr = ParamAttr(initializer=initializer) - return bias_attr - - -class Head(nn.Layer): - def __init__(self, in_channels, name_list, kernel_list=[3, 2, 2], **kwargs): - super(Head, self).__init__() - - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels // 4, - kernel_size=kernel_list[0], - padding=int(kernel_list[0] // 2), - weight_attr=ParamAttr(), - bias_attr=False) - self.conv_bn1 = nn.BatchNorm( - num_channels=in_channels // 4, - param_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0)), - bias_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1e-4)), - act='relu') - self.conv2 = nn.Conv2DTranspose( - in_channels=in_channels // 4, - out_channels=in_channels // 4, - kernel_size=kernel_list[1], - stride=2, - weight_attr=ParamAttr( - initializer=paddle.nn.initializer.KaimingUniform()), - bias_attr=get_bias_attr(in_channels // 4)) - self.conv_bn2 = nn.BatchNorm( - num_channels=in_channels // 4, - param_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0)), - bias_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1e-4)), - act="relu") - self.conv3 = nn.Conv2DTranspose( - in_channels=in_channels // 4, - out_channels=1, - kernel_size=kernel_list[2], - stride=2, - weight_attr=ParamAttr( - initializer=paddle.nn.initializer.KaimingUniform()), - bias_attr=get_bias_attr(in_channels // 4), ) - - def forward(self, x): - x = self.conv1(x) - x = self.conv_bn1(x) - x = self.conv2(x) - x = self.conv_bn2(x) - x = self.conv3(x) - x = F.sigmoid(x) - return x - - -class DBHead(nn.Layer): - """ - Differentiable Binarization (DB) for text detection: - see https://arxiv.org/abs/1911.08947 - args: - params(dict): super parameters for build DB network - """ - - def __init__(self, in_channels, k=50, **kwargs): - super(DBHead, self).__init__() - self.k = k - binarize_name_list = [ - 'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48', - 'conv2d_transpose_1', 'binarize' - ] - thresh_name_list = [ - 'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', - 'conv2d_transpose_3', 'thresh' - ] - self.binarize = Head(in_channels, binarize_name_list, **kwargs) - self.thresh = Head(in_channels, thresh_name_list, **kwargs) - - def step_function(self, x, y): - return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y))) - - def forward(self, x, targets=None): - shrink_maps = self.binarize(x) - if not self.training: - return {'maps': shrink_maps} - - threshold_maps = self.thresh(x) - binary_maps = self.step_function(shrink_maps, threshold_maps) - y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1) - return {'maps': y} diff --git a/backend/ppocr/modeling/heads/det_east_head.py b/backend/ppocr/modeling/heads/det_east_head.py deleted file mode 100644 index 004eb5d7..00000000 --- a/backend/ppocr/modeling/heads/det_east_head.py +++ /dev/null @@ -1,121 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class EASTHead(nn.Layer): - """ - """ - def __init__(self, in_channels, model_name, **kwargs): - super(EASTHead, self).__init__() - self.model_name = model_name - if self.model_name == "large": - num_outputs = [128, 64, 1, 8] - else: - num_outputs = [64, 32, 1, 8] - - self.det_conv1 = ConvBNLayer( - in_channels=in_channels, - out_channels=num_outputs[0], - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="det_head1") - self.det_conv2 = ConvBNLayer( - in_channels=num_outputs[0], - out_channels=num_outputs[1], - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="det_head2") - self.score_conv = ConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[2], - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name="f_score") - self.geo_conv = ConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[3], - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name="f_geo") - - def forward(self, x, targets=None): - f_det = self.det_conv1(x) - f_det = self.det_conv2(f_det) - f_score = self.score_conv(f_det) - f_score = F.sigmoid(f_score) - f_geo = self.geo_conv(f_det) - f_geo = (F.sigmoid(f_geo) - 0.5) * 2 * 800 - - pred = {'f_score': f_score, 'f_geo': f_geo} - return pred diff --git a/backend/ppocr/modeling/heads/det_fce_head.py b/backend/ppocr/modeling/heads/det_fce_head.py deleted file mode 100644 index 9503989f..00000000 --- a/backend/ppocr/modeling/heads/det_fce_head.py +++ /dev/null @@ -1,99 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/dense_heads/fce_head.py -""" - -from paddle import nn -from paddle import ParamAttr -import paddle.nn.functional as F -from paddle.nn.initializer import Normal -import paddle -from functools import partial - - -def multi_apply(func, *args, **kwargs): - pfunc = partial(func, **kwargs) if kwargs else func - map_results = map(pfunc, *args) - return tuple(map(list, zip(*map_results))) - - -class FCEHead(nn.Layer): - """The class for implementing FCENet head. - FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text - Detection. - - [https://arxiv.org/abs/2104.10442] - - Args: - in_channels (int): The number of input channels. - scales (list[int]) : The scale of each layer. - fourier_degree (int) : The maximum Fourier transform degree k. - """ - - def __init__(self, in_channels, fourier_degree=5): - super().__init__() - assert isinstance(in_channels, int) - - self.downsample_ratio = 1.0 - self.in_channels = in_channels - self.fourier_degree = fourier_degree - self.out_channels_cls = 4 - self.out_channels_reg = (2 * self.fourier_degree + 1) * 2 - - self.out_conv_cls = nn.Conv2D( - in_channels=self.in_channels, - out_channels=self.out_channels_cls, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr( - name='cls_weights', - initializer=Normal( - mean=0., std=0.01)), - bias_attr=True) - self.out_conv_reg = nn.Conv2D( - in_channels=self.in_channels, - out_channels=self.out_channels_reg, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr( - name='reg_weights', - initializer=Normal( - mean=0., std=0.01)), - bias_attr=True) - - def forward(self, feats, targets=None): - cls_res, reg_res = multi_apply(self.forward_single, feats) - level_num = len(cls_res) - outs = {} - if not self.training: - for i in range(level_num): - tr_pred = F.softmax(cls_res[i][:, 0:2, :, :], axis=1) - tcl_pred = F.softmax(cls_res[i][:, 2:, :, :], axis=1) - outs['level_{}'.format(i)] = paddle.concat( - [tr_pred, tcl_pred, reg_res[i]], axis=1) - else: - preds = [[cls_res[i], reg_res[i]] for i in range(level_num)] - outs['levels'] = preds - return outs - - def forward_single(self, x): - cls_predict = self.out_conv_cls(x) - reg_predict = self.out_conv_reg(x) - return cls_predict, reg_predict diff --git a/backend/ppocr/modeling/heads/det_pse_head.py b/backend/ppocr/modeling/heads/det_pse_head.py deleted file mode 100644 index 32a5b48e..00000000 --- a/backend/ppocr/modeling/heads/det_pse_head.py +++ /dev/null @@ -1,37 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
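[editor's note] In the FCEHead deleted above, the two 3x3 output convolutions are sized from the Fourier degree k: the classification branch always emits 4 channels (text-region and text-center logit pairs, softmaxed pairwise at inference), while the regression branch emits the 2k+1 complex Fourier contour coefficients as (2k+1)*2 real channels. A quick check of that arithmetic with the constructor default k=5:

# Channel bookkeeping from FCEHead.__init__ above (default fourier_degree=5).
fourier_degree = 5
out_channels_cls = 4                             # tr (2) + tcl (2) logits
out_channels_reg = (2 * fourier_degree + 1) * 2  # 11 complex coeffs -> 22 reals
assert (out_channels_cls, out_channels_reg) == (4, 22)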
-""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py -""" - -from paddle import nn - - -class PSEHead(nn.Layer): - def __init__(self, in_channels, hidden_dim=256, out_channels=7, **kwargs): - super(PSEHead, self).__init__() - self.conv1 = nn.Conv2D( - in_channels, hidden_dim, kernel_size=3, stride=1, padding=1) - self.bn1 = nn.BatchNorm2D(hidden_dim) - self.relu1 = nn.ReLU() - - self.conv2 = nn.Conv2D( - hidden_dim, out_channels, kernel_size=1, stride=1, padding=0) - - def forward(self, x, **kwargs): - out = self.conv1(x) - out = self.relu1(self.bn1(out)) - out = self.conv2(out) - return {'maps': out} diff --git a/backend/ppocr/modeling/heads/det_sast_head.py b/backend/ppocr/modeling/heads/det_sast_head.py deleted file mode 100644 index 7a88a2db..00000000 --- a/backend/ppocr/modeling/heads/det_sast_head.py +++ /dev/null @@ -1,128 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class SAST_Header1(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(SAST_Header1, self).__init__() - out_channels = [64, 64, 128] - self.score_conv = nn.Sequential( - ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_score1'), - ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_score2'), - ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_score3'), - ConvBNLayer(out_channels[2], 1, 3, 1, act=None, name='f_score4') - ) - self.border_conv = nn.Sequential( - ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_border1'), - ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_border2'), - ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_border3'), - ConvBNLayer(out_channels[2], 4, 3, 1, act=None, name='f_border4') - ) - - def forward(self, x): - f_score = self.score_conv(x) - f_score = F.sigmoid(f_score) - f_border = 
-        return f_score, f_border
-
-
-class SAST_Header2(nn.Layer):
-    def __init__(self, in_channels, **kwargs):
-        super(SAST_Header2, self).__init__()
-        out_channels = [64, 64, 128]
-        self.tvo_conv = nn.Sequential(
-            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tvo1'),
-            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tvo2'),
-            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tvo3'),
-            ConvBNLayer(out_channels[2], 8, 3, 1, act=None, name='f_tvo4')
-        )
-        self.tco_conv = nn.Sequential(
-            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tco1'),
-            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tco2'),
-            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tco3'),
-            ConvBNLayer(out_channels[2], 2, 3, 1, act=None, name='f_tco4')
-        )
-
-    def forward(self, x):
-        f_tvo = self.tvo_conv(x)
-        f_tco = self.tco_conv(x)
-        return f_tvo, f_tco
-
-
-class SASTHead(nn.Layer):
-    """
-    """
-    def __init__(self, in_channels, **kwargs):
-        super(SASTHead, self).__init__()
-
-        self.head1 = SAST_Header1(in_channels)
-        self.head2 = SAST_Header2(in_channels)
-
-    def forward(self, x, targets=None):
-        f_score, f_border = self.head1(x)
-        f_tvo, f_tco = self.head2(x)
-
-        predicts = {}
-        predicts['f_score'] = f_score
-        predicts['f_border'] = f_border
-        predicts['f_tvo'] = f_tvo
-        predicts['f_tco'] = f_tco
-        return predicts
\ No newline at end of file
diff --git a/backend/ppocr/modeling/heads/e2e_pg_head.py b/backend/ppocr/modeling/heads/e2e_pg_head.py
deleted file mode 100644
index 274e1cda..00000000
--- a/backend/ppocr/modeling/heads/e2e_pg_head.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
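[editor's note] Taken together, SAST_Header1 and SAST_Header2 above give the SASTHead four dense prediction maps: f_score (1 channel, sigmoid text score), f_border (4), f_tvo (8) and f_tco (2). A minimal shape sketch, assuming an illustrative in_channels=128 (this patch does not fix that value):

import paddle

# Hypothetical smoke test for the SASTHead removed above.
head = SASTHead(in_channels=128)
head.eval()
preds = head(paddle.rand([1, 128, 32, 32]))
for name in ('f_score', 'f_border', 'f_tvo', 'f_tco'):
    print(name, preds[name].shape)  # 1, 4, 8 and 2 channels, same spatial size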
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance", - use_global_stats=False) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class PGHead(nn.Layer): - """ - """ - - def __init__(self, in_channels, **kwargs): - super(PGHead, self).__init__() - self.conv_f_score1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_score{}".format(1)) - self.conv_f_score2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_score{}".format(2)) - self.conv_f_score3 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_score{}".format(3)) - - self.conv1 = nn.Conv2D( - in_channels=128, - out_channels=1, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_score{}".format(4)), - bias_attr=False) - - self.conv_f_boder1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_boder{}".format(1)) - self.conv_f_boder2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_boder{}".format(2)) - self.conv_f_boder3 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_boder{}".format(3)) - self.conv2 = nn.Conv2D( - in_channels=128, - out_channels=4, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_boder{}".format(4)), - bias_attr=False) - self.conv_f_char1 = ConvBNLayer( - in_channels=in_channels, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_char{}".format(1)) - self.conv_f_char2 = ConvBNLayer( - in_channels=128, - out_channels=128, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_char{}".format(2)) - self.conv_f_char3 = ConvBNLayer( - in_channels=128, - out_channels=256, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_char{}".format(3)) - self.conv_f_char4 = ConvBNLayer( - in_channels=256, - out_channels=256, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_char{}".format(4)) - self.conv_f_char5 = ConvBNLayer( - in_channels=256, - out_channels=256, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_char{}".format(5)) - self.conv3 = nn.Conv2D( - in_channels=256, - out_channels=37, - kernel_size=3, - stride=1, - padding=1, - groups=1, - 
weight_attr=ParamAttr(name="conv_f_char{}".format(6)), - bias_attr=False) - - self.conv_f_direc1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_direc{}".format(1)) - self.conv_f_direc2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_direc{}".format(2)) - self.conv_f_direc3 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_direc{}".format(3)) - self.conv4 = nn.Conv2D( - in_channels=128, - out_channels=2, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_direc{}".format(4)), - bias_attr=False) - - def forward(self, x, targets=None): - f_score = self.conv_f_score1(x) - f_score = self.conv_f_score2(f_score) - f_score = self.conv_f_score3(f_score) - f_score = self.conv1(f_score) - f_score = F.sigmoid(f_score) - - # f_border - f_border = self.conv_f_boder1(x) - f_border = self.conv_f_boder2(f_border) - f_border = self.conv_f_boder3(f_border) - f_border = self.conv2(f_border) - - f_char = self.conv_f_char1(x) - f_char = self.conv_f_char2(f_char) - f_char = self.conv_f_char3(f_char) - f_char = self.conv_f_char4(f_char) - f_char = self.conv_f_char5(f_char) - f_char = self.conv3(f_char) - - f_direction = self.conv_f_direc1(x) - f_direction = self.conv_f_direc2(f_direction) - f_direction = self.conv_f_direc3(f_direction) - f_direction = self.conv4(f_direction) - - predicts = {} - predicts['f_score'] = f_score - predicts['f_border'] = f_border - predicts['f_char'] = f_char - predicts['f_direction'] = f_direction - return predicts diff --git a/backend/ppocr/modeling/heads/kie_sdmgr_head.py b/backend/ppocr/modeling/heads/kie_sdmgr_head.py deleted file mode 100644 index ac5f73fa..00000000 --- a/backend/ppocr/modeling/heads/kie_sdmgr_head.py +++ /dev/null @@ -1,207 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
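[editor's note] The PGHead above (PGNet's end-to-end head) runs four parallel convolution towers over a shared feature map; their final convolutions fix the outputs at f_score 1 channel (sigmoid), f_border 4, f_char 37 (the character classification map; the charset itself is not part of this patch) and f_direction 2. A hedged shape sketch with an illustrative in_channels=128:

import paddle

# Hypothetical usage of the PGHead removed above; in_channels is illustrative.
head = PGHead(in_channels=128)
head.eval()
preds = head(paddle.rand([1, 128, 64, 64]))
for name, value in preds.items():
    print(name, value.shape)  # f_score 1, f_border 4, f_char 37, f_direction 2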
-# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/heads/sdmgr_head.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class SDMGRHead(nn.Layer): - def __init__(self, - in_channels, - num_chars=92, - visual_dim=16, - fusion_dim=1024, - node_input=32, - node_embed=256, - edge_input=5, - edge_embed=256, - num_gnn=2, - num_classes=26, - bidirectional=False): - super().__init__() - - self.fusion = Block([visual_dim, node_embed], node_embed, fusion_dim) - self.node_embed = nn.Embedding(num_chars, node_input, 0) - hidden = node_embed // 2 if bidirectional else node_embed - self.rnn = nn.LSTM( - input_size=node_input, hidden_size=hidden, num_layers=1) - self.edge_embed = nn.Linear(edge_input, edge_embed) - self.gnn_layers = nn.LayerList( - [GNNLayer(node_embed, edge_embed) for _ in range(num_gnn)]) - self.node_cls = nn.Linear(node_embed, num_classes) - self.edge_cls = nn.Linear(edge_embed, 2) - - def forward(self, input, targets): - relations, texts, x = input - node_nums, char_nums = [], [] - for text in texts: - node_nums.append(text.shape[0]) - char_nums.append(paddle.sum((text > -1).astype(int), axis=-1)) - - max_num = max([char_num.max() for char_num in char_nums]) - all_nodes = paddle.concat([ - paddle.concat( - [text, paddle.zeros( - (text.shape[0], max_num - text.shape[1]))], -1) - for text in texts - ]) - temp = paddle.clip(all_nodes, min=0).astype(int) - embed_nodes = self.node_embed(temp) - rnn_nodes, _ = self.rnn(embed_nodes) - - b, h, w = rnn_nodes.shape - nodes = paddle.zeros([b, w]) - all_nums = paddle.concat(char_nums) - valid = paddle.nonzero((all_nums > 0).astype(int)) - temp_all_nums = ( - paddle.gather(all_nums, valid) - 1).unsqueeze(-1).unsqueeze(-1) - temp_all_nums = paddle.expand(temp_all_nums, [ - temp_all_nums.shape[0], temp_all_nums.shape[1], rnn_nodes.shape[-1] - ]) - temp_all_nodes = paddle.gather(rnn_nodes, valid) - N, C, A = temp_all_nodes.shape - one_hot = F.one_hot( - temp_all_nums[:, 0, :], num_classes=C).transpose([0, 2, 1]) - one_hot = paddle.multiply( - temp_all_nodes, one_hot.astype("float32")).sum(axis=1, keepdim=True) - t = one_hot.expand([N, 1, A]).squeeze(1) - nodes = paddle.scatter(nodes, valid.squeeze(1), t) - - if x is not None: - nodes = self.fusion([x, nodes]) - - all_edges = paddle.concat( - [rel.reshape([-1, rel.shape[-1]]) for rel in relations]) - embed_edges = self.edge_embed(all_edges.astype('float32')) - embed_edges = F.normalize(embed_edges) - - for gnn_layer in self.gnn_layers: - nodes, cat_nodes = gnn_layer(nodes, embed_edges, node_nums) - - node_cls, edge_cls = self.node_cls(nodes), self.edge_cls(cat_nodes) - return node_cls, edge_cls - - -class GNNLayer(nn.Layer): - def __init__(self, node_dim=256, edge_dim=256): - super().__init__() - self.in_fc = nn.Linear(node_dim * 2 + edge_dim, node_dim) - self.coef_fc = nn.Linear(node_dim, 1) - self.out_fc = nn.Linear(node_dim, node_dim) - self.relu = nn.ReLU() - - def forward(self, nodes, edges, nums): - start, cat_nodes = 0, [] - for num in nums: - sample_nodes = nodes[start:start + num] - cat_nodes.append( - paddle.concat([ - paddle.expand(sample_nodes.unsqueeze(1), [-1, num, -1]), - paddle.expand(sample_nodes.unsqueeze(0), [num, -1, -1]) - ], -1).reshape([num**2, -1])) - start += num - cat_nodes = paddle.concat([paddle.concat(cat_nodes), edges], -1) - cat_nodes = 
self.relu(self.in_fc(cat_nodes)) - coefs = self.coef_fc(cat_nodes) - - start, residuals = 0, [] - for num in nums: - residual = F.softmax( - -paddle.eye(num).unsqueeze(-1) * 1e9 + - coefs[start:start + num**2].reshape([num, num, -1]), 1) - residuals.append((residual * cat_nodes[start:start + num**2] - .reshape([num, num, -1])).sum(1)) - start += num**2 - - nodes += self.relu(self.out_fc(paddle.concat(residuals))) - return [nodes, cat_nodes] - - -class Block(nn.Layer): - def __init__(self, - input_dims, - output_dim, - mm_dim=1600, - chunks=20, - rank=15, - shared=False, - dropout_input=0., - dropout_pre_lin=0., - dropout_output=0., - pos_norm='before_cat'): - super().__init__() - self.rank = rank - self.dropout_input = dropout_input - self.dropout_pre_lin = dropout_pre_lin - self.dropout_output = dropout_output - assert (pos_norm in ['before_cat', 'after_cat']) - self.pos_norm = pos_norm - # Modules - self.linear0 = nn.Linear(input_dims[0], mm_dim) - self.linear1 = (self.linear0 - if shared else nn.Linear(input_dims[1], mm_dim)) - self.merge_linears0 = nn.LayerList() - self.merge_linears1 = nn.LayerList() - self.chunks = self.chunk_sizes(mm_dim, chunks) - for size in self.chunks: - ml0 = nn.Linear(size, size * rank) - self.merge_linears0.append(ml0) - ml1 = ml0 if shared else nn.Linear(size, size * rank) - self.merge_linears1.append(ml1) - self.linear_out = nn.Linear(mm_dim, output_dim) - - def forward(self, x): - x0 = self.linear0(x[0]) - x1 = self.linear1(x[1]) - bs = x1.shape[0] - if self.dropout_input > 0: - x0 = F.dropout(x0, p=self.dropout_input, training=self.training) - x1 = F.dropout(x1, p=self.dropout_input, training=self.training) - x0_chunks = paddle.split(x0, self.chunks, -1) - x1_chunks = paddle.split(x1, self.chunks, -1) - zs = [] - for x0_c, x1_c, m0, m1 in zip(x0_chunks, x1_chunks, self.merge_linears0, - self.merge_linears1): - m = m0(x0_c) * m1(x1_c) # bs x split_size*rank - m = m.reshape([bs, self.rank, -1]) - z = paddle.sum(m, 1) - if self.pos_norm == 'before_cat': - z = paddle.sqrt(F.relu(z)) - paddle.sqrt(F.relu(-z)) - z = F.normalize(z) - zs.append(z) - z = paddle.concat(zs, 1) - if self.pos_norm == 'after_cat': - z = paddle.sqrt(F.relu(z)) - paddle.sqrt(F.relu(-z)) - z = F.normalize(z) - - if self.dropout_pre_lin > 0: - z = F.dropout(z, p=self.dropout_pre_lin, training=self.training) - z = self.linear_out(z) - if self.dropout_output > 0: - z = F.dropout(z, p=self.dropout_output, training=self.training) - return z - - def chunk_sizes(self, dim, chunks): - split_size = (dim + chunks - 1) // chunks - sizes_list = [split_size] * chunks - sizes_list[-1] = sizes_list[-1] - (sum(sizes_list) - dim) - return sizes_list diff --git a/backend/ppocr/modeling/heads/multiheadAttention.py b/backend/ppocr/modeling/heads/multiheadAttention.py deleted file mode 100755 index 900865ba..00000000 --- a/backend/ppocr/modeling/heads/multiheadAttention.py +++ /dev/null @@ -1,163 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle.nn import Linear -from paddle.nn.initializer import XavierUniform as xavier_uniform_ -from paddle.nn.initializer import Constant as constant_ -from paddle.nn.initializer import XavierNormal as xavier_normal_ - -zeros_ = constant_(value=0.) -ones_ = constant_(value=1.) - - -class MultiheadAttention(nn.Layer): - """Allows the model to jointly attend to information - from different representation subspaces. - See reference: Attention Is All You Need - - .. math:: - \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O - \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V) - - Args: - embed_dim: total dimension of the model - num_heads: parallel attention layers, or heads - - """ - - def __init__(self, - embed_dim, - num_heads, - dropout=0., - bias=True, - add_bias_kv=False, - add_zero_attn=False): - super(MultiheadAttention, self).__init__() - self.embed_dim = embed_dim - self.num_heads = num_heads - self.dropout = dropout - self.head_dim = embed_dim // num_heads - assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" - self.scaling = self.head_dim**-0.5 - self.out_proj = Linear(embed_dim, embed_dim, bias_attr=bias) - self._reset_parameters() - self.conv1 = paddle.nn.Conv2D( - in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1)) - self.conv2 = paddle.nn.Conv2D( - in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1)) - self.conv3 = paddle.nn.Conv2D( - in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1)) - - def _reset_parameters(self): - xavier_uniform_(self.out_proj.weight) - - def forward(self, - query, - key, - value, - key_padding_mask=None, - incremental_state=None, - attn_mask=None): - """ - Inputs of forward function - query: [target length, batch size, embed dim] - key: [sequence length, batch size, embed dim] - value: [sequence length, batch size, embed dim] - key_padding_mask: if True, mask padding based on batch size - incremental_state: if provided, previous time steps are cashed - need_weights: output attn_output_weights - static_kv: key and value are static - - Outputs of forward function - attn_output: [target length, batch size, embed dim] - attn_output_weights: [batch size, target length, sequence length] - """ - q_shape = paddle.shape(query) - src_shape = paddle.shape(key) - q = self._in_proj_q(query) - k = self._in_proj_k(key) - v = self._in_proj_v(value) - q *= self.scaling - q = paddle.transpose( - paddle.reshape( - q, [q_shape[0], q_shape[1], self.num_heads, self.head_dim]), - [1, 2, 0, 3]) - k = paddle.transpose( - paddle.reshape( - k, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]), - [1, 2, 0, 3]) - v = paddle.transpose( - paddle.reshape( - v, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]), - [1, 2, 0, 3]) - if key_padding_mask is not None: - assert key_padding_mask.shape[0] == q_shape[1] - assert key_padding_mask.shape[1] == src_shape[0] - attn_output_weights = paddle.matmul(q, - paddle.transpose(k, [0, 1, 3, 2])) - if attn_mask is not None: - attn_mask = paddle.unsqueeze(paddle.unsqueeze(attn_mask, 0), 0) - attn_output_weights += attn_mask - if key_padding_mask is not None: - attn_output_weights = paddle.reshape( - attn_output_weights, - [q_shape[1], self.num_heads, q_shape[0], src_shape[0]]) - key = paddle.unsqueeze(paddle.unsqueeze(key_padding_mask, 
1), 2) - key = paddle.cast(key, 'float32') - y = paddle.full( - shape=paddle.shape(key), dtype='float32', fill_value='-inf') - y = paddle.where(key == 0., key, y) - attn_output_weights += y - attn_output_weights = F.softmax( - attn_output_weights.astype('float32'), - axis=-1, - dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16 - else attn_output_weights.dtype) - attn_output_weights = F.dropout( - attn_output_weights, p=self.dropout, training=self.training) - - attn_output = paddle.matmul(attn_output_weights, v) - attn_output = paddle.reshape( - paddle.transpose(attn_output, [2, 0, 1, 3]), - [q_shape[0], q_shape[1], self.embed_dim]) - attn_output = self.out_proj(attn_output) - - return attn_output - - def _in_proj_q(self, query): - query = paddle.transpose(query, [1, 2, 0]) - query = paddle.unsqueeze(query, axis=2) - res = self.conv1(query) - res = paddle.squeeze(res, axis=2) - res = paddle.transpose(res, [2, 0, 1]) - return res - - def _in_proj_k(self, key): - key = paddle.transpose(key, [1, 2, 0]) - key = paddle.unsqueeze(key, axis=2) - res = self.conv2(key) - res = paddle.squeeze(res, axis=2) - res = paddle.transpose(res, [2, 0, 1]) - return res - - def _in_proj_v(self, value): - value = paddle.transpose(value, [1, 2, 0]) #(1, 2, 0) - value = paddle.unsqueeze(value, axis=2) - res = self.conv3(value) - res = paddle.squeeze(res, axis=2) - res = paddle.transpose(res, [2, 0, 1]) - return res diff --git a/backend/ppocr/modeling/heads/rec_aster_head.py b/backend/ppocr/modeling/heads/rec_aster_head.py deleted file mode 100644 index c95e8fd3..00000000 --- a/backend/ppocr/modeling/heads/rec_aster_head.py +++ /dev/null @@ -1,393 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
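[editor's note] Note the sequence-first layout documented in MultiheadAttention.forward above: query is [target length, batch, embed dim] and key/value are [source length, batch, embed dim], with the Q/K/V projections implemented as 1x1 convolutions. A minimal call, all sizes chosen purely for illustration:

import paddle

# Hypothetical usage of the MultiheadAttention removed above.
attn = MultiheadAttention(embed_dim=64, num_heads=8)
q = paddle.rand([10, 2, 64])   # [target length, batch, embed dim]
kv = paddle.rand([25, 2, 64])  # [source length, batch, embed dim]
out = attn(q, kv, kv)
print(out.shape)               # [10, 2, 64]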
-""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/attention_recognition_head.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys - -import paddle -from paddle import nn -from paddle.nn import functional as F - - -class AsterHead(nn.Layer): - def __init__(self, - in_channels, - out_channels, - sDim, - attDim, - max_len_labels, - time_step=25, - beam_width=5, - **kwargs): - super(AsterHead, self).__init__() - self.num_classes = out_channels - self.in_planes = in_channels - self.sDim = sDim - self.attDim = attDim - self.max_len_labels = max_len_labels - self.decoder = AttentionRecognitionHead(in_channels, out_channels, sDim, - attDim, max_len_labels) - self.time_step = time_step - self.embeder = Embedding(self.time_step, in_channels) - self.beam_width = beam_width - self.eos = self.num_classes - 3 - - def forward(self, x, targets=None, embed=None): - return_dict = {} - embedding_vectors = self.embeder(x) - - if self.training: - rec_targets, rec_lengths, _ = targets - rec_pred = self.decoder([x, rec_targets, rec_lengths], - embedding_vectors) - return_dict['rec_pred'] = rec_pred - return_dict['embedding_vectors'] = embedding_vectors - else: - rec_pred, rec_pred_scores = self.decoder.beam_search( - x, self.beam_width, self.eos, embedding_vectors) - return_dict['rec_pred'] = rec_pred - return_dict['rec_pred_scores'] = rec_pred_scores - return_dict['embedding_vectors'] = embedding_vectors - - return return_dict - - -class Embedding(nn.Layer): - def __init__(self, in_timestep, in_planes, mid_dim=4096, embed_dim=300): - super(Embedding, self).__init__() - self.in_timestep = in_timestep - self.in_planes = in_planes - self.embed_dim = embed_dim - self.mid_dim = mid_dim - self.eEmbed = nn.Linear( - in_timestep * in_planes, - self.embed_dim) # Embed encoder output to a word-embedding like - - def forward(self, x): - x = paddle.reshape(x, [paddle.shape(x)[0], -1]) - x = self.eEmbed(x) - return x - - -class AttentionRecognitionHead(nn.Layer): - """ - input: [b x 16 x 64 x in_planes] - output: probability sequence: [b x T x num_classes] - """ - - def __init__(self, in_channels, out_channels, sDim, attDim, max_len_labels): - super(AttentionRecognitionHead, self).__init__() - self.num_classes = out_channels # this is the output classes. So it includes the . - self.in_planes = in_channels - self.sDim = sDim - self.attDim = attDim - self.max_len_labels = max_len_labels - - self.decoder = DecoderUnit( - sDim=sDim, xDim=in_channels, yDim=self.num_classes, attDim=attDim) - - def forward(self, x, embed): - x, targets, lengths = x - batch_size = paddle.shape(x)[0] - # Decoder - state = self.decoder.get_initial_state(embed) - outputs = [] - for i in range(max(lengths)): - if i == 0: - y_prev = paddle.full( - shape=[batch_size], fill_value=self.num_classes) - else: - y_prev = targets[:, i - 1] - output, state = self.decoder(x, state, y_prev) - outputs.append(output) - outputs = paddle.concat([_.unsqueeze(1) for _ in outputs], 1) - return outputs - - # inference stage. 
-    # inference stage.
-    def sample(self, x):
-        x, _, _ = x
-        batch_size = paddle.shape(x)[0]
-        # Decoder
-        state = paddle.zeros([1, batch_size, self.sDim])
-
-        predicted_ids, predicted_scores = [], []
-        for i in range(self.max_len_labels):
-            if i == 0:
-                y_prev = paddle.full(
-                    shape=[batch_size], fill_value=self.num_classes)
-            else:
-                y_prev = predicted
-
-            output, state = self.decoder(x, state, y_prev)
-            output = F.softmax(output, axis=1)
-            score = paddle.max(output, axis=1)
-            predicted = paddle.argmax(output, axis=1)
-            predicted_ids.append(predicted.unsqueeze(1))
-            predicted_scores.append(score.unsqueeze(1))
-        predicted_ids = paddle.concat(predicted_ids, 1)
-        predicted_scores = paddle.concat(predicted_scores, 1)
-        # return predicted_ids.squeeze(), predicted_scores.squeeze()
-        return predicted_ids, predicted_scores
-
-    def beam_search(self, x, beam_width, eos, embed):
-        def _inflate(tensor, times, dim):
-            repeat_dims = [1] * tensor.dim()
-            repeat_dims[dim] = times
-            output = paddle.tile(tensor, repeat_dims)
-            return output
-
-        # https://github.com/IBM/pytorch-seq2seq/blob/fede87655ddce6c94b38886089e05321dc9802af/seq2seq/models/TopKDecoder.py
-        batch_size, l, d = x.shape
-        x = paddle.tile(
-            paddle.transpose(
-                x.unsqueeze(1), perm=[1, 0, 2, 3]), [beam_width, 1, 1, 1])
-        inflated_encoder_feats = paddle.reshape(
-            paddle.transpose(
-                x, perm=[1, 0, 2, 3]), [-1, l, d])
-
-        # Initialize the decoder
-        state = self.decoder.get_initial_state(embed, tile_times=beam_width)
-
-        pos_index = paddle.reshape(
-            paddle.arange(batch_size) * beam_width, shape=[-1, 1])
-
-        # Initialize the scores
-        sequence_scores = paddle.full(
-            shape=[batch_size * beam_width, 1], fill_value=-float('Inf'))
-        index = [i * beam_width for i in range(0, batch_size)]
-        sequence_scores[index] = 0.0
-
-        # Initialize the input vector
-        y_prev = paddle.full(
-            shape=[batch_size * beam_width], fill_value=self.num_classes)
-
-        # Store decisions for backtracking
-        stored_scores = list()
-        stored_predecessors = list()
-        stored_emitted_symbols = list()
-
-        for i in range(self.max_len_labels):
-            output, state = self.decoder(inflated_encoder_feats, state, y_prev)
-            state = paddle.unsqueeze(state, axis=0)
-            log_softmax_output = paddle.nn.functional.log_softmax(
-                output, axis=1)
-
-            sequence_scores = _inflate(sequence_scores, self.num_classes, 1)
-            sequence_scores += log_softmax_output
-            scores, candidates = paddle.topk(
-                paddle.reshape(sequence_scores, [batch_size, -1]),
-                beam_width,
-                axis=1)
-
-            # Reshape input = (bk, 1) and sequence_scores = (bk, 1)
-            y_prev = paddle.reshape(
-                candidates % self.num_classes, shape=[batch_size * beam_width])
-            sequence_scores = paddle.reshape(
-                scores, shape=[batch_size * beam_width, 1])
-
-            # Update fields for next timestep
-            pos_index = paddle.expand_as(pos_index, candidates)
-            predecessors = paddle.cast(
-                candidates / self.num_classes + pos_index, dtype='int64')
-            predecessors = paddle.reshape(
-                predecessors, shape=[batch_size * beam_width, 1])
-            state = paddle.index_select(
-                state, index=predecessors.squeeze(), axis=1)
-
-            # Update sequence scores and erase scores for the EOS symbol so that they aren't expanded
-            stored_scores.append(sequence_scores.clone())
-            y_prev = paddle.reshape(y_prev, shape=[-1, 1])
-            eos_prev = paddle.full_like(y_prev, fill_value=eos)
-            mask = eos_prev == y_prev
-            mask = paddle.nonzero(mask)
-            if mask.dim() > 0:
-                sequence_scores = sequence_scores.numpy()
-                mask = mask.numpy()
-                sequence_scores[mask] = -float('inf')
-                sequence_scores = paddle.to_tensor(sequence_scores)
-
-            # Cache results for backtracking
-
stored_predecessors.append(predecessors) - y_prev = paddle.squeeze(y_prev) - stored_emitted_symbols.append(y_prev) - - # Do backtracking to return the optimal values - #====== backtrak ======# - # Initialize return variables given different types - p = list() - l = [[self.max_len_labels] * beam_width for _ in range(batch_size) - ] # Placeholder for lengths of top-k sequences - - # the last step output of the beams are not sorted - # thus they are sorted here - sorted_score, sorted_idx = paddle.topk( - paddle.reshape( - stored_scores[-1], shape=[batch_size, beam_width]), - beam_width) - - # initialize the sequence scores with the sorted last step beam scores - s = sorted_score.clone() - - batch_eos_found = [0] * batch_size # the number of EOS found - # in the backward loop below for each batch - t = self.max_len_labels - 1 - # initialize the back pointer with the sorted order of the last step beams. - # add pos_index for indexing variable with b*k as the first dimension. - t_predecessors = paddle.reshape( - sorted_idx + pos_index.expand_as(sorted_idx), - shape=[batch_size * beam_width]) - while t >= 0: - # Re-order the variables with the back pointer - current_symbol = paddle.index_select( - stored_emitted_symbols[t], index=t_predecessors, axis=0) - t_predecessors = paddle.index_select( - stored_predecessors[t].squeeze(), index=t_predecessors, axis=0) - eos_indices = stored_emitted_symbols[t] == eos - eos_indices = paddle.nonzero(eos_indices) - - if eos_indices.dim() > 0: - for i in range(eos_indices.shape[0] - 1, -1, -1): - # Indices of the EOS symbol for both variables - # with b*k as the first dimension, and b, k for - # the first two dimensions - idx = eos_indices[i] - b_idx = int(idx[0] / beam_width) - # The indices of the replacing position - # according to the replacement strategy noted above - res_k_idx = beam_width - (batch_eos_found[b_idx] % - beam_width) - 1 - batch_eos_found[b_idx] += 1 - res_idx = b_idx * beam_width + res_k_idx - - # Replace the old information in return variables - # with the new ended sequence information - t_predecessors[res_idx] = stored_predecessors[t][idx[0]] - current_symbol[res_idx] = stored_emitted_symbols[t][idx[0]] - s[b_idx, res_k_idx] = stored_scores[t][idx[0], 0] - l[b_idx][res_k_idx] = t + 1 - - # record the back tracked results - p.append(current_symbol) - t -= 1 - - # Sort and re-order again as the added ended sequences may change - # the order (very unlikely) - s, re_sorted_idx = s.topk(beam_width) - for b_idx in range(batch_size): - l[b_idx] = [ - l[b_idx][k_idx.item()] for k_idx in re_sorted_idx[b_idx, :] - ] - - re_sorted_idx = paddle.reshape( - re_sorted_idx + pos_index.expand_as(re_sorted_idx), - [batch_size * beam_width]) - - # Reverse the sequences and re-order at the same time - # It is reversed because the backtracking happens in reverse time order - p = [ - paddle.reshape( - paddle.index_select(step, re_sorted_idx, 0), - shape=[batch_size, beam_width, -1]) for step in reversed(p) - ] - p = paddle.concat(p, -1)[:, 0, :] - return p, paddle.ones_like(p) - - -class AttentionUnit(nn.Layer): - def __init__(self, sDim, xDim, attDim): - super(AttentionUnit, self).__init__() - - self.sDim = sDim - self.xDim = xDim - self.attDim = attDim - - self.sEmbed = nn.Linear(sDim, attDim) - self.xEmbed = nn.Linear(xDim, attDim) - self.wEmbed = nn.Linear(attDim, 1) - - def forward(self, x, sPrev): - batch_size, T, _ = x.shape # [b x T x xDim] - x = paddle.reshape(x, [-1, self.xDim]) # [(b x T) x xDim] - xProj = self.xEmbed(x) # [(b x T) x attDim] - xProj 
= paddle.reshape(xProj, [batch_size, T, -1]) # [b x T x attDim] - - sPrev = sPrev.squeeze(0) - sProj = self.sEmbed(sPrev) # [b x attDim] - sProj = paddle.unsqueeze(sProj, 1) # [b x 1 x attDim] - sProj = paddle.expand(sProj, - [batch_size, T, self.attDim]) # [b x T x attDim] - - sumTanh = paddle.tanh(sProj + xProj) - sumTanh = paddle.reshape(sumTanh, [-1, self.attDim]) - - vProj = self.wEmbed(sumTanh) # [(b x T) x 1] - vProj = paddle.reshape(vProj, [batch_size, T]) - alpha = F.softmax( - vProj, axis=1) # attention weights for each sample in the minibatch - return alpha - - -class DecoderUnit(nn.Layer): - def __init__(self, sDim, xDim, yDim, attDim): - super(DecoderUnit, self).__init__() - self.sDim = sDim - self.xDim = xDim - self.yDim = yDim - self.attDim = attDim - self.emdDim = attDim - - self.attention_unit = AttentionUnit(sDim, xDim, attDim) - self.tgt_embedding = nn.Embedding( - yDim + 1, self.emdDim, weight_attr=nn.initializer.Normal( - std=0.01)) # the last is used for - self.gru = nn.GRUCell(input_size=xDim + self.emdDim, hidden_size=sDim) - self.fc = nn.Linear( - sDim, - yDim, - weight_attr=nn.initializer.Normal(std=0.01), - bias_attr=nn.initializer.Constant(value=0)) - self.embed_fc = nn.Linear(300, self.sDim) - - def get_initial_state(self, embed, tile_times=1): - assert embed.shape[1] == 300 - state = self.embed_fc(embed) # N * sDim - if tile_times != 1: - state = state.unsqueeze(1) - trans_state = paddle.transpose(state, perm=[1, 0, 2]) - state = paddle.tile(trans_state, repeat_times=[tile_times, 1, 1]) - trans_state = paddle.transpose(state, perm=[1, 0, 2]) - state = paddle.reshape(trans_state, shape=[-1, self.sDim]) - state = state.unsqueeze(0) # 1 * N * sDim - return state - - def forward(self, x, sPrev, yPrev): - # x: feature sequence from the image decoder. - batch_size, T, _ = x.shape - alpha = self.attention_unit(x, sPrev) - context = paddle.squeeze(paddle.matmul(alpha.unsqueeze(1), x), axis=1) - yPrev = paddle.cast(yPrev, dtype="int64") - yProj = self.tgt_embedding(yPrev) - - concat_context = paddle.concat([yProj, context], 1) - concat_context = paddle.squeeze(concat_context, 1) - sPrev = paddle.squeeze(sPrev, 0) - output, state = self.gru(concat_context, sPrev) - output = paddle.squeeze(output, axis=1) - output = self.fc(output) - return output, state \ No newline at end of file diff --git a/backend/ppocr/modeling/heads/rec_att_head.py b/backend/ppocr/modeling/heads/rec_att_head.py deleted file mode 100644 index ab8b119f..00000000 --- a/backend/ppocr/modeling/heads/rec_att_head.py +++ /dev/null @@ -1,202 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
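DecoderUnit above scores encoder positions with additive (Bahdanau-style) attention: tanh(Wx·x + Ws·s) is projected to a scalar per position, softmaxed over time, then used to mix encoder features into a context vector. A toy recomputation under assumed dimensions (b, T, xDim, sDim, attDim are illustrative):

    import paddle
    import paddle.nn.functional as F

    b, T, xDim, sDim, attDim = 2, 7, 16, 12, 10
    x = paddle.randn([b, T, xDim])           # encoder feature sequence
    s = paddle.randn([b, sDim])              # previous decoder state
    Wx = paddle.nn.Linear(xDim, attDim)      # mirrors xEmbed
    Ws = paddle.nn.Linear(sDim, attDim)      # mirrors sEmbed
    w = paddle.nn.Linear(attDim, 1)          # mirrors wEmbed

    e = w(paddle.tanh(Wx(x) + Ws(s).unsqueeze(1)))         # (b, T, 1)
    alpha = F.softmax(paddle.squeeze(e, axis=-1), axis=1)  # (b, T) attention weights
    context = paddle.squeeze(
        paddle.matmul(alpha.unsqueeze(1), x), axis=1)      # (b, xDim)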
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -import numpy as np - - -class AttentionHead(nn.Layer): - def __init__(self, in_channels, out_channels, hidden_size, **kwargs): - super(AttentionHead, self).__init__() - self.input_size = in_channels - self.hidden_size = hidden_size - self.num_classes = out_channels - - self.attention_cell = AttentionGRUCell( - in_channels, hidden_size, out_channels, use_gru=False) - self.generator = nn.Linear(hidden_size, out_channels) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = paddle.shape(inputs)[0] - num_steps = batch_max_length - - hidden = paddle.zeros((batch_size, self.hidden_size)) - output_hiddens = [] - - if targets is not None: - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets[:, i], onehot_dim=self.num_classes) - (outputs, hidden), alpha = self.attention_cell(hidden, inputs, - char_onehots) - output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) - output = paddle.concat(output_hiddens, axis=1) - probs = self.generator(output) - else: - targets = paddle.zeros(shape=[batch_size], dtype="int32") - probs = None - char_onehots = None - outputs = None - alpha = None - - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets, onehot_dim=self.num_classes) - (outputs, hidden), alpha = self.attention_cell(hidden, inputs, - char_onehots) - probs_step = self.generator(outputs) - if probs is None: - probs = paddle.unsqueeze(probs_step, axis=1) - else: - probs = paddle.concat( - [probs, paddle.unsqueeze( - probs_step, axis=1)], axis=1) - next_input = probs_step.argmax(axis=1) - targets = next_input - if not self.training: - probs = paddle.nn.functional.softmax(probs, axis=2) - return probs - - -class AttentionGRUCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionGRUCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1) - - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - - cur_hidden = self.rnn(concat_context, prev_hidden) - - return cur_hidden, alpha - - -class AttentionLSTM(nn.Layer): - def __init__(self, in_channels, out_channels, hidden_size, **kwargs): - super(AttentionLSTM, self).__init__() - self.input_size = in_channels - self.hidden_size = hidden_size - self.num_classes = out_channels - - self.attention_cell = AttentionLSTMCell( - in_channels, hidden_size, out_channels, use_gru=False) - self.generator = nn.Linear(hidden_size, out_channels) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - 
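    # Illustration (values assumed, not from this diff): F.one_hot turns integer
    # character ids into one-hot rows, which is all _char_to_onehot above does:
    #     F.one_hot(paddle.to_tensor([2, 0], dtype='int64'), num_classes=4)
    #     -> [[0., 0., 1., 0.],
    #         [1., 0., 0., 0.]]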
- def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = inputs.shape[0] - num_steps = batch_max_length - - hidden = (paddle.zeros((batch_size, self.hidden_size)), paddle.zeros( - (batch_size, self.hidden_size))) - output_hiddens = [] - - if targets is not None: - for i in range(num_steps): - # one-hot vectors for a i-th char - char_onehots = self._char_to_onehot( - targets[:, i], onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - - hidden = (hidden[1][0], hidden[1][1]) - output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1)) - output = paddle.concat(output_hiddens, axis=1) - probs = self.generator(output) - - else: - targets = paddle.zeros(shape=[batch_size], dtype="int32") - probs = None - - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets, onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - probs_step = self.generator(hidden[0]) - hidden = (hidden[1][0], hidden[1][1]) - if probs is None: - probs = paddle.unsqueeze(probs_step, axis=1) - else: - probs = paddle.concat( - [probs, paddle.unsqueeze( - probs_step, axis=1)], axis=1) - - next_input = probs_step.argmax(axis=1) - - targets = next_input - - return probs - - -class AttentionLSTMCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionLSTMCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - if not use_gru: - self.rnn = nn.LSTMCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - else: - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1) - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - cur_hidden = self.rnn(concat_context, prev_hidden) - - return cur_hidden, alpha diff --git a/backend/ppocr/modeling/heads/rec_ctc_head.py b/backend/ppocr/modeling/heads/rec_ctc_head.py deleted file mode 100755 index 6c1cf065..00000000 --- a/backend/ppocr/modeling/heads/rec_ctc_head.py +++ /dev/null @@ -1,87 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
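For reference, the attention step inside the GRU/LSTM cells above reduces to a softmax over per-position scores followed by a weighted sum of encoder features. Shape-wise, with assumed sizes b, T, H:

    import paddle
    import paddle.nn.functional as F

    b, T, H = 2, 7, 16
    batch_H = paddle.randn([b, T, H])            # encoder sequence
    e = paddle.randn([b, T, 1])                  # unnormalized scores (self.score output)
    alpha = F.softmax(e, axis=1)                 # (b, T, 1)
    alpha = paddle.transpose(alpha, [0, 2, 1])   # (b, 1, T)
    context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)  # (b, H)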
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -from paddle import ParamAttr, nn -from paddle.nn import functional as F - - -def get_para_bias_attr(l2_decay, k): - regularizer = paddle.regularizer.L2Decay(l2_decay) - stdv = 1.0 / math.sqrt(k * 1.0) - initializer = nn.initializer.Uniform(-stdv, stdv) - weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer) - bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer) - return [weight_attr, bias_attr] - - -class CTCHead(nn.Layer): - def __init__(self, - in_channels, - out_channels, - fc_decay=0.0004, - mid_channels=None, - return_feats=False, - **kwargs): - super(CTCHead, self).__init__() - if mid_channels is None: - weight_attr, bias_attr = get_para_bias_attr( - l2_decay=fc_decay, k=in_channels) - self.fc = nn.Linear( - in_channels, - out_channels, - weight_attr=weight_attr, - bias_attr=bias_attr) - else: - weight_attr1, bias_attr1 = get_para_bias_attr( - l2_decay=fc_decay, k=in_channels) - self.fc1 = nn.Linear( - in_channels, - mid_channels, - weight_attr=weight_attr1, - bias_attr=bias_attr1) - - weight_attr2, bias_attr2 = get_para_bias_attr( - l2_decay=fc_decay, k=mid_channels) - self.fc2 = nn.Linear( - mid_channels, - out_channels, - weight_attr=weight_attr2, - bias_attr=bias_attr2) - self.out_channels = out_channels - self.mid_channels = mid_channels - self.return_feats = return_feats - - def forward(self, x, targets=None): - if self.mid_channels is None: - predicts = self.fc(x) - else: - x = self.fc1(x) - predicts = self.fc2(x) - - if self.return_feats: - result = (x, predicts) - else: - result = predicts - if not self.training: - predicts = F.softmax(predicts, axis=2) - result = predicts - - return result diff --git a/backend/ppocr/modeling/heads/rec_multi_head.py b/backend/ppocr/modeling/heads/rec_multi_head.py deleted file mode 100644 index ef78bf98..00000000 --- a/backend/ppocr/modeling/heads/rec_multi_head.py +++ /dev/null @@ -1,73 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
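CTCHead above returns per-timestep class probabilities at inference; turning them into a string normally means taking the argmax path, then collapsing repeats and dropping blanks. A hedged sketch of that greedy decode (a blank index of 0 is a common convention assumed here, not something this diff specifies):

    import paddle
    import paddle.nn.functional as F

    logits = paddle.randn([1, 6, 5])             # (batch, T, num_classes) -- made up
    probs = F.softmax(logits, axis=2)            # matches CTCHead's eval-time output
    path = paddle.argmax(probs, axis=2)[0].tolist()

    decoded, prev, blank = [], None, 0
    for c in path:
        if c != blank and c != prev:             # collapse repeats, drop blanks
            decoded.append(c)
        prev = c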
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -from ppocr.modeling.necks.rnn import Im2Seq, EncoderWithRNN, EncoderWithFC, SequenceEncoder, EncoderWithSVTR -from .rec_ctc_head import CTCHead -from .rec_sar_head import SARHead - - -class MultiHead(nn.Layer): - def __init__(self, in_channels, out_channels_list, **kwargs): - super().__init__() - self.head_list = kwargs.pop('head_list') - self.gtc_head = 'sar' - assert len(self.head_list) >= 2 - for idx, head_name in enumerate(self.head_list): - name = list(head_name)[0] - if name == 'SARHead': - # sar head - sar_args = self.head_list[idx][name] - self.sar_head = eval(name)(in_channels=in_channels, \ - out_channels=out_channels_list['SARLabelDecode'], **sar_args) - elif name == 'CTCHead': - # ctc neck - self.encoder_reshape = Im2Seq(in_channels) - neck_args = self.head_list[idx][name]['Neck'] - encoder_type = neck_args.pop('name') - self.encoder = encoder_type - self.ctc_encoder = SequenceEncoder(in_channels=in_channels, \ - encoder_type=encoder_type, **neck_args) - # ctc head - head_args = self.head_list[idx][name]['Head'] - self.ctc_head = eval(name)(in_channels=self.ctc_encoder.out_channels, \ - out_channels=out_channels_list['CTCLabelDecode'], **head_args) - else: - raise NotImplementedError( - '{} is not supported in MultiHead yet'.format(name)) - - def forward(self, x, targets=None): - ctc_encoder = self.ctc_encoder(x) - ctc_out = self.ctc_head(ctc_encoder, targets) - head_out = dict() - head_out['ctc'] = ctc_out - head_out['ctc_neck'] = ctc_encoder - # eval mode - if not self.training: - return ctc_out - if self.gtc_head == 'sar': - sar_out = self.sar_head(x, targets[1:]) - head_out['sar'] = sar_out - return head_out - else: - return head_out diff --git a/backend/ppocr/modeling/heads/rec_nrtr_head.py b/backend/ppocr/modeling/heads/rec_nrtr_head.py deleted file mode 100644 index 38ba0c91..00000000 --- a/backend/ppocr/modeling/heads/rec_nrtr_head.py +++ /dev/null @@ -1,826 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import paddle -import copy -from paddle import nn -import paddle.nn.functional as F -from paddle.nn import LayerList -from paddle.nn.initializer import XavierNormal as xavier_uniform_ -from paddle.nn import Dropout, Linear, LayerNorm, Conv2D -import numpy as np -from ppocr.modeling.heads.multiheadAttention import MultiheadAttention -from paddle.nn.initializer import Constant as constant_ -from paddle.nn.initializer import XavierNormal as xavier_normal_ - -zeros_ = constant_(value=0.) -ones_ = constant_(value=1.) - - -class Transformer(nn.Layer): - """A transformer model. User is able to modify the attributes as needed. The architechture - is based on the paper "Attention Is All You Need". 
Ashish Vaswani, Noam Shazeer, - Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and - Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information - Processing Systems, pages 6000-6010. - - Args: - d_model: the number of expected features in the encoder/decoder inputs (default=512). - nhead: the number of heads in the multiheadattention models (default=8). - num_encoder_layers: the number of sub-encoder-layers in the encoder (default=6). - num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). - custom_encoder: custom encoder (default=None). - custom_decoder: custom decoder (default=None). - - """ - - def __init__(self, - d_model=512, - nhead=8, - num_encoder_layers=6, - beam_size=0, - num_decoder_layers=6, - dim_feedforward=1024, - attention_dropout_rate=0.0, - residual_dropout_rate=0.1, - custom_encoder=None, - custom_decoder=None, - in_channels=0, - out_channels=0, - scale_embedding=True): - super(Transformer, self).__init__() - self.out_channels = out_channels + 1 - self.embedding = Embeddings( - d_model=d_model, - vocab=self.out_channels, - padding_idx=0, - scale_embedding=scale_embedding) - self.positional_encoding = PositionalEncoding( - dropout=residual_dropout_rate, - dim=d_model, ) - if custom_encoder is not None: - self.encoder = custom_encoder - else: - if num_encoder_layers > 0: - encoder_layer = TransformerEncoderLayer( - d_model, nhead, dim_feedforward, attention_dropout_rate, - residual_dropout_rate) - self.encoder = TransformerEncoder(encoder_layer, - num_encoder_layers) - else: - self.encoder = None - - if custom_decoder is not None: - self.decoder = custom_decoder - else: - decoder_layer = TransformerDecoderLayer( - d_model, nhead, dim_feedforward, attention_dropout_rate, - residual_dropout_rate) - self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers) - - self._reset_parameters() - self.beam_size = beam_size - self.d_model = d_model - self.nhead = nhead - self.tgt_word_prj = nn.Linear( - d_model, self.out_channels, bias_attr=False) - w0 = np.random.normal(0.0, d_model**-0.5, - (d_model, self.out_channels)).astype(np.float32) - self.tgt_word_prj.weight.set_value(w0) - self.apply(self._init_weights) - - def _init_weights(self, m): - - if isinstance(m, nn.Conv2D): - xavier_normal_(m.weight) - if m.bias is not None: - zeros_(m.bias) - - def forward_train(self, src, tgt): - tgt = tgt[:, :-1] - - tgt_key_padding_mask = self.generate_padding_mask(tgt) - tgt = self.embedding(tgt).transpose([1, 0, 2]) - tgt = self.positional_encoding(tgt) - tgt_mask = self.generate_square_subsequent_mask(tgt.shape[0]) - - if self.encoder is not None: - src = self.positional_encoding(src.transpose([1, 0, 2])) - memory = self.encoder(src) - else: - memory = src.squeeze(2).transpose([2, 0, 1]) - output = self.decoder( - tgt, - memory, - tgt_mask=tgt_mask, - memory_mask=None, - tgt_key_padding_mask=tgt_key_padding_mask, - memory_key_padding_mask=None) - output = output.transpose([1, 0, 2]) - logit = self.tgt_word_prj(output) - return logit - - def forward(self, src, targets=None): - """Take in and process masked source/target sequences. - Args: - src: the sequence to the encoder (required). - tgt: the sequence to the decoder (required). - Shape: - - src: :math:`(S, N, E)`. - - tgt: :math:`(T, N, E)`. 
- Examples: - >>> output = transformer_model(src, tgt) - """ - - if self.training: - max_len = targets[1].max() - tgt = targets[0][:, :2 + max_len] - return self.forward_train(src, tgt) - else: - if self.beam_size > 0: - return self.forward_beam(src) - else: - return self.forward_test(src) - - def forward_test(self, src): - bs = paddle.shape(src)[0] - if self.encoder is not None: - src = self.positional_encoding(paddle.transpose(src, [1, 0, 2])) - memory = self.encoder(src) - else: - memory = paddle.transpose(paddle.squeeze(src, 2), [2, 0, 1]) - dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64) - dec_prob = paddle.full((bs, 1), 1., dtype=paddle.float32) - for len_dec_seq in range(1, 25): - dec_seq_embed = paddle.transpose(self.embedding(dec_seq), [1, 0, 2]) - dec_seq_embed = self.positional_encoding(dec_seq_embed) - tgt_mask = self.generate_square_subsequent_mask( - paddle.shape(dec_seq_embed)[0]) - output = self.decoder( - dec_seq_embed, - memory, - tgt_mask=tgt_mask, - memory_mask=None, - tgt_key_padding_mask=None, - memory_key_padding_mask=None) - dec_output = paddle.transpose(output, [1, 0, 2]) - dec_output = dec_output[:, -1, :] - word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) - preds_idx = paddle.argmax(word_prob, axis=1) - if paddle.equal_all( - preds_idx, - paddle.full( - paddle.shape(preds_idx), 3, dtype='int64')): - break - preds_prob = paddle.max(word_prob, axis=1) - dec_seq = paddle.concat( - [dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1) - dec_prob = paddle.concat( - [dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1) - return [dec_seq, dec_prob] - - def forward_beam(self, images): - ''' Translation work in one batch ''' - - def get_inst_idx_to_tensor_position_map(inst_idx_list): - ''' Indicate the position of an instance in a tensor. ''' - return { - inst_idx: tensor_position - for tensor_position, inst_idx in enumerate(inst_idx_list) - } - - def collect_active_part(beamed_tensor, curr_active_inst_idx, - n_prev_active_inst, n_bm): - ''' Collect tensor parts associated to active instances. ''' - - beamed_tensor_shape = paddle.shape(beamed_tensor) - n_curr_active_inst = len(curr_active_inst_idx) - new_shape = (n_curr_active_inst * n_bm, beamed_tensor_shape[1], - beamed_tensor_shape[2]) - - beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1]) - beamed_tensor = beamed_tensor.index_select( - curr_active_inst_idx, axis=0) - beamed_tensor = beamed_tensor.reshape(new_shape) - - return beamed_tensor - - def collate_active_info(src_enc, inst_idx_to_position_map, - active_inst_idx_list): - # Sentences which are still active are collected, - # so the decoder will not run on completed sentences. 
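    # Worked example (assumed numbers): with beam width n_bm = 2 and instances
    # {0, 2} of an original batch of 3 still active, collate_active_info keeps
    # only the rows of the inflated (instance * beam) axis that belong to those
    # instances, i.e. positions [0, 1, 4, 5] = inst_idx * n_bm + k.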
- - n_prev_active_inst = len(inst_idx_to_position_map) - active_inst_idx = [ - inst_idx_to_position_map[k] for k in active_inst_idx_list - ] - active_inst_idx = paddle.to_tensor(active_inst_idx, dtype='int64') - active_src_enc = collect_active_part( - src_enc.transpose([1, 0, 2]), active_inst_idx, - n_prev_active_inst, n_bm).transpose([1, 0, 2]) - active_inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( - active_inst_idx_list) - return active_src_enc, active_inst_idx_to_position_map - - def beam_decode_step(inst_dec_beams, len_dec_seq, enc_output, - inst_idx_to_position_map, n_bm, - memory_key_padding_mask): - ''' Decode and update beam status, and then return active beam idx ''' - - def prepare_beam_dec_seq(inst_dec_beams, len_dec_seq): - dec_partial_seq = [ - b.get_current_state() for b in inst_dec_beams if not b.done - ] - dec_partial_seq = paddle.stack(dec_partial_seq) - dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq]) - return dec_partial_seq - - def predict_word(dec_seq, enc_output, n_active_inst, n_bm, - memory_key_padding_mask): - dec_seq = paddle.transpose(self.embedding(dec_seq), [1, 0, 2]) - dec_seq = self.positional_encoding(dec_seq) - tgt_mask = self.generate_square_subsequent_mask( - paddle.shape(dec_seq)[0]) - dec_output = self.decoder( - dec_seq, - enc_output, - tgt_mask=tgt_mask, - tgt_key_padding_mask=None, - memory_key_padding_mask=memory_key_padding_mask, ) - dec_output = paddle.transpose(dec_output, [1, 0, 2]) - dec_output = dec_output[:, - -1, :] # Pick the last step: (bh * bm) * d_h - word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) - word_prob = paddle.reshape(word_prob, [n_active_inst, n_bm, -1]) - return word_prob - - def collect_active_inst_idx_list(inst_beams, word_prob, - inst_idx_to_position_map): - active_inst_idx_list = [] - for inst_idx, inst_position in inst_idx_to_position_map.items(): - is_inst_complete = inst_beams[inst_idx].advance(word_prob[ - inst_position]) - if not is_inst_complete: - active_inst_idx_list += [inst_idx] - - return active_inst_idx_list - - n_active_inst = len(inst_idx_to_position_map) - dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq) - word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm, - None) - # Update the beam with predicted word prob information and collect incomplete instances - active_inst_idx_list = collect_active_inst_idx_list( - inst_dec_beams, word_prob, inst_idx_to_position_map) - return active_inst_idx_list - - def collect_hypothesis_and_scores(inst_dec_beams, n_best): - all_hyp, all_scores = [], [] - for inst_idx in range(len(inst_dec_beams)): - scores, tail_idxs = inst_dec_beams[inst_idx].sort_scores() - all_scores += [scores[:n_best]] - hyps = [ - inst_dec_beams[inst_idx].get_hypothesis(i) - for i in tail_idxs[:n_best] - ] - all_hyp += [hyps] - return all_hyp, all_scores - - with paddle.no_grad(): - #-- Encode - if self.encoder is not None: - src = self.positional_encoding(images.transpose([1, 0, 2])) - src_enc = self.encoder(src) - else: - src_enc = images.squeeze(2).transpose([0, 2, 1]) - - n_bm = self.beam_size - src_shape = paddle.shape(src_enc) - inst_dec_beams = [Beam(n_bm) for _ in range(1)] - active_inst_idx_list = list(range(1)) - # Repeat data for beam search - src_enc = paddle.tile(src_enc, [1, n_bm, 1]) - inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( - active_inst_idx_list) - # Decode - for len_dec_seq in range(1, 25): - src_enc_copy = src_enc.clone() - active_inst_idx_list = beam_decode_step( - inst_dec_beams, len_dec_seq, 
src_enc_copy, - inst_idx_to_position_map, n_bm, None) - if not active_inst_idx_list: - break # all instances have finished their path to - src_enc, inst_idx_to_position_map = collate_active_info( - src_enc_copy, inst_idx_to_position_map, - active_inst_idx_list) - batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams, - 1) - result_hyp = [] - hyp_scores = [] - for bs_hyp, score in zip(batch_hyp, batch_scores): - l = len(bs_hyp[0]) - bs_hyp_pad = bs_hyp[0] + [3] * (25 - l) - result_hyp.append(bs_hyp_pad) - score = float(score) / l - hyp_score = [score for _ in range(25)] - hyp_scores.append(hyp_score) - return [ - paddle.to_tensor( - np.array(result_hyp), dtype=paddle.int64), - paddle.to_tensor(hyp_scores) - ] - - def generate_square_subsequent_mask(self, sz): - """Generate a square mask for the sequence. The masked positions are filled with float('-inf'). - Unmasked positions are filled with float(0.0). - """ - mask = paddle.zeros([sz, sz], dtype='float32') - mask_inf = paddle.triu( - paddle.full( - shape=[sz, sz], dtype='float32', fill_value='-inf'), - diagonal=1) - mask = mask + mask_inf - return mask - - def generate_padding_mask(self, x): - padding_mask = paddle.equal(x, paddle.to_tensor(0, dtype=x.dtype)) - return padding_mask - - def _reset_parameters(self): - """Initiate parameters in the transformer model.""" - - for p in self.parameters(): - if p.dim() > 1: - xavier_uniform_(p) - - -class TransformerEncoder(nn.Layer): - """TransformerEncoder is a stack of N encoder layers - Args: - encoder_layer: an instance of the TransformerEncoderLayer() class (required). - num_layers: the number of sub-encoder-layers in the encoder (required). - norm: the layer normalization component (optional). - """ - - def __init__(self, encoder_layer, num_layers): - super(TransformerEncoder, self).__init__() - self.layers = _get_clones(encoder_layer, num_layers) - self.num_layers = num_layers - - def forward(self, src): - """Pass the input through the endocder layers in turn. - Args: - src: the sequnce to the encoder (required). - mask: the mask for the src sequence (optional). - src_key_padding_mask: the mask for the src keys per batch (optional). - """ - output = src - - for i in range(self.num_layers): - output = self.layers[i](output, - src_mask=None, - src_key_padding_mask=None) - - return output - - -class TransformerDecoder(nn.Layer): - """TransformerDecoder is a stack of N decoder layers - - Args: - decoder_layer: an instance of the TransformerDecoderLayer() class (required). - num_layers: the number of sub-decoder-layers in the decoder (required). - norm: the layer normalization component (optional). - - """ - - def __init__(self, decoder_layer, num_layers): - super(TransformerDecoder, self).__init__() - self.layers = _get_clones(decoder_layer, num_layers) - self.num_layers = num_layers - - def forward(self, - tgt, - memory, - tgt_mask=None, - memory_mask=None, - tgt_key_padding_mask=None, - memory_key_padding_mask=None): - """Pass the inputs (and mask) through the decoder layer in turn. - - Args: - tgt: the sequence to the decoder (required). - memory: the sequnce from the last layer of the encoder (required). - tgt_mask: the mask for the tgt sequence (optional). - memory_mask: the mask for the memory sequence (optional). - tgt_key_padding_mask: the mask for the tgt keys per batch (optional). - memory_key_padding_mask: the mask for the memory keys per batch (optional). 
- """ - output = tgt - for i in range(self.num_layers): - output = self.layers[i]( - output, - memory, - tgt_mask=tgt_mask, - memory_mask=memory_mask, - tgt_key_padding_mask=tgt_key_padding_mask, - memory_key_padding_mask=memory_key_padding_mask) - - return output - - -class TransformerEncoderLayer(nn.Layer): - """TransformerEncoderLayer is made up of self-attn and feedforward network. - This standard encoder layer is based on the paper "Attention Is All You Need". - Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, - Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in - Neural Information Processing Systems, pages 6000-6010. Users may modify or implement - in a different way during application. - - Args: - d_model: the number of expected features in the input (required). - nhead: the number of heads in the multiheadattention models (required). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). - - """ - - def __init__(self, - d_model, - nhead, - dim_feedforward=2048, - attention_dropout_rate=0.0, - residual_dropout_rate=0.1): - super(TransformerEncoderLayer, self).__init__() - self.self_attn = MultiheadAttention( - d_model, nhead, dropout=attention_dropout_rate) - - self.conv1 = Conv2D( - in_channels=d_model, - out_channels=dim_feedforward, - kernel_size=(1, 1)) - self.conv2 = Conv2D( - in_channels=dim_feedforward, - out_channels=d_model, - kernel_size=(1, 1)) - - self.norm1 = LayerNorm(d_model) - self.norm2 = LayerNorm(d_model) - self.dropout1 = Dropout(residual_dropout_rate) - self.dropout2 = Dropout(residual_dropout_rate) - - def forward(self, src, src_mask=None, src_key_padding_mask=None): - """Pass the input through the endocder layer. - Args: - src: the sequnce to the encoder layer (required). - src_mask: the mask for the src sequence (optional). - src_key_padding_mask: the mask for the src keys per batch (optional). - """ - src2 = self.self_attn( - src, - src, - src, - attn_mask=src_mask, - key_padding_mask=src_key_padding_mask) - src = src + self.dropout1(src2) - src = self.norm1(src) - - src = paddle.transpose(src, [1, 2, 0]) - src = paddle.unsqueeze(src, 2) - src2 = self.conv2(F.relu(self.conv1(src))) - src2 = paddle.squeeze(src2, 2) - src2 = paddle.transpose(src2, [2, 0, 1]) - src = paddle.squeeze(src, 2) - src = paddle.transpose(src, [2, 0, 1]) - - src = src + self.dropout2(src2) - src = self.norm2(src) - return src - - -class TransformerDecoderLayer(nn.Layer): - """TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network. - This standard decoder layer is based on the paper "Attention Is All You Need". - Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, - Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in - Neural Information Processing Systems, pages 6000-6010. Users may modify or implement - in a different way during application. - - Args: - d_model: the number of expected features in the input (required). - nhead: the number of heads in the multiheadattention models (required). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). 
- - """ - - def __init__(self, - d_model, - nhead, - dim_feedforward=2048, - attention_dropout_rate=0.0, - residual_dropout_rate=0.1): - super(TransformerDecoderLayer, self).__init__() - self.self_attn = MultiheadAttention( - d_model, nhead, dropout=attention_dropout_rate) - self.multihead_attn = MultiheadAttention( - d_model, nhead, dropout=attention_dropout_rate) - - self.conv1 = Conv2D( - in_channels=d_model, - out_channels=dim_feedforward, - kernel_size=(1, 1)) - self.conv2 = Conv2D( - in_channels=dim_feedforward, - out_channels=d_model, - kernel_size=(1, 1)) - - self.norm1 = LayerNorm(d_model) - self.norm2 = LayerNorm(d_model) - self.norm3 = LayerNorm(d_model) - self.dropout1 = Dropout(residual_dropout_rate) - self.dropout2 = Dropout(residual_dropout_rate) - self.dropout3 = Dropout(residual_dropout_rate) - - def forward(self, - tgt, - memory, - tgt_mask=None, - memory_mask=None, - tgt_key_padding_mask=None, - memory_key_padding_mask=None): - """Pass the inputs (and mask) through the decoder layer. - - Args: - tgt: the sequence to the decoder layer (required). - memory: the sequnce from the last layer of the encoder (required). - tgt_mask: the mask for the tgt sequence (optional). - memory_mask: the mask for the memory sequence (optional). - tgt_key_padding_mask: the mask for the tgt keys per batch (optional). - memory_key_padding_mask: the mask for the memory keys per batch (optional). - - """ - tgt2 = self.self_attn( - tgt, - tgt, - tgt, - attn_mask=tgt_mask, - key_padding_mask=tgt_key_padding_mask) - tgt = tgt + self.dropout1(tgt2) - tgt = self.norm1(tgt) - tgt2 = self.multihead_attn( - tgt, - memory, - memory, - attn_mask=memory_mask, - key_padding_mask=memory_key_padding_mask) - tgt = tgt + self.dropout2(tgt2) - tgt = self.norm2(tgt) - - # default - tgt = paddle.transpose(tgt, [1, 2, 0]) - tgt = paddle.unsqueeze(tgt, 2) - tgt2 = self.conv2(F.relu(self.conv1(tgt))) - tgt2 = paddle.squeeze(tgt2, 2) - tgt2 = paddle.transpose(tgt2, [2, 0, 1]) - tgt = paddle.squeeze(tgt, 2) - tgt = paddle.transpose(tgt, [2, 0, 1]) - - tgt = tgt + self.dropout3(tgt2) - tgt = self.norm3(tgt) - return tgt - - -def _get_clones(module, N): - return LayerList([copy.deepcopy(module) for i in range(N)]) - - -class PositionalEncoding(nn.Layer): - """Inject some information about the relative or absolute position of the tokens - in the sequence. The positional encodings have the same dimension as - the embeddings, so that the two can be summed. Here, we use sine and cosine - functions of different frequencies. - .. math:: - \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) - \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) - \text{where pos is the word position and i is the embed idx) - Args: - d_model: the embed dim (required). - dropout: the dropout value (default=0.1). - max_len: the max. length of the incoming sequence (default=5000). 
- Examples: - >>> pos_encoder = PositionalEncoding(d_model) - """ - - def __init__(self, dropout, dim, max_len=5000): - super(PositionalEncoding, self).__init__() - self.dropout = nn.Dropout(p=dropout) - - pe = paddle.zeros([max_len, dim]) - position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1) - div_term = paddle.exp( - paddle.arange(0, dim, 2).astype('float32') * - (-math.log(10000.0) / dim)) - pe[:, 0::2] = paddle.sin(position * div_term) - pe[:, 1::2] = paddle.cos(position * div_term) - pe = paddle.unsqueeze(pe, 0) - pe = paddle.transpose(pe, [1, 0, 2]) - self.register_buffer('pe', pe) - - def forward(self, x): - """Inputs of forward function - Args: - x: the sequence fed to the positional encoder model (required). - Shape: - x: [sequence length, batch size, embed dim] - output: [sequence length, batch size, embed dim] - Examples: - >>> output = pos_encoder(x) - """ - x = x + self.pe[:paddle.shape(x)[0], :] - return self.dropout(x) - - -class PositionalEncoding_2d(nn.Layer): - """Inject some information about the relative or absolute position of the tokens - in the sequence. The positional encodings have the same dimension as - the embeddings, so that the two can be summed. Here, we use sine and cosine - functions of different frequencies. - .. math:: - \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) - \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) - \text{where pos is the word position and i is the embed idx) - Args: - d_model: the embed dim (required). - dropout: the dropout value (default=0.1). - max_len: the max. length of the incoming sequence (default=5000). - Examples: - >>> pos_encoder = PositionalEncoding(d_model) - """ - - def __init__(self, dropout, dim, max_len=5000): - super(PositionalEncoding_2d, self).__init__() - self.dropout = nn.Dropout(p=dropout) - - pe = paddle.zeros([max_len, dim]) - position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1) - div_term = paddle.exp( - paddle.arange(0, dim, 2).astype('float32') * - (-math.log(10000.0) / dim)) - pe[:, 0::2] = paddle.sin(position * div_term) - pe[:, 1::2] = paddle.cos(position * div_term) - pe = paddle.transpose(paddle.unsqueeze(pe, 0), [1, 0, 2]) - self.register_buffer('pe', pe) - - self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1)) - self.linear1 = nn.Linear(dim, dim) - self.linear1.weight.data.fill_(1.) - self.avg_pool_2 = nn.AdaptiveAvgPool2D((1, 1)) - self.linear2 = nn.Linear(dim, dim) - self.linear2.weight.data.fill_(1.) - - def forward(self, x): - """Inputs of forward function - Args: - x: the sequence fed to the positional encoder model (required). 
- Shape: - x: [sequence length, batch size, embed dim] - output: [sequence length, batch size, embed dim] - Examples: - >>> output = pos_encoder(x) - """ - w_pe = self.pe[:paddle.shape(x)[-1], :] - w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0) - w_pe = w_pe * w1 - w_pe = paddle.transpose(w_pe, [1, 2, 0]) - w_pe = paddle.unsqueeze(w_pe, 2) - - h_pe = self.pe[:paddle.shape(x).shape[-2], :] - w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0) - h_pe = h_pe * w2 - h_pe = paddle.transpose(h_pe, [1, 2, 0]) - h_pe = paddle.unsqueeze(h_pe, 3) - - x = x + w_pe + h_pe - x = paddle.transpose( - paddle.reshape(x, - [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]), - [2, 0, 1]) - - return self.dropout(x) - - -class Embeddings(nn.Layer): - def __init__(self, d_model, vocab, padding_idx, scale_embedding): - super(Embeddings, self).__init__() - self.embedding = nn.Embedding(vocab, d_model, padding_idx=padding_idx) - w0 = np.random.normal(0.0, d_model**-0.5, - (vocab, d_model)).astype(np.float32) - self.embedding.weight.set_value(w0) - self.d_model = d_model - self.scale_embedding = scale_embedding - - def forward(self, x): - if self.scale_embedding: - x = self.embedding(x) - return x * math.sqrt(self.d_model) - return self.embedding(x) - - -class Beam(): - ''' Beam search ''' - - def __init__(self, size, device=False): - - self.size = size - self._done = False - # The score for each translation on the beam. - self.scores = paddle.zeros((size, ), dtype=paddle.float32) - self.all_scores = [] - # The backpointers at each time-step. - self.prev_ks = [] - # The outputs at each time-step. - self.next_ys = [paddle.full((size, ), 0, dtype=paddle.int64)] - self.next_ys[0][0] = 2 - - def get_current_state(self): - "Get the outputs for the current timestep." - return self.get_tentative_hypothesis() - - def get_current_origin(self): - "Get the backpointers for the current timestep." - return self.prev_ks[-1] - - @property - def done(self): - return self._done - - def advance(self, word_prob): - "Update beam status and check if finished or not." - num_words = word_prob.shape[1] - - # Sum the previous scores. - if len(self.prev_ks) > 0: - beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) - else: - beam_lk = word_prob[0] - - flat_beam_lk = beam_lk.reshape([-1]) - best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, - True) # 1st sort - self.all_scores.append(self.scores) - self.scores = best_scores - # bestScoresId is flattened as a (beam x word) array, - # so we need to calculate which word and beam each score came from - prev_k = best_scores_id // num_words - self.prev_ks.append(prev_k) - self.next_ys.append(best_scores_id - prev_k * num_words) - # End condition is when top-of-beam is EOS. - if self.next_ys[-1][0] == 3: - self._done = True - self.all_scores.append(self.scores) - - return self._done - - def sort_scores(self): - "Sort the scores." - return self.scores, paddle.to_tensor( - [i for i in range(int(self.scores.shape[0]))], dtype='int32') - - def get_the_best_score_and_idx(self): - "Get the score of the best in the beam." - scores, ids = self.sort_scores() - return scores[1], ids[1] - - def get_tentative_hypothesis(self): - "Get the decoded sequence for the current timestep." 
- if len(self.next_ys) == 1: - dec_seq = self.next_ys[0].unsqueeze(1) - else: - _, keys = self.sort_scores() - hyps = [self.get_hypothesis(k) for k in keys] - hyps = [[2] + h for h in hyps] - dec_seq = paddle.to_tensor(hyps, dtype='int64') - return dec_seq - - def get_hypothesis(self, k): - """ Walk back to construct the full hypothesis. """ - hyp = [] - for j in range(len(self.prev_ks) - 1, -1, -1): - hyp.append(self.next_ys[j + 1][k]) - k = self.prev_ks[j][k] - return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/backend/ppocr/modeling/heads/rec_pren_head.py b/backend/ppocr/modeling/heads/rec_pren_head.py deleted file mode 100644 index c9e4b3e9..00000000 --- a/backend/ppocr/modeling/heads/rec_pren_head.py +++ /dev/null @@ -1,34 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -from paddle.nn import functional as F - - -class PRENHead(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): - super(PRENHead, self).__init__() - self.linear = nn.Linear(in_channels, out_channels) - - def forward(self, x, targets=None): - predicts = self.linear(x) - - if not self.training: - predicts = F.softmax(predicts, axis=2) - - return predicts diff --git a/backend/ppocr/modeling/heads/rec_sar_head.py b/backend/ppocr/modeling/heads/rec_sar_head.py deleted file mode 100644 index 0e6b3440..00000000 --- a/backend/ppocr/modeling/heads/rec_sar_head.py +++ /dev/null @@ -1,410 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/encoders/sar_encoder.py -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/decoders/sar_decoder.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - - -class SAREncoder(nn.Layer): - """ - Args: - enc_bi_rnn (bool): If True, use bidirectional RNN in encoder. - enc_drop_rnn (float): Dropout probability of RNN layer in encoder. - enc_gru (bool): If True, use GRU, else LSTM in encoder. - d_model (int): Dim of channels from backbone. - d_enc (int): Dim of encoder RNN layer. 
- mask (bool): If True, mask padding in RNN sequence. - """ - - def __init__(self, - enc_bi_rnn=False, - enc_drop_rnn=0.1, - enc_gru=False, - d_model=512, - d_enc=512, - mask=True, - **kwargs): - super().__init__() - assert isinstance(enc_bi_rnn, bool) - assert isinstance(enc_drop_rnn, (int, float)) - assert 0 <= enc_drop_rnn < 1.0 - assert isinstance(enc_gru, bool) - assert isinstance(d_model, int) - assert isinstance(d_enc, int) - assert isinstance(mask, bool) - - self.enc_bi_rnn = enc_bi_rnn - self.enc_drop_rnn = enc_drop_rnn - self.mask = mask - - # LSTM Encoder - if enc_bi_rnn: - direction = 'bidirectional' - else: - direction = 'forward' - kwargs = dict( - input_size=d_model, - hidden_size=d_enc, - num_layers=2, - time_major=False, - dropout=enc_drop_rnn, - direction=direction) - if enc_gru: - self.rnn_encoder = nn.GRU(**kwargs) - else: - self.rnn_encoder = nn.LSTM(**kwargs) - - # global feature transformation - encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1) - self.linear = nn.Linear(encoder_rnn_out_size, encoder_rnn_out_size) - - def forward(self, feat, img_metas=None): - if img_metas is not None: - assert len(img_metas[0]) == feat.shape[0] - - valid_ratios = None - if img_metas is not None and self.mask: - valid_ratios = img_metas[-1] - - h_feat = feat.shape[2] # bsz c h w - feat_v = F.max_pool2d( - feat, kernel_size=(h_feat, 1), stride=1, padding=0) - feat_v = feat_v.squeeze(2) # bsz * C * W - feat_v = paddle.transpose(feat_v, perm=[0, 2, 1]) # bsz * W * C - holistic_feat = self.rnn_encoder(feat_v)[0] # bsz * T * C - - if valid_ratios is not None: - valid_hf = [] - T = holistic_feat.shape[1] - for i in range(len(valid_ratios)): - valid_step = min(T, math.ceil(T * valid_ratios[i])) - 1 - valid_hf.append(holistic_feat[i, valid_step, :]) - valid_hf = paddle.stack(valid_hf, axis=0) - else: - valid_hf = holistic_feat[:, -1, :] # bsz * C - holistic_feat = self.linear(valid_hf) # bsz * C - - return holistic_feat - - -class BaseDecoder(nn.Layer): - def __init__(self, **kwargs): - super().__init__() - - def forward_train(self, feat, out_enc, targets, img_metas): - raise NotImplementedError - - def forward_test(self, feat, out_enc, img_metas): - raise NotImplementedError - - def forward(self, - feat, - out_enc, - label=None, - img_metas=None, - train_mode=True): - self.train_mode = train_mode - - if train_mode: - return self.forward_train(feat, out_enc, label, img_metas) - return self.forward_test(feat, out_enc, img_metas) - - -class ParallelSARDecoder(BaseDecoder): - """ - Args: - out_channels (int): Output class number. - enc_bi_rnn (bool): If True, use bidirectional RNN in encoder. - dec_bi_rnn (bool): If True, use bidirectional RNN in decoder. - dec_drop_rnn (float): Dropout of RNN layer in decoder. - dec_gru (bool): If True, use GRU, else LSTM in decoder. - d_model (int): Dim of channels from backbone. - d_enc (int): Dim of encoder RNN layer. - d_k (int): Dim of channels of attention module. - pred_dropout (float): Dropout probability of prediction layer. - max_seq_len (int): Maximum sequence length for decoding. - mask (bool): If True, mask padding in feature map. - start_idx (int): Index of start token. - padding_idx (int): Index of padding token. - pred_concat (bool): If True, concat glimpse feature from - attention with holistic feature and hidden state. 
- """ - - def __init__( - self, - out_channels, # 90 + unknown + start + padding - enc_bi_rnn=False, - dec_bi_rnn=False, - dec_drop_rnn=0.0, - dec_gru=False, - d_model=512, - d_enc=512, - d_k=64, - pred_dropout=0.1, - max_text_length=30, - mask=True, - pred_concat=True, - **kwargs): - super().__init__() - - self.num_classes = out_channels - self.enc_bi_rnn = enc_bi_rnn - self.d_k = d_k - self.start_idx = out_channels - 2 - self.padding_idx = out_channels - 1 - self.max_seq_len = max_text_length - self.mask = mask - self.pred_concat = pred_concat - - encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1) - decoder_rnn_out_size = encoder_rnn_out_size * (int(dec_bi_rnn) + 1) - - # 2D attention layer - self.conv1x1_1 = nn.Linear(decoder_rnn_out_size, d_k) - self.conv3x3_1 = nn.Conv2D( - d_model, d_k, kernel_size=3, stride=1, padding=1) - self.conv1x1_2 = nn.Linear(d_k, 1) - - # Decoder RNN layer - if dec_bi_rnn: - direction = 'bidirectional' - else: - direction = 'forward' - - kwargs = dict( - input_size=encoder_rnn_out_size, - hidden_size=encoder_rnn_out_size, - num_layers=2, - time_major=False, - dropout=dec_drop_rnn, - direction=direction) - if dec_gru: - self.rnn_decoder = nn.GRU(**kwargs) - else: - self.rnn_decoder = nn.LSTM(**kwargs) - - # Decoder input embedding - self.embedding = nn.Embedding( - self.num_classes, - encoder_rnn_out_size, - padding_idx=self.padding_idx) - - # Prediction layer - self.pred_dropout = nn.Dropout(pred_dropout) - pred_num_classes = self.num_classes - 1 - if pred_concat: - fc_in_channel = decoder_rnn_out_size + d_model + encoder_rnn_out_size - else: - fc_in_channel = d_model - self.prediction = nn.Linear(fc_in_channel, pred_num_classes) - - def _2d_attention(self, - decoder_input, - feat, - holistic_feat, - valid_ratios=None): - - y = self.rnn_decoder(decoder_input)[0] - # y: bsz * (seq_len + 1) * hidden_size - - attn_query = self.conv1x1_1(y) # bsz * (seq_len + 1) * attn_size - bsz, seq_len, attn_size = attn_query.shape - attn_query = paddle.unsqueeze(attn_query, axis=[3, 4]) - # (bsz, seq_len + 1, attn_size, 1, 1) - - attn_key = self.conv3x3_1(feat) - # bsz * attn_size * h * w - attn_key = attn_key.unsqueeze(1) - # bsz * 1 * attn_size * h * w - - attn_weight = paddle.tanh(paddle.add(attn_key, attn_query)) - - # bsz * (seq_len + 1) * attn_size * h * w - attn_weight = paddle.transpose(attn_weight, perm=[0, 1, 3, 4, 2]) - # bsz * (seq_len + 1) * h * w * attn_size - attn_weight = self.conv1x1_2(attn_weight) - # bsz * (seq_len + 1) * h * w * 1 - bsz, T, h, w, c = attn_weight.shape - assert c == 1 - - if valid_ratios is not None: - # cal mask of attention weight - for i in range(len(valid_ratios)): - valid_width = min(w, math.ceil(w * valid_ratios[i])) - if valid_width < w: - attn_weight[i, :, :, valid_width:, :] = float('-inf') - - attn_weight = paddle.reshape(attn_weight, [bsz, T, -1]) - attn_weight = F.softmax(attn_weight, axis=-1) - - attn_weight = paddle.reshape(attn_weight, [bsz, T, h, w, c]) - attn_weight = paddle.transpose(attn_weight, perm=[0, 1, 4, 2, 3]) - # attn_weight: bsz * T * c * h * w - # feat: bsz * c * h * w - attn_feat = paddle.sum(paddle.multiply(feat.unsqueeze(1), attn_weight), - (3, 4), - keepdim=False) - # bsz * (seq_len + 1) * C - - # Linear transformation - if self.pred_concat: - hf_c = holistic_feat.shape[-1] - holistic_feat = paddle.expand( - holistic_feat, shape=[bsz, seq_len, hf_c]) - y = self.prediction(paddle.concat((y, attn_feat, holistic_feat), 2)) - else: - y = self.prediction(attn_feat) - # bsz * (seq_len + 1) * num_classes - if 
self.train_mode: - y = self.pred_dropout(y) - - return y - - def forward_train(self, feat, out_enc, label, img_metas): - ''' - img_metas: [label, valid_ratio] - ''' - if img_metas is not None: - assert len(img_metas[0]) == feat.shape[0] - - valid_ratios = None - if img_metas is not None and self.mask: - valid_ratios = img_metas[-1] - - lab_embedding = self.embedding(label) - # bsz * seq_len * emb_dim - out_enc = out_enc.unsqueeze(1) - # bsz * 1 * emb_dim - in_dec = paddle.concat((out_enc, lab_embedding), axis=1) - # bsz * (seq_len + 1) * C - out_dec = self._2d_attention( - in_dec, feat, out_enc, valid_ratios=valid_ratios) - # bsz * (seq_len + 1) * num_classes - - return out_dec[:, 1:, :] # bsz * seq_len * num_classes - - def forward_test(self, feat, out_enc, img_metas): - if img_metas is not None: - assert len(img_metas[0]) == feat.shape[0] - - valid_ratios = None - if img_metas is not None and self.mask: - valid_ratios = img_metas[-1] - - seq_len = self.max_seq_len - bsz = feat.shape[0] - start_token = paddle.full( - (bsz, ), fill_value=self.start_idx, dtype='int64') - # bsz - start_token = self.embedding(start_token) - # bsz * emb_dim - emb_dim = start_token.shape[1] - start_token = start_token.unsqueeze(1) - start_token = paddle.expand(start_token, shape=[bsz, seq_len, emb_dim]) - # bsz * seq_len * emb_dim - out_enc = out_enc.unsqueeze(1) - # bsz * 1 * emb_dim - decoder_input = paddle.concat((out_enc, start_token), axis=1) - # bsz * (seq_len + 1) * emb_dim - - outputs = [] - for i in range(1, seq_len + 1): - decoder_output = self._2d_attention( - decoder_input, feat, out_enc, valid_ratios=valid_ratios) - char_output = decoder_output[:, i, :] # bsz * num_classes - char_output = F.softmax(char_output, -1) - outputs.append(char_output) - max_idx = paddle.argmax(char_output, axis=1, keepdim=False) - char_embedding = self.embedding(max_idx) # bsz * emb_dim - if i < seq_len: - decoder_input[:, i + 1, :] = char_embedding - - outputs = paddle.stack(outputs, 1) # bsz * seq_len * num_classes - - return outputs - - -class SARHead(nn.Layer): - def __init__(self, - in_channels, - out_channels, - enc_dim=512, - max_text_length=30, - enc_bi_rnn=False, - enc_drop_rnn=0.1, - enc_gru=False, - dec_bi_rnn=False, - dec_drop_rnn=0.0, - dec_gru=False, - d_k=512, - pred_dropout=0.1, - pred_concat=True, - **kwargs): - super(SARHead, self).__init__() - - # encoder module - self.encoder = SAREncoder( - enc_bi_rnn=enc_bi_rnn, - enc_drop_rnn=enc_drop_rnn, - enc_gru=enc_gru, - d_model=in_channels, - d_enc=enc_dim) - - # decoder module - self.decoder = ParallelSARDecoder( - out_channels=out_channels, - enc_bi_rnn=enc_bi_rnn, - dec_bi_rnn=dec_bi_rnn, - dec_drop_rnn=dec_drop_rnn, - dec_gru=dec_gru, - d_model=in_channels, - d_enc=enc_dim, - d_k=d_k, - pred_dropout=pred_dropout, - max_text_length=max_text_length, - pred_concat=pred_concat) - - def forward(self, feat, targets=None): - ''' - img_metas: [label, valid_ratio] - ''' - holistic_feat = self.encoder(feat, targets) # bsz c - - if self.training: - label = targets[0] # label - label = paddle.to_tensor(label, dtype='int64') - final_out = self.decoder( - feat, holistic_feat, label, img_metas=targets) - else: - final_out = self.decoder( - feat, - holistic_feat, - label=None, - img_metas=targets, - train_mode=False) - # (bsz, seq_len, num_classes) - - return final_out diff --git a/backend/ppocr/modeling/heads/rec_srn_head.py b/backend/ppocr/modeling/heads/rec_srn_head.py deleted file mode 100644 index 8d59e471..00000000 --- a/backend/ppocr/modeling/heads/rec_srn_head.py 
+++ /dev/null @@ -1,280 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import paddle.fluid as fluid -import numpy as np -from .self_attention import WrapEncoderForFeature -from .self_attention import WrapEncoder -from paddle.static import Program -from ppocr.modeling.backbones.rec_resnet_fpn import ResNetFPN -import paddle.fluid.framework as framework - -from collections import OrderedDict -gradient_clip = 10 - - -class PVAM(nn.Layer): - def __init__(self, in_channels, char_num, max_text_length, num_heads, - num_encoder_tus, hidden_dims): - super(PVAM, self).__init__() - self.char_num = char_num - self.max_length = max_text_length - self.num_heads = num_heads - self.num_encoder_TUs = num_encoder_tus - self.hidden_dims = hidden_dims - # Transformer encoder - t = 256 - c = 512 - self.wrap_encoder_for_feature = WrapEncoderForFeature( - src_vocab_size=1, - max_length=t, - n_layer=self.num_encoder_TUs, - n_head=self.num_heads, - d_key=int(self.hidden_dims / self.num_heads), - d_value=int(self.hidden_dims / self.num_heads), - d_model=self.hidden_dims, - d_inner_hid=self.hidden_dims, - prepostprocess_dropout=0.1, - attention_dropout=0.1, - relu_dropout=0.1, - preprocess_cmd="n", - postprocess_cmd="da", - weight_sharing=True) - - # PVAM - self.flatten0 = paddle.nn.Flatten(start_axis=0, stop_axis=1) - self.fc0 = paddle.nn.Linear( - in_features=in_channels, - out_features=in_channels, ) - self.emb = paddle.nn.Embedding( - num_embeddings=self.max_length, embedding_dim=in_channels) - self.flatten1 = paddle.nn.Flatten(start_axis=0, stop_axis=2) - self.fc1 = paddle.nn.Linear( - in_features=in_channels, out_features=1, bias_attr=False) - - def forward(self, inputs, encoder_word_pos, gsrm_word_pos): - b, c, h, w = inputs.shape - conv_features = paddle.reshape(inputs, shape=[-1, c, h * w]) - conv_features = paddle.transpose(conv_features, perm=[0, 2, 1]) - # transformer encoder - b, t, c = conv_features.shape - - enc_inputs = [conv_features, encoder_word_pos, None] - word_features = self.wrap_encoder_for_feature(enc_inputs) - - # pvam - b, t, c = word_features.shape - word_features = self.fc0(word_features) - word_features_ = paddle.reshape(word_features, [-1, 1, t, c]) - word_features_ = paddle.tile(word_features_, [1, self.max_length, 1, 1]) - word_pos_feature = self.emb(gsrm_word_pos) - word_pos_feature_ = paddle.reshape(word_pos_feature, - [-1, self.max_length, 1, c]) - word_pos_feature_ = paddle.tile(word_pos_feature_, [1, 1, t, 1]) - y = word_pos_feature_ + word_features_ - y = F.tanh(y) - attention_weight = self.fc1(y) - attention_weight = paddle.reshape( - attention_weight, shape=[-1, self.max_length, t]) - attention_weight = F.softmax(attention_weight, axis=-1) - pvam_features = paddle.matmul(attention_weight, - 
word_features) #[b, max_length, c] - return pvam_features - - -class GSRM(nn.Layer): - def __init__(self, in_channels, char_num, max_text_length, num_heads, - num_encoder_tus, num_decoder_tus, hidden_dims): - super(GSRM, self).__init__() - self.char_num = char_num - self.max_length = max_text_length - self.num_heads = num_heads - self.num_encoder_TUs = num_encoder_tus - self.num_decoder_TUs = num_decoder_tus - self.hidden_dims = hidden_dims - - self.fc0 = paddle.nn.Linear( - in_features=in_channels, out_features=self.char_num) - self.wrap_encoder0 = WrapEncoder( - src_vocab_size=self.char_num + 1, - max_length=self.max_length, - n_layer=self.num_decoder_TUs, - n_head=self.num_heads, - d_key=int(self.hidden_dims / self.num_heads), - d_value=int(self.hidden_dims / self.num_heads), - d_model=self.hidden_dims, - d_inner_hid=self.hidden_dims, - prepostprocess_dropout=0.1, - attention_dropout=0.1, - relu_dropout=0.1, - preprocess_cmd="n", - postprocess_cmd="da", - weight_sharing=True) - - self.wrap_encoder1 = WrapEncoder( - src_vocab_size=self.char_num + 1, - max_length=self.max_length, - n_layer=self.num_decoder_TUs, - n_head=self.num_heads, - d_key=int(self.hidden_dims / self.num_heads), - d_value=int(self.hidden_dims / self.num_heads), - d_model=self.hidden_dims, - d_inner_hid=self.hidden_dims, - prepostprocess_dropout=0.1, - attention_dropout=0.1, - relu_dropout=0.1, - preprocess_cmd="n", - postprocess_cmd="da", - weight_sharing=True) - - self.mul = lambda x: paddle.matmul(x=x, - y=self.wrap_encoder0.prepare_decoder.emb0.weight, - transpose_y=True) - - def forward(self, inputs, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2): - # ===== GSRM Visual-to-semantic embedding block ===== - b, t, c = inputs.shape - pvam_features = paddle.reshape(inputs, [-1, c]) - word_out = self.fc0(pvam_features) - word_ids = paddle.argmax(F.softmax(word_out), axis=1) - word_ids = paddle.reshape(x=word_ids, shape=[-1, t, 1]) - - #===== GSRM Semantic reasoning block ===== - """ - This module is achieved through bi-transformers, - ngram_feature1 is the froward one, ngram_fetaure2 is the backward one - """ - pad_idx = self.char_num - - word1 = paddle.cast(word_ids, "float32") - word1 = F.pad(word1, [1, 0], value=1.0 * pad_idx, data_format="NLC") - word1 = paddle.cast(word1, "int64") - word1 = word1[:, :-1, :] - word2 = word_ids - - enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1] - enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2] - - gsrm_feature1 = self.wrap_encoder0(enc_inputs_1) - gsrm_feature2 = self.wrap_encoder1(enc_inputs_2) - - gsrm_feature2 = F.pad(gsrm_feature2, [0, 1], - value=0., - data_format="NLC") - gsrm_feature2 = gsrm_feature2[:, 1:, ] - gsrm_features = gsrm_feature1 + gsrm_feature2 - - gsrm_out = self.mul(gsrm_features) - - b, t, c = gsrm_out.shape - gsrm_out = paddle.reshape(gsrm_out, [-1, c]) - - return gsrm_features, word_out, gsrm_out - - -class VSFD(nn.Layer): - def __init__(self, in_channels=512, pvam_ch=512, char_num=38): - super(VSFD, self).__init__() - self.char_num = char_num - self.fc0 = paddle.nn.Linear( - in_features=in_channels * 2, out_features=pvam_ch) - self.fc1 = paddle.nn.Linear( - in_features=pvam_ch, out_features=self.char_num) - - def forward(self, pvam_feature, gsrm_feature): - b, t, c1 = pvam_feature.shape - b, t, c2 = gsrm_feature.shape - combine_feature_ = paddle.concat([pvam_feature, gsrm_feature], axis=2) - img_comb_feature_ = paddle.reshape( - combine_feature_, shape=[-1, c1 + c2]) - img_comb_feature_map = self.fc0(img_comb_feature_) - 
img_comb_feature_map = F.sigmoid(img_comb_feature_map) - img_comb_feature_map = paddle.reshape( - img_comb_feature_map, shape=[-1, t, c1]) - combine_feature = img_comb_feature_map * pvam_feature + ( - 1.0 - img_comb_feature_map) * gsrm_feature - img_comb_feature = paddle.reshape(combine_feature, shape=[-1, c1]) - - out = self.fc1(img_comb_feature) - return out - - -class SRNHead(nn.Layer): - def __init__(self, in_channels, out_channels, max_text_length, num_heads, - num_encoder_TUs, num_decoder_TUs, hidden_dims, **kwargs): - super(SRNHead, self).__init__() - self.char_num = out_channels - self.max_length = max_text_length - self.num_heads = num_heads - self.num_encoder_TUs = num_encoder_TUs - self.num_decoder_TUs = num_decoder_TUs - self.hidden_dims = hidden_dims - - self.pvam = PVAM( - in_channels=in_channels, - char_num=self.char_num, - max_text_length=self.max_length, - num_heads=self.num_heads, - num_encoder_tus=self.num_encoder_TUs, - hidden_dims=self.hidden_dims) - - self.gsrm = GSRM( - in_channels=in_channels, - char_num=self.char_num, - max_text_length=self.max_length, - num_heads=self.num_heads, - num_encoder_tus=self.num_encoder_TUs, - num_decoder_tus=self.num_decoder_TUs, - hidden_dims=self.hidden_dims) - self.vsfd = VSFD(in_channels=in_channels, char_num=self.char_num) - - self.gsrm.wrap_encoder1.prepare_decoder.emb0 = self.gsrm.wrap_encoder0.prepare_decoder.emb0 - - def forward(self, inputs, targets=None): - others = targets[-4:] - encoder_word_pos = others[0] - gsrm_word_pos = others[1] - gsrm_slf_attn_bias1 = others[2] - gsrm_slf_attn_bias2 = others[3] - - pvam_feature = self.pvam(inputs, encoder_word_pos, gsrm_word_pos) - - gsrm_feature, word_out, gsrm_out = self.gsrm( - pvam_feature, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2) - - final_out = self.vsfd(pvam_feature, gsrm_feature) - if not self.training: - final_out = F.softmax(final_out, axis=1) - - _, decoded_out = paddle.topk(final_out, k=1) - - predicts = OrderedDict([ - ('predict', final_out), - ('pvam_feature', pvam_feature), - ('decoded_out', decoded_out), - ('word_out', word_out), - ('gsrm_out', gsrm_out), - ]) - - return predicts diff --git a/backend/ppocr/modeling/heads/self_attention.py b/backend/ppocr/modeling/heads/self_attention.py deleted file mode 100644 index 6c27fdbe..00000000 --- a/backend/ppocr/modeling/heads/self_attention.py +++ /dev/null @@ -1,406 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
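The VSFD block deleted above fuses the visual (PVAM) and semantic (GSRM) streams with a learned sigmoid gate. A self-contained sketch of that fusion rule, with batch/length/channel sizes assumed:

    import paddle
    import paddle.nn.functional as F

    # Gated fusion as in VSFD: g = sigmoid(W[v; s]); out = g*v + (1-g)*s
    b, t, c = 2, 25, 512                  # assumed batch, sequence length, channels
    v = paddle.randn([b, t, c])           # PVAM (visual) features
    s = paddle.randn([b, t, c])           # GSRM (semantic) features
    fc = paddle.nn.Linear(2 * c, c)

    g = F.sigmoid(fc(paddle.concat([v, s], axis=2)))
    fused = g * v + (1.0 - g) * s         # (b, t, c), fed to the final classifier

The gate lets the head lean on visual evidence where the image is clean and on the language-model-like GSRM features where it is not.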
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -from paddle import ParamAttr, nn -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import paddle.fluid as fluid -import numpy as np -gradient_clip = 10 - - -class WrapEncoderForFeature(nn.Layer): - def __init__(self, - src_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - bos_idx=0): - super(WrapEncoderForFeature, self).__init__() - - self.prepare_encoder = PrepareEncoder( - src_vocab_size, - d_model, - max_length, - prepostprocess_dropout, - bos_idx=bos_idx, - word_emb_param_name="src_word_emb_table") - self.encoder = Encoder(n_layer, n_head, d_key, d_value, d_model, - d_inner_hid, prepostprocess_dropout, - attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd) - - def forward(self, enc_inputs): - conv_features, src_pos, src_slf_attn_bias = enc_inputs - enc_input = self.prepare_encoder(conv_features, src_pos) - enc_output = self.encoder(enc_input, src_slf_attn_bias) - return enc_output - - -class WrapEncoder(nn.Layer): - """ - embedder + encoder - """ - - def __init__(self, - src_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - bos_idx=0): - super(WrapEncoder, self).__init__() - - self.prepare_decoder = PrepareDecoder( - src_vocab_size, - d_model, - max_length, - prepostprocess_dropout, - bos_idx=bos_idx) - self.encoder = Encoder(n_layer, n_head, d_key, d_value, d_model, - d_inner_hid, prepostprocess_dropout, - attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd) - - def forward(self, enc_inputs): - src_word, src_pos, src_slf_attn_bias = enc_inputs - enc_input = self.prepare_decoder(src_word, src_pos) - enc_output = self.encoder(enc_input, src_slf_attn_bias) - return enc_output - - -class Encoder(nn.Layer): - """ - encoder - """ - - def __init__(self, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd="n", - postprocess_cmd="da"): - - super(Encoder, self).__init__() - - self.encoder_layers = list() - for i in range(n_layer): - self.encoder_layers.append( - self.add_sublayer( - "layer_%d" % i, - EncoderLayer(n_head, d_key, d_value, d_model, d_inner_hid, - prepostprocess_dropout, attention_dropout, - relu_dropout, preprocess_cmd, - postprocess_cmd))) - self.processer = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - - def forward(self, enc_input, attn_bias): - for encoder_layer in self.encoder_layers: - enc_output = encoder_layer(enc_input, attn_bias) - enc_input = enc_output - enc_output = self.processer(enc_output) - return enc_output - - -class EncoderLayer(nn.Layer): - """ - EncoderLayer - """ - - def __init__(self, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd="n", - postprocess_cmd="da"): - - super(EncoderLayer, self).__init__() - self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - self.self_attn = MultiHeadAttention(d_key, d_value, d_model, n_head, - attention_dropout) - self.postprocesser1 = 
PrePostProcessLayer(postprocess_cmd, d_model, - prepostprocess_dropout) - - self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - self.ffn = FFN(d_inner_hid, d_model, relu_dropout) - self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model, - prepostprocess_dropout) - - def forward(self, enc_input, attn_bias): - attn_output = self.self_attn( - self.preprocesser1(enc_input), None, None, attn_bias) - attn_output = self.postprocesser1(attn_output, enc_input) - ffn_output = self.ffn(self.preprocesser2(attn_output)) - ffn_output = self.postprocesser2(ffn_output, attn_output) - return ffn_output - - -class MultiHeadAttention(nn.Layer): - """ - Multi-Head Attention - """ - - def __init__(self, d_key, d_value, d_model, n_head=1, dropout_rate=0.): - super(MultiHeadAttention, self).__init__() - self.n_head = n_head - self.d_key = d_key - self.d_value = d_value - self.d_model = d_model - self.dropout_rate = dropout_rate - self.q_fc = paddle.nn.Linear( - in_features=d_model, out_features=d_key * n_head, bias_attr=False) - self.k_fc = paddle.nn.Linear( - in_features=d_model, out_features=d_key * n_head, bias_attr=False) - self.v_fc = paddle.nn.Linear( - in_features=d_model, out_features=d_value * n_head, bias_attr=False) - self.proj_fc = paddle.nn.Linear( - in_features=d_value * n_head, out_features=d_model, bias_attr=False) - - def _prepare_qkv(self, queries, keys, values, cache=None): - if keys is None: # self-attention - keys, values = queries, queries - static_kv = False - else: # cross-attention - static_kv = True - - q = self.q_fc(queries) - q = paddle.reshape(x=q, shape=[0, 0, self.n_head, self.d_key]) - q = paddle.transpose(x=q, perm=[0, 2, 1, 3]) - - if cache is not None and static_kv and "static_k" in cache: - # for encoder-decoder attention in inference and has cached - k = cache["static_k"] - v = cache["static_v"] - else: - k = self.k_fc(keys) - v = self.v_fc(values) - k = paddle.reshape(x=k, shape=[0, 0, self.n_head, self.d_key]) - k = paddle.transpose(x=k, perm=[0, 2, 1, 3]) - v = paddle.reshape(x=v, shape=[0, 0, self.n_head, self.d_value]) - v = paddle.transpose(x=v, perm=[0, 2, 1, 3]) - - if cache is not None: - if static_kv and not "static_k" in cache: - # for encoder-decoder attention in inference and has not cached - cache["static_k"], cache["static_v"] = k, v - elif not static_kv: - # for decoder self-attention in inference - cache_k, cache_v = cache["k"], cache["v"] - k = paddle.concat([cache_k, k], axis=2) - v = paddle.concat([cache_v, v], axis=2) - cache["k"], cache["v"] = k, v - - return q, k, v - - def forward(self, queries, keys, values, attn_bias, cache=None): - # compute q ,k ,v - keys = queries if keys is None else keys - values = keys if values is None else values - q, k, v = self._prepare_qkv(queries, keys, values, cache) - - # scale dot product attention - product = paddle.matmul(x=q, y=k, transpose_y=True) - product = product * self.d_model**-0.5 - if attn_bias is not None: - product += attn_bias - weights = F.softmax(product) - if self.dropout_rate: - weights = F.dropout( - weights, p=self.dropout_rate, mode="downscale_in_infer") - out = paddle.matmul(weights, v) - - # combine heads - out = paddle.transpose(out, perm=[0, 2, 1, 3]) - out = paddle.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) - - # project to output - out = self.proj_fc(out) - - return out - - -class PrePostProcessLayer(nn.Layer): - """ - PrePostProcessLayer - """ - - def __init__(self, process_cmd, d_model, dropout_rate): - 
super(PrePostProcessLayer, self).__init__() - self.process_cmd = process_cmd - self.functors = [] - for cmd in self.process_cmd: - if cmd == "a": # add residual connection - self.functors.append(lambda x, y: x + y if y is not None else x) - elif cmd == "n": # add layer normalization - self.functors.append( - self.add_sublayer( - "layer_norm_%d" % len(self.sublayers()), - paddle.nn.LayerNorm( - normalized_shape=d_model, - weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.))))) - elif cmd == "d": # add dropout - self.functors.append(lambda x: F.dropout( - x, p=dropout_rate, mode="downscale_in_infer") - if dropout_rate else x) - - def forward(self, x, residual=None): - for i, cmd in enumerate(self.process_cmd): - if cmd == "a": - x = self.functors[i](x, residual) - else: - x = self.functors[i](x) - return x - - -class PrepareEncoder(nn.Layer): - def __init__(self, - src_vocab_size, - src_emb_dim, - src_max_len, - dropout_rate=0, - bos_idx=0, - word_emb_param_name=None, - pos_enc_param_name=None): - super(PrepareEncoder, self).__init__() - self.src_emb_dim = src_emb_dim - self.src_max_len = src_max_len - self.emb = paddle.nn.Embedding( - num_embeddings=self.src_max_len, embedding_dim=self.src_emb_dim) - self.dropout_rate = dropout_rate - - def forward(self, src_word, src_pos): - src_word_emb = src_word - src_word_emb = fluid.layers.cast(src_word_emb, 'float32') - src_word_emb = paddle.scale(x=src_word_emb, scale=self.src_emb_dim**0.5) - src_pos = paddle.squeeze(src_pos, axis=-1) - src_pos_enc = self.emb(src_pos) - src_pos_enc.stop_gradient = True - enc_input = src_word_emb + src_pos_enc - if self.dropout_rate: - out = F.dropout( - x=enc_input, p=self.dropout_rate, mode="downscale_in_infer") - else: - out = enc_input - return out - - -class PrepareDecoder(nn.Layer): - def __init__(self, - src_vocab_size, - src_emb_dim, - src_max_len, - dropout_rate=0, - bos_idx=0, - word_emb_param_name=None, - pos_enc_param_name=None): - super(PrepareDecoder, self).__init__() - self.src_emb_dim = src_emb_dim - """ - self.emb0 = Embedding(num_embeddings=src_vocab_size, - embedding_dim=src_emb_dim) - """ - self.emb0 = paddle.nn.Embedding( - num_embeddings=src_vocab_size, - embedding_dim=self.src_emb_dim, - padding_idx=bos_idx, - weight_attr=paddle.ParamAttr( - name=word_emb_param_name, - initializer=nn.initializer.Normal(0., src_emb_dim**-0.5))) - self.emb1 = paddle.nn.Embedding( - num_embeddings=src_max_len, - embedding_dim=self.src_emb_dim, - weight_attr=paddle.ParamAttr(name=pos_enc_param_name)) - self.dropout_rate = dropout_rate - - def forward(self, src_word, src_pos): - src_word = fluid.layers.cast(src_word, 'int64') - src_word = paddle.squeeze(src_word, axis=-1) - src_word_emb = self.emb0(src_word) - src_word_emb = paddle.scale(x=src_word_emb, scale=self.src_emb_dim**0.5) - src_pos = paddle.squeeze(src_pos, axis=-1) - src_pos_enc = self.emb1(src_pos) - src_pos_enc.stop_gradient = True - enc_input = src_word_emb + src_pos_enc - if self.dropout_rate: - out = F.dropout( - x=enc_input, p=self.dropout_rate, mode="downscale_in_infer") - else: - out = enc_input - return out - - -class FFN(nn.Layer): - """ - Feed-Forward Network - """ - - def __init__(self, d_inner_hid, d_model, dropout_rate): - super(FFN, self).__init__() - self.dropout_rate = dropout_rate - self.fc1 = paddle.nn.Linear( - in_features=d_model, out_features=d_inner_hid) - self.fc2 = paddle.nn.Linear( - in_features=d_inner_hid, out_features=d_model) - - 
def forward(self, x): - hidden = self.fc1(x) - hidden = F.relu(hidden) - if self.dropout_rate: - hidden = F.dropout( - hidden, p=self.dropout_rate, mode="downscale_in_infer") - out = self.fc2(hidden) - return out diff --git a/backend/ppocr/modeling/heads/table_att_head.py b/backend/ppocr/modeling/heads/table_att_head.py deleted file mode 100644 index e354f40d..00000000 --- a/backend/ppocr/modeling/heads/table_att_head.py +++ /dev/null @@ -1,246 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -import numpy as np - - -class TableAttentionHead(nn.Layer): - def __init__(self, - in_channels, - hidden_size, - loc_type, - in_max_len=488, - max_text_length=100, - max_elem_length=800, - max_cell_num=500, - **kwargs): - super(TableAttentionHead, self).__init__() - self.input_size = in_channels[-1] - self.hidden_size = hidden_size - self.elem_num = 30 - self.max_text_length = max_text_length - self.max_elem_length = max_elem_length - self.max_cell_num = max_cell_num - - self.structure_attention_cell = AttentionGRUCell( - self.input_size, hidden_size, self.elem_num, use_gru=False) - self.structure_generator = nn.Linear(hidden_size, self.elem_num) - self.loc_type = loc_type - self.in_max_len = in_max_len - - if self.loc_type == 1: - self.loc_generator = nn.Linear(hidden_size, 4) - else: - if self.in_max_len == 640: - self.loc_fea_trans = nn.Linear(400, self.max_elem_length + 1) - elif self.in_max_len == 800: - self.loc_fea_trans = nn.Linear(625, self.max_elem_length + 1) - else: - self.loc_fea_trans = nn.Linear(256, self.max_elem_length + 1) - self.loc_generator = nn.Linear(self.input_size + hidden_size, 4) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None): - # if and else branch are both needed when you want to assign a variable - # if you modify the var in just one branch, then the modification will not work. 
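The comment above refers to a Paddle static-graph constraint: a tensor modified in only one branch of a conditional may not keep the modification after export, so both branches must assign the variable. A minimal illustration of the safe pattern (shapes assumed):

    import numpy as np
    import paddle

    fea = paddle.randn([2, 96, 16, 16])    # assumed (N, C, H, W) feature map

    # Assign `fea` on every path so the update survives static-graph tracing.
    if len(fea.shape) == 3:
        fea = fea                          # no-op branch, but still an assignment
    else:
        last = int(np.prod(fea.shape[2:]))
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last])
    fea = fea.transpose([0, 2, 1])         # (N, T, C)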
- fea = inputs[-1] - if len(fea.shape) == 3: - pass - else: - last_shape = int(np.prod(fea.shape[2:])) # gry added - fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape]) - fea = fea.transpose([0, 2, 1]) # (NTC)(batch, width, channels) - batch_size = fea.shape[0] - - hidden = paddle.zeros((batch_size, self.hidden_size)) - output_hiddens = [] - if self.training and targets is not None: - structure = targets[0] - for i in range(self.max_elem_length + 1): - elem_onehots = self._char_to_onehot( - structure[:, i], onehot_dim=self.elem_num) - (outputs, hidden), alpha = self.structure_attention_cell( - hidden, fea, elem_onehots) - output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) - output = paddle.concat(output_hiddens, axis=1) - structure_probs = self.structure_generator(output) - if self.loc_type == 1: - loc_preds = self.loc_generator(output) - loc_preds = F.sigmoid(loc_preds) - else: - loc_fea = fea.transpose([0, 2, 1]) - loc_fea = self.loc_fea_trans(loc_fea) - loc_fea = loc_fea.transpose([0, 2, 1]) - loc_concat = paddle.concat([output, loc_fea], axis=2) - loc_preds = self.loc_generator(loc_concat) - loc_preds = F.sigmoid(loc_preds) - else: - temp_elem = paddle.zeros(shape=[batch_size], dtype="int32") - structure_probs = None - loc_preds = None - elem_onehots = None - outputs = None - alpha = None - max_elem_length = paddle.to_tensor(self.max_elem_length) - i = 0 - while i < max_elem_length + 1: - elem_onehots = self._char_to_onehot( - temp_elem, onehot_dim=self.elem_num) - (outputs, hidden), alpha = self.structure_attention_cell( - hidden, fea, elem_onehots) - output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) - structure_probs_step = self.structure_generator(outputs) - temp_elem = structure_probs_step.argmax(axis=1, dtype="int32") - i += 1 - - output = paddle.concat(output_hiddens, axis=1) - structure_probs = self.structure_generator(output) - structure_probs = F.softmax(structure_probs) - if self.loc_type == 1: - loc_preds = self.loc_generator(output) - loc_preds = F.sigmoid(loc_preds) - else: - loc_fea = fea.transpose([0, 2, 1]) - loc_fea = self.loc_fea_trans(loc_fea) - loc_fea = loc_fea.transpose([0, 2, 1]) - loc_concat = paddle.concat([output, loc_fea], axis=2) - loc_preds = self.loc_generator(loc_concat) - loc_preds = F.sigmoid(loc_preds) - return {'structure_probs': structure_probs, 'loc_preds': loc_preds} - - -class AttentionGRUCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionGRUCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1) - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - cur_hidden = self.rnn(concat_context, prev_hidden) - return cur_hidden, alpha - - -class AttentionLSTM(nn.Layer): - def __init__(self, in_channels, out_channels, hidden_size, **kwargs): - super(AttentionLSTM, self).__init__() - self.input_size = 
in_channels - self.hidden_size = hidden_size - self.num_classes = out_channels - - self.attention_cell = AttentionLSTMCell( - in_channels, hidden_size, out_channels, use_gru=False) - self.generator = nn.Linear(hidden_size, out_channels) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = inputs.shape[0] - num_steps = batch_max_length - - hidden = (paddle.zeros((batch_size, self.hidden_size)), paddle.zeros( - (batch_size, self.hidden_size))) - output_hiddens = [] - - if targets is not None: - for i in range(num_steps): - # one-hot vectors for a i-th char - char_onehots = self._char_to_onehot( - targets[:, i], onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - - hidden = (hidden[1][0], hidden[1][1]) - output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1)) - output = paddle.concat(output_hiddens, axis=1) - probs = self.generator(output) - - else: - targets = paddle.zeros(shape=[batch_size], dtype="int32") - probs = None - - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets, onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - probs_step = self.generator(hidden[0]) - hidden = (hidden[1][0], hidden[1][1]) - if probs is None: - probs = paddle.unsqueeze(probs_step, axis=1) - else: - probs = paddle.concat( - [probs, paddle.unsqueeze( - probs_step, axis=1)], axis=1) - - next_input = probs_step.argmax(axis=1) - - targets = next_input - - return probs - - -class AttentionLSTMCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionLSTMCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - if not use_gru: - self.rnn = nn.LSTMCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - else: - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1) - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - cur_hidden = self.rnn(concat_context, prev_hidden) - - return cur_hidden, alpha diff --git a/backend/ppocr/modeling/necks/__init__.py b/backend/ppocr/modeling/necks/__init__.py deleted file mode 100644 index e10b082d..00000000 --- a/backend/ppocr/modeling/necks/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
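Both attention heads above decode the same way at inference: each step's argmax is one-hot encoded (or embedded) and fed back as the next step's input. A stripped-down sketch of that greedy loop, with the per-step attention cell abstracted behind a hypothetical step_fn:

    import paddle
    import paddle.nn.functional as F

    def greedy_decode(step_fn, hidden, num_steps, num_classes, batch_size):
        # step_fn(hidden, onehot) -> (hidden, logits); hypothetical signature.
        targets = paddle.zeros([batch_size], dtype="int64")  # start token 0 assumed
        probs = []
        for _ in range(num_steps):
            onehot = F.one_hot(targets, num_classes)
            hidden, logits = step_fn(hidden, onehot)
            probs.append(logits)
            targets = logits.argmax(axis=1)                  # feed best class back
        return paddle.stack(probs, axis=1)                   # (N, T, num_classes)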
-# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['build_neck'] - - -def build_neck(config): - from .db_fpn import DBFPN, RSEFPN, LKPAN - from .east_fpn import EASTFPN - from .sast_fpn import SASTFPN - from .rnn import SequenceEncoder - from .pg_fpn import PGFPN - from .table_fpn import TableFPN - from .fpn import FPN - from .fce_fpn import FCEFPN - from .pren_fpn import PRENFPN - support_dict = [ - 'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN', - 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN' - ] - - module_name = config.pop('name') - assert module_name in support_dict, Exception('neck only support {}'.format( - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/necks/db_fpn.py b/backend/ppocr/modeling/necks/db_fpn.py deleted file mode 100644 index 93ed2dbf..00000000 --- a/backend/ppocr/modeling/necks/db_fpn.py +++ /dev/null @@ -1,358 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..'))) - -from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule - - -class DSConv(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - padding, - stride=1, - groups=None, - if_act=True, - act="relu", - **kwargs): - super(DSConv, self).__init__() - if groups == None: - groups = in_channels - self.if_act = if_act - self.act = act - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=False) - - self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None) - - self.conv2 = nn.Conv2D( - in_channels=in_channels, - out_channels=int(in_channels * 4), - kernel_size=1, - stride=1, - bias_attr=False) - - self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None) - - self.conv3 = nn.Conv2D( - in_channels=int(in_channels * 4), - out_channels=out_channels, - kernel_size=1, - stride=1, - bias_attr=False) - self._c = [in_channels, out_channels] - if in_channels != out_channels: - self.conv_end = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - bias_attr=False) - - def forward(self, inputs): - - x = self.conv1(inputs) - x = self.bn1(x) - - x = self.conv2(x) - x = self.bn2(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = F.hardswish(x) - else: - print("The activation function({}) is selected incorrectly.". 
- format(self.act)) - exit() - - x = self.conv3(x) - if self._c[0] != self._c[1]: - x = x + self.conv_end(inputs) - return x - - -class DBFPN(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): - super(DBFPN, self).__init__() - self.out_channels = out_channels - weight_attr = paddle.nn.initializer.KaimingUniform() - - self.in2_conv = nn.Conv2D( - in_channels=in_channels[0], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in3_conv = nn.Conv2D( - in_channels=in_channels[1], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in4_conv = nn.Conv2D( - in_channels=in_channels[2], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in5_conv = nn.Conv2D( - in_channels=in_channels[3], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p5_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p4_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p3_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p2_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.in5_conv(c5) - in4 = self.in4_conv(c4) - in3 = self.in3_conv(c3) - in2 = self.in2_conv(c2) - - out4 = in4 + F.upsample( - in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 - - p5 = self.p5_conv(in5) - p4 = self.p4_conv(out4) - p3 = self.p3_conv(out3) - p2 = self.p2_conv(out2) - p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) - p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) - p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) - - fuse = paddle.concat([p5, p4, p3, p2], axis=1) - return fuse - - -class RSELayer(nn.Layer): - def __init__(self, in_channels, out_channels, kernel_size, shortcut=True): - super(RSELayer, self).__init__() - weight_attr = paddle.nn.initializer.KaimingUniform() - self.out_channels = out_channels - self.in_conv = nn.Conv2D( - in_channels=in_channels, - out_channels=self.out_channels, - kernel_size=kernel_size, - padding=int(kernel_size // 2), - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.se_block = SEModule(self.out_channels) - self.shortcut = shortcut - - def forward(self, ins): - x = self.in_conv(ins) - if self.shortcut: - out = x + self.se_block(x) - else: - out = self.se_block(x) - return out - - -class RSEFPN(nn.Layer): - def __init__(self, in_channels, out_channels, shortcut=True, **kwargs): - super(RSEFPN, self).__init__() - self.out_channels = out_channels - self.ins_conv = nn.LayerList() - self.inp_conv = nn.LayerList() - - for 
i in range(len(in_channels)): - self.ins_conv.append( - RSELayer( - in_channels[i], - out_channels, - kernel_size=1, - shortcut=shortcut)) - self.inp_conv.append( - RSELayer( - out_channels, - out_channels // 4, - kernel_size=3, - shortcut=shortcut)) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.ins_conv[3](c5) - in4 = self.ins_conv[2](c4) - in3 = self.ins_conv[1](c3) - in2 = self.ins_conv[0](c2) - - out4 = in4 + F.upsample( - in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 - - p5 = self.inp_conv[3](in5) - p4 = self.inp_conv[2](out4) - p3 = self.inp_conv[1](out3) - p2 = self.inp_conv[0](out2) - - p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) - p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) - p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) - - fuse = paddle.concat([p5, p4, p3, p2], axis=1) - return fuse - - -class LKPAN(nn.Layer): - def __init__(self, in_channels, out_channels, mode='large', **kwargs): - super(LKPAN, self).__init__() - self.out_channels = out_channels - weight_attr = paddle.nn.initializer.KaimingUniform() - - self.ins_conv = nn.LayerList() - self.inp_conv = nn.LayerList() - # pan head - self.pan_head_conv = nn.LayerList() - self.pan_lat_conv = nn.LayerList() - - if mode.lower() == 'lite': - p_layer = DSConv - elif mode.lower() == 'large': - p_layer = nn.Conv2D - else: - raise ValueError( - "mode can only be one of ['lite', 'large'], but received {}". - format(mode)) - - for i in range(len(in_channels)): - self.ins_conv.append( - nn.Conv2D( - in_channels=in_channels[i], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - - self.inp_conv.append( - p_layer( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=9, - padding=4, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - - if i > 0: - self.pan_head_conv.append( - nn.Conv2D( - in_channels=self.out_channels // 4, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - stride=2, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - self.pan_lat_conv.append( - p_layer( - in_channels=self.out_channels // 4, - out_channels=self.out_channels // 4, - kernel_size=9, - padding=4, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.ins_conv[3](c5) - in4 = self.ins_conv[2](c4) - in3 = self.ins_conv[1](c3) - in2 = self.ins_conv[0](c2) - - out4 = in4 + F.upsample( - in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 - - f5 = self.inp_conv[3](in5) - f4 = self.inp_conv[2](out4) - f3 = self.inp_conv[1](out3) - f2 = self.inp_conv[0](out2) - - pan3 = f3 + self.pan_head_conv[0](f2) - pan4 = f4 + self.pan_head_conv[1](pan3) - pan5 = f5 + self.pan_head_conv[2](pan4) - - p2 = self.pan_lat_conv[0](f2) - p3 = self.pan_lat_conv[1](pan3) - p4 = self.pan_lat_conv[2](pan4) - p5 = self.pan_lat_conv[3](pan5) - - p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) - p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) - p3 = F.upsample(p3, scale_factor=2, 
mode="nearest", align_mode=1) - - fuse = paddle.concat([p5, p4, p3, p2], axis=1) - return fuse diff --git a/backend/ppocr/modeling/necks/east_fpn.py b/backend/ppocr/modeling/necks/east_fpn.py deleted file mode 100644 index 120ff156..00000000 --- a/backend/ppocr/modeling/necks/east_fpn.py +++ /dev/null @@ -1,188 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class DeConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(DeConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.deconv = nn.Conv2DTranspose( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.deconv(x) - x = self.bn(x) - return x - - -class EASTFPN(nn.Layer): - def __init__(self, in_channels, model_name, **kwargs): - super(EASTFPN, self).__init__() - self.model_name = model_name - if self.model_name == "large": - self.out_channels = 128 - else: - self.out_channels = 64 - self.in_channels = in_channels[::-1] - self.h1_conv = ConvBNLayer( - in_channels=self.out_channels+self.in_channels[1], - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_h_1") - self.h2_conv = ConvBNLayer( - in_channels=self.out_channels+self.in_channels[2], - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_h_2") - self.h3_conv = ConvBNLayer( - 
in_channels=self.out_channels+self.in_channels[3], - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_h_3") - self.g0_deconv = DeConvBNLayer( - in_channels=self.in_channels[0], - out_channels=self.out_channels, - kernel_size=4, - stride=2, - padding=1, - if_act=True, - act='relu', - name="unet_g_0") - self.g1_deconv = DeConvBNLayer( - in_channels=self.out_channels, - out_channels=self.out_channels, - kernel_size=4, - stride=2, - padding=1, - if_act=True, - act='relu', - name="unet_g_1") - self.g2_deconv = DeConvBNLayer( - in_channels=self.out_channels, - out_channels=self.out_channels, - kernel_size=4, - stride=2, - padding=1, - if_act=True, - act='relu', - name="unet_g_2") - self.g3_conv = ConvBNLayer( - in_channels=self.out_channels, - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_g_3") - - def forward(self, x): - f = x[::-1] - - h = f[0] - g = self.g0_deconv(h) - h = paddle.concat([g, f[1]], axis=1) - h = self.h1_conv(h) - g = self.g1_deconv(h) - h = paddle.concat([g, f[2]], axis=1) - h = self.h2_conv(h) - g = self.g2_deconv(h) - h = paddle.concat([g, f[3]], axis=1) - h = self.h3_conv(h) - g = self.g3_conv(h) - - return g \ No newline at end of file diff --git a/backend/ppocr/modeling/necks/fce_fpn.py b/backend/ppocr/modeling/necks/fce_fpn.py deleted file mode 100644 index 954e964e..00000000 --- a/backend/ppocr/modeling/necks/fce_fpn.py +++ /dev/null @@ -1,280 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py -""" - -import paddle.nn as nn -import paddle.nn.functional as F -from paddle import ParamAttr -from paddle.nn.initializer import XavierUniform -from paddle.nn.initializer import Normal -from paddle.regularizer import L2Decay - -__all__ = ['FCEFPN'] - - -class ConvNormLayer(nn.Layer): - def __init__(self, - ch_in, - ch_out, - filter_size, - stride, - groups=1, - norm_type='bn', - norm_decay=0., - norm_groups=32, - lr_scale=1., - freeze_norm=False, - initializer=Normal( - mean=0., std=0.01)): - super(ConvNormLayer, self).__init__() - assert norm_type in ['bn', 'sync_bn', 'gn'] - - bias_attr = False - - self.conv = nn.Conv2D( - in_channels=ch_in, - out_channels=ch_out, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr( - initializer=initializer, learning_rate=1.), - bias_attr=bias_attr) - - norm_lr = 0. if freeze_norm else 1. 
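Worth noting at this step: freezing the norm layer is implemented via a zero learning-rate multiplier on its scale/offset parameters (the ParamAttr just below), not via stop_gradient; the parameters stay in the graph but never update. A condensed sketch of that mechanism (flag values assumed):

    from paddle import ParamAttr
    from paddle.regularizer import L2Decay

    freeze_norm, norm_decay = True, 0.0
    norm_lr = 0.0 if freeze_norm else 1.0     # 0 multiplier => no updates
    attr = ParamAttr(
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay) if norm_decay is not None else None)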
- param_attr = ParamAttr( - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay) if norm_decay is not None else None) - bias_attr = ParamAttr( - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay) if norm_decay is not None else None) - if norm_type == 'bn': - self.norm = nn.BatchNorm2D( - ch_out, weight_attr=param_attr, bias_attr=bias_attr) - elif norm_type == 'sync_bn': - self.norm = nn.SyncBatchNorm( - ch_out, weight_attr=param_attr, bias_attr=bias_attr) - elif norm_type == 'gn': - self.norm = nn.GroupNorm( - num_groups=norm_groups, - num_channels=ch_out, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs): - out = self.conv(inputs) - out = self.norm(out) - return out - - -class FCEFPN(nn.Layer): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - Args: - in_channels (list[int]): input channels of each level which can be - derived from the output shape of backbone by from_config - out_channels (list[int]): output channel of each level - spatial_scales (list[float]): the spatial scales between input feature - maps and original input image which can be derived from the output - shape of backbone by from_config - has_extra_convs (bool): whether to add extra conv to the last level. - default False - extra_stage (int): the number of extra stages added to the last level. - default 1 - use_c5 (bool): Whether to use c5 as the input of extra stage, - otherwise p5 is used. default True - norm_type (string|None): The normalization type in FPN module. If - norm_type is None, norm will not be used after conv and if - norm_type is string, bn, gn, sync_bn are available. default None - norm_decay (float): weight decay for normalization layer weights. - default 0. - freeze_norm (bool): whether to freeze normalization layer. - default False - relu_before_extra_convs (bool): whether to add relu before extra convs. - default False - - """ - - def __init__(self, - in_channels, - out_channels, - spatial_scales=[0.25, 0.125, 0.0625, 0.03125], - has_extra_convs=False, - extra_stage=1, - use_c5=True, - norm_type=None, - norm_decay=0., - freeze_norm=False, - relu_before_extra_convs=True): - super(FCEFPN, self).__init__() - self.out_channels = out_channels - for s in range(extra_stage): - spatial_scales = spatial_scales + [spatial_scales[-1] / 2.] 
- self.spatial_scales = spatial_scales - self.has_extra_convs = has_extra_convs - self.extra_stage = extra_stage - self.use_c5 = use_c5 - self.relu_before_extra_convs = relu_before_extra_convs - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - - self.lateral_convs = [] - self.fpn_convs = [] - fan = out_channels * 3 * 3 - - # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone - # 0 <= st_stage < ed_stage <= 3 - st_stage = 4 - len(in_channels) - ed_stage = st_stage + len(in_channels) - 1 - for i in range(st_stage, ed_stage + 1): - if i == 3: - lateral_name = 'fpn_inner_res5_sum' - else: - lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2) - in_c = in_channels[i - st_stage] - if self.norm_type is not None: - lateral = self.add_sublayer( - lateral_name, - ConvNormLayer( - ch_in=in_c, - ch_out=out_channels, - filter_size=1, - stride=1, - norm_type=self.norm_type, - norm_decay=self.norm_decay, - freeze_norm=self.freeze_norm, - initializer=XavierUniform(fan_out=in_c))) - else: - lateral = self.add_sublayer( - lateral_name, - nn.Conv2D( - in_channels=in_c, - out_channels=out_channels, - kernel_size=1, - weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=in_c)))) - self.lateral_convs.append(lateral) - - for i in range(st_stage, ed_stage + 1): - fpn_name = 'fpn_res{}_sum'.format(i + 2) - if self.norm_type is not None: - fpn_conv = self.add_sublayer( - fpn_name, - ConvNormLayer( - ch_in=out_channels, - ch_out=out_channels, - filter_size=3, - stride=1, - norm_type=self.norm_type, - norm_decay=self.norm_decay, - freeze_norm=self.freeze_norm, - initializer=XavierUniform(fan_out=fan))) - else: - fpn_conv = self.add_sublayer( - fpn_name, - nn.Conv2D( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - padding=1, - weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=fan)))) - self.fpn_convs.append(fpn_conv) - - # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5) - if self.has_extra_convs: - for i in range(self.extra_stage): - lvl = ed_stage + 1 + i - if i == 0 and self.use_c5: - in_c = in_channels[-1] - else: - in_c = out_channels - extra_fpn_name = 'fpn_{}'.format(lvl + 2) - if self.norm_type is not None: - extra_fpn_conv = self.add_sublayer( - extra_fpn_name, - ConvNormLayer( - ch_in=in_c, - ch_out=out_channels, - filter_size=3, - stride=2, - norm_type=self.norm_type, - norm_decay=self.norm_decay, - freeze_norm=self.freeze_norm, - initializer=XavierUniform(fan_out=fan))) - else: - extra_fpn_conv = self.add_sublayer( - extra_fpn_name, - nn.Conv2D( - in_channels=in_c, - out_channels=out_channels, - kernel_size=3, - stride=2, - padding=1, - weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=fan)))) - self.fpn_convs.append(extra_fpn_conv) - - @classmethod - def from_config(cls, cfg, input_shape): - return { - 'in_channels': [i.channels for i in input_shape], - 'spatial_scales': [1.0 / i.stride for i in input_shape], - } - - def forward(self, body_feats): - laterals = [] - num_levels = len(body_feats) - - for i in range(num_levels): - laterals.append(self.lateral_convs[i](body_feats[i])) - - for i in range(1, num_levels): - lvl = num_levels - i - upsample = F.interpolate( - laterals[lvl], - scale_factor=2., - mode='nearest', ) - laterals[lvl - 1] += upsample - - fpn_output = [] - for lvl in range(num_levels): - fpn_output.append(self.fpn_convs[lvl](laterals[lvl])) - - if self.extra_stage > 0: - # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) - if not 
self.has_extra_convs: - assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs' - fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2)) - # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5) - else: - if self.use_c5: - extra_source = body_feats[-1] - else: - extra_source = fpn_output[-1] - fpn_output.append(self.fpn_convs[num_levels](extra_source)) - - for i in range(1, self.extra_stage): - if self.relu_before_extra_convs: - fpn_output.append(self.fpn_convs[num_levels + i](F.relu( - fpn_output[-1]))) - else: - fpn_output.append(self.fpn_convs[num_levels + i]( - fpn_output[-1])) - return fpn_output diff --git a/backend/ppocr/modeling/necks/fpn.py b/backend/ppocr/modeling/necks/fpn.py deleted file mode 100644 index 48c85b1e..00000000 --- a/backend/ppocr/modeling/necks/fpn.py +++ /dev/null @@ -1,138 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/neck/fpn.py -""" - -import paddle.nn as nn -import paddle -import math -import paddle.nn.functional as F - - -class Conv_BN_ReLU(nn.Layer): - def __init__(self, - in_planes, - out_planes, - kernel_size=1, - stride=1, - padding=0): - super(Conv_BN_ReLU, self).__init__() - self.conv = nn.Conv2D( - in_planes, - out_planes, - kernel_size=kernel_size, - stride=stride, - padding=padding, - bias_attr=False) - self.bn = nn.BatchNorm2D(out_planes, momentum=0.1) - self.relu = nn.ReLU() - - for m in self.sublayers(): - if isinstance(m, nn.Conv2D): - n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Normal( - 0, math.sqrt(2. 
/ n))) - elif isinstance(m, nn.BatchNorm2D): - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(1.0)) - m.bias = paddle.create_parameter( - shape=m.bias.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(0.0)) - - def forward(self, x): - return self.relu(self.bn(self.conv(x))) - - -class FPN(nn.Layer): - def __init__(self, in_channels, out_channels): - super(FPN, self).__init__() - - # Top layer - self.toplayer_ = Conv_BN_ReLU( - in_channels[3], out_channels, kernel_size=1, stride=1, padding=0) - # Lateral layers - self.latlayer1_ = Conv_BN_ReLU( - in_channels[2], out_channels, kernel_size=1, stride=1, padding=0) - - self.latlayer2_ = Conv_BN_ReLU( - in_channels[1], out_channels, kernel_size=1, stride=1, padding=0) - - self.latlayer3_ = Conv_BN_ReLU( - in_channels[0], out_channels, kernel_size=1, stride=1, padding=0) - - # Smooth layers - self.smooth1_ = Conv_BN_ReLU( - out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.smooth2_ = Conv_BN_ReLU( - out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.smooth3_ = Conv_BN_ReLU( - out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.out_channels = out_channels * 4 - for m in self.sublayers(): - if isinstance(m, nn.Conv2D): - n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Normal( - 0, math.sqrt(2. / n))) - elif isinstance(m, nn.BatchNorm2D): - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(1.0)) - m.bias = paddle.create_parameter( - shape=m.bias.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(0.0)) - - def _upsample(self, x, scale=1): - return F.upsample(x, scale_factor=scale, mode='bilinear') - - def _upsample_add(self, x, y, scale=1): - return F.upsample(x, scale_factor=scale, mode='bilinear') + y - - def forward(self, x): - f2, f3, f4, f5 = x - p5 = self.toplayer_(f5) - - f4 = self.latlayer1_(f4) - p4 = self._upsample_add(p5, f4, 2) - p4 = self.smooth1_(p4) - - f3 = self.latlayer2_(f3) - p3 = self._upsample_add(p4, f3, 2) - p3 = self.smooth2_(p3) - - f2 = self.latlayer3_(f2) - p2 = self._upsample_add(p3, f2, 2) - p2 = self.smooth3_(p2) - - p3 = self._upsample(p3, 2) - p4 = self._upsample(p4, 4) - p5 = self._upsample(p5, 8) - - fuse = paddle.concat([p2, p3, p4, p5], axis=1) - return fuse diff --git a/backend/ppocr/modeling/necks/pg_fpn.py b/backend/ppocr/modeling/necks/pg_fpn.py deleted file mode 100644 index 3f64539f..00000000 --- a/backend/ppocr/modeling/necks/pg_fpn.py +++ /dev/null @@ -1,314 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
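The PSENet-style FPN removed above is the standard top-down pyramid: 1x1 Conv_BN_ReLU laterals bring every backbone scale to a common channel width, each coarser level is bilinearly upsampled and added to the next lateral (_upsample_add), a 3x3 smooth conv cleans up the sum, and all four levels are finally upsampled to 1/4 scale and concatenated into one 4*out_channels tensor. A minimal sketch of that fusion with hypothetical shapes (random tensors stand in for the lateral outputs; this is not the deleted implementation itself):

import paddle
import paddle.nn.functional as F

# hypothetical backbone features at strides 4/8/16/32, already
# projected to 256 channels by the lateral 1x1 convs
f2 = paddle.rand([1, 256, 64, 64])
f3 = paddle.rand([1, 256, 32, 32])
f4 = paddle.rand([1, 256, 16, 16])
p5 = paddle.rand([1, 256, 8, 8])

def upsample_add(x, y, scale):
    # upsample the coarser map and add the lateral feature, as _upsample_add does
    return F.upsample(x, scale_factor=scale, mode='bilinear') + y

p4 = upsample_add(p5, f4, 2)
p3 = upsample_add(p4, f3, 2)
p2 = upsample_add(p3, f2, 2)

# bring every level to the 1/4-scale resolution and concatenate,
# matching the 4 * out_channels fuse tensor returned by FPN.forward
fuse = paddle.concat(
    [p2,
     F.upsample(p3, scale_factor=2, mode='bilinear'),
     F.upsample(p4, scale_factor=4, mode='bilinear'),
     F.upsample(p5, scale_factor=8, mode='bilinear')],
    axis=1)
print(fuse.shape)  # [1, 1024, 64, 64]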
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=False) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DeConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size=4, - stride=2, - padding=1, - groups=1, - if_act=True, - act=None, - name=None): - super(DeConvBNLayer, self).__init__() - - self.if_act = if_act - self.act = act - self.deconv = nn.Conv2DTranspose( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance", - use_global_stats=False) - - def forward(self, x): - x = self.deconv(x) - x = self.bn(x) - return x - - -class PGFPN(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(PGFPN, self).__init__() - num_inputs = [2048, 2048, 1024, 512, 256] - num_outputs = [256, 256, 192, 192, 128] - self.out_channels = 128 - self.conv_bn_layer_1 = ConvBNLayer( - in_channels=3, - out_channels=32, - kernel_size=3, - stride=1, - act=None, - name='FPN_d1') - self.conv_bn_layer_2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - act=None, - name='FPN_d2') - self.conv_bn_layer_3 = ConvBNLayer( - in_channels=256, - out_channels=128, - kernel_size=3, - stride=1, - act=None, - name='FPN_d3') - self.conv_bn_layer_4 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=2, - act=None, - name='FPN_d4') - self.conv_bn_layer_5 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name='FPN_d5') - self.conv_bn_layer_6 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=3, - stride=2, - act=None, - name='FPN_d6') - self.conv_bn_layer_7 = ConvBNLayer( - in_channels=128, - out_channels=128, - kernel_size=3, - stride=1, - act='relu', - name='FPN_d7') - self.conv_bn_layer_8 = ConvBNLayer( - in_channels=128, - out_channels=128, - kernel_size=1, - stride=1, - act=None, - name='FPN_d8') - - self.conv_h0 = ConvBNLayer( - in_channels=num_inputs[0], - out_channels=num_outputs[0], - kernel_size=1, - stride=1, - act=None, - 
name="conv_h{}".format(0)) - self.conv_h1 = ConvBNLayer( - in_channels=num_inputs[1], - out_channels=num_outputs[1], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(1)) - self.conv_h2 = ConvBNLayer( - in_channels=num_inputs[2], - out_channels=num_outputs[2], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(2)) - self.conv_h3 = ConvBNLayer( - in_channels=num_inputs[3], - out_channels=num_outputs[3], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(3)) - self.conv_h4 = ConvBNLayer( - in_channels=num_inputs[4], - out_channels=num_outputs[4], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(4)) - - self.dconv0 = DeConvBNLayer( - in_channels=num_outputs[0], - out_channels=num_outputs[0 + 1], - name="dconv_{}".format(0)) - self.dconv1 = DeConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[1 + 1], - act=None, - name="dconv_{}".format(1)) - self.dconv2 = DeConvBNLayer( - in_channels=num_outputs[2], - out_channels=num_outputs[2 + 1], - act=None, - name="dconv_{}".format(2)) - self.dconv3 = DeConvBNLayer( - in_channels=num_outputs[3], - out_channels=num_outputs[3 + 1], - act=None, - name="dconv_{}".format(3)) - self.conv_g1 = ConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[1], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(1)) - self.conv_g2 = ConvBNLayer( - in_channels=num_outputs[2], - out_channels=num_outputs[2], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(2)) - self.conv_g3 = ConvBNLayer( - in_channels=num_outputs[3], - out_channels=num_outputs[3], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(3)) - self.conv_g4 = ConvBNLayer( - in_channels=num_outputs[4], - out_channels=num_outputs[4], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(4)) - self.convf = ConvBNLayer( - in_channels=num_outputs[4], - out_channels=num_outputs[4], - kernel_size=1, - stride=1, - act=None, - name="conv_f{}".format(4)) - - def forward(self, x): - c0, c1, c2, c3, c4, c5, c6 = x - # FPN_Down_Fusion - f = [c0, c1, c2] - g = [None, None, None] - h = [None, None, None] - h[0] = self.conv_bn_layer_1(f[0]) - h[1] = self.conv_bn_layer_2(f[1]) - h[2] = self.conv_bn_layer_3(f[2]) - - g[0] = self.conv_bn_layer_4(h[0]) - g[1] = paddle.add(g[0], h[1]) - g[1] = F.relu(g[1]) - g[1] = self.conv_bn_layer_5(g[1]) - g[1] = self.conv_bn_layer_6(g[1]) - - g[2] = paddle.add(g[1], h[2]) - g[2] = F.relu(g[2]) - g[2] = self.conv_bn_layer_7(g[2]) - f_down = self.conv_bn_layer_8(g[2]) - - # FPN UP Fusion - f1 = [c6, c5, c4, c3, c2] - g = [None, None, None, None, None] - h = [None, None, None, None, None] - h[0] = self.conv_h0(f1[0]) - h[1] = self.conv_h1(f1[1]) - h[2] = self.conv_h2(f1[2]) - h[3] = self.conv_h3(f1[3]) - h[4] = self.conv_h4(f1[4]) - - g[0] = self.dconv0(h[0]) - g[1] = paddle.add(g[0], h[1]) - g[1] = F.relu(g[1]) - g[1] = self.conv_g1(g[1]) - g[1] = self.dconv1(g[1]) - - g[2] = paddle.add(g[1], h[2]) - g[2] = F.relu(g[2]) - g[2] = self.conv_g2(g[2]) - g[2] = self.dconv2(g[2]) - - g[3] = paddle.add(g[2], h[3]) - g[3] = F.relu(g[3]) - g[3] = self.conv_g3(g[3]) - g[3] = self.dconv3(g[3]) - - g[4] = paddle.add(x=g[3], y=h[4]) - g[4] = F.relu(g[4]) - g[4] = self.conv_g4(g[4]) - f_up = self.convf(g[4]) - f_common = paddle.add(f_down, f_up) - f_common = F.relu(f_common) - return f_common diff --git a/backend/ppocr/modeling/necks/pren_fpn.py b/backend/ppocr/modeling/necks/pren_fpn.py deleted file mode 100644 index afbdcea8..00000000 --- 
a/backend/ppocr/modeling/necks/pren_fpn.py +++ /dev/null @@ -1,163 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Code is refer from: -https://github.com/RuijieJ/pren/blob/main/Nets/Aggregation.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F - - -class PoolAggregate(nn.Layer): - def __init__(self, n_r, d_in, d_middle=None, d_out=None): - super(PoolAggregate, self).__init__() - if not d_middle: - d_middle = d_in - if not d_out: - d_out = d_in - - self.d_in = d_in - self.d_middle = d_middle - self.d_out = d_out - self.act = nn.Swish() - - self.n_r = n_r - self.aggs = self._build_aggs() - - def _build_aggs(self): - aggs = [] - for i in range(self.n_r): - aggs.append( - self.add_sublayer( - '{}'.format(i), - nn.Sequential( - ('conv1', nn.Conv2D( - self.d_in, self.d_middle, 3, 2, 1, bias_attr=False) - ), ('bn1', nn.BatchNorm(self.d_middle)), - ('act', self.act), ('conv2', nn.Conv2D( - self.d_middle, self.d_out, 3, 2, 1, bias_attr=False - )), ('bn2', nn.BatchNorm(self.d_out))))) - return aggs - - def forward(self, x): - b = x.shape[0] - outs = [] - for agg in self.aggs: - y = agg(x) - p = F.adaptive_avg_pool2d(y, 1) - outs.append(p.reshape((b, 1, self.d_out))) - out = paddle.concat(outs, 1) - return out - - -class WeightAggregate(nn.Layer): - def __init__(self, n_r, d_in, d_middle=None, d_out=None): - super(WeightAggregate, self).__init__() - if not d_middle: - d_middle = d_in - if not d_out: - d_out = d_in - - self.n_r = n_r - self.d_out = d_out - self.act = nn.Swish() - - self.conv_n = nn.Sequential( - ('conv1', nn.Conv2D( - d_in, d_in, 3, 1, 1, - bias_attr=False)), ('bn1', nn.BatchNorm(d_in)), - ('act1', self.act), ('conv2', nn.Conv2D( - d_in, n_r, 1, bias_attr=False)), ('bn2', nn.BatchNorm(n_r)), - ('act2', nn.Sigmoid())) - self.conv_d = nn.Sequential( - ('conv1', nn.Conv2D( - d_in, d_middle, 3, 1, 1, - bias_attr=False)), ('bn1', nn.BatchNorm(d_middle)), - ('act1', self.act), ('conv2', nn.Conv2D( - d_middle, d_out, 1, - bias_attr=False)), ('bn2', nn.BatchNorm(d_out))) - - def forward(self, x): - b, _, h, w = x.shape - - hmaps = self.conv_n(x) - fmaps = self.conv_d(x) - r = paddle.bmm( - hmaps.reshape((b, self.n_r, h * w)), - fmaps.reshape((b, self.d_out, h * w)).transpose((0, 2, 1))) - return r - - -class GCN(nn.Layer): - def __init__(self, d_in, n_in, d_out=None, n_out=None, dropout=0.1): - super(GCN, self).__init__() - if not d_out: - d_out = d_in - if not n_out: - n_out = d_in - - self.conv_n = nn.Conv1D(n_in, n_out, 1) - self.linear = nn.Linear(d_in, d_out) - self.dropout = nn.Dropout(dropout) - self.act = nn.Swish() - - def forward(self, x): - x = self.conv_n(x) - x = self.dropout(self.linear(x)) - return self.act(x) - - -class PRENFPN(nn.Layer): - def __init__(self, in_channels, n_r, d_model, max_len, dropout): - super(PRENFPN, self).__init__() - assert 
len(in_channels) == 3, "in_channels' length must be 3." - c1, c2, c3 = in_channels # the depths are from big to small - # build fpn - assert d_model % 3 == 0, "{} can't be divided by 3.".format(d_model) - self.agg_p1 = PoolAggregate(n_r, c1, d_out=d_model // 3) - self.agg_p2 = PoolAggregate(n_r, c2, d_out=d_model // 3) - self.agg_p3 = PoolAggregate(n_r, c3, d_out=d_model // 3) - - self.agg_w1 = WeightAggregate(n_r, c1, 4 * c1, d_model // 3) - self.agg_w2 = WeightAggregate(n_r, c2, 4 * c2, d_model // 3) - self.agg_w3 = WeightAggregate(n_r, c3, 4 * c3, d_model // 3) - - self.gcn_pool = GCN(d_model, n_r, d_model, max_len, dropout) - self.gcn_weight = GCN(d_model, n_r, d_model, max_len, dropout) - - self.out_channels = d_model - - def forward(self, inputs): - f3, f5, f7 = inputs - - rp1 = self.agg_p1(f3) - rp2 = self.agg_p2(f5) - rp3 = self.agg_p3(f7) - rp = paddle.concat([rp1, rp2, rp3], 2) # [b,nr,d] - - rw1 = self.agg_w1(f3) - rw2 = self.agg_w2(f5) - rw3 = self.agg_w3(f7) - rw = paddle.concat([rw1, rw2, rw3], 2) # [b,nr,d] - - y1 = self.gcn_pool(rp) - y2 = self.gcn_weight(rw) - y = 0.5 * (y1 + y2) - return y # [b,max_len,d] diff --git a/backend/ppocr/modeling/necks/rnn.py b/backend/ppocr/modeling/necks/rnn.py deleted file mode 100644 index c8a774b8..00000000 --- a/backend/ppocr/modeling/necks/rnn.py +++ /dev/null @@ -1,191 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
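PRENFPN, deleted just above, turns three backbone scales into fixed-length sequences rather than a spatial pyramid: PoolAggregate reduces each map to [b, n_r, d_model//3] descriptors with strided convs plus adaptive average pooling, WeightAggregate produces the same shape from n_r learned attention maps, the three scales are concatenated along the last axis to [b, n_r, d_model], and two GCNs project both paths to [b, max_len, d_model] before averaging. The attention-weighted aggregation is just a batched matmul; a toy restatement with hypothetical sizes:

import paddle

b, n_r, d_out, h, w = 2, 5, 64, 8, 32
hmaps = paddle.rand([b, n_r, h, w])   # stands in for conv_n(x): one soft map per region
fmaps = paddle.rand([b, d_out, h, w]) # stands in for conv_d(x): projected features

# WeightAggregate.forward: flatten space and take a batched matmul,
# yielding one d_out-dimensional descriptor per region
r = paddle.bmm(
    hmaps.reshape([b, n_r, h * w]),
    fmaps.reshape([b, d_out, h * w]).transpose([0, 2, 1]))
print(r.shape)  # [2, 5, 64]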
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - -from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr -from ppocr.modeling.backbones.rec_svtrnet import Block, ConvBNLayer, trunc_normal_, zeros_, ones_ - - -class Im2Seq(nn.Layer): - def __init__(self, in_channels, **kwargs): - super().__init__() - self.out_channels = in_channels - - def forward(self, x): - B, C, H, W = x.shape - assert H == 1 - x = x.squeeze(axis=2) - x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels) - return x - - -class EncoderWithRNN(nn.Layer): - def __init__(self, in_channels, hidden_size): - super(EncoderWithRNN, self).__init__() - self.out_channels = hidden_size * 2 - self.lstm = nn.LSTM( - in_channels, hidden_size, direction='bidirectional', num_layers=2) - - def forward(self, x): - x, _ = self.lstm(x) - return x - - -class EncoderWithFC(nn.Layer): - def __init__(self, in_channels, hidden_size): - super(EncoderWithFC, self).__init__() - self.out_channels = hidden_size - weight_attr, bias_attr = get_para_bias_attr( - l2_decay=0.00001, k=in_channels) - self.fc = nn.Linear( - in_channels, - hidden_size, - weight_attr=weight_attr, - bias_attr=bias_attr, - name='reduce_encoder_fea') - - def forward(self, x): - x = self.fc(x) - return x - - -class EncoderWithSVTR(nn.Layer): - def __init__( - self, - in_channels, - dims=64, # XS - depth=2, - hidden_dims=120, - use_guide=False, - num_heads=8, - qkv_bias=True, - mlp_ratio=2.0, - drop_rate=0.1, - attn_drop_rate=0.1, - drop_path=0., - qk_scale=None): - super(EncoderWithSVTR, self).__init__() - self.depth = depth - self.use_guide = use_guide - self.conv1 = ConvBNLayer( - in_channels, in_channels // 8, padding=1, act=nn.Swish) - self.conv2 = ConvBNLayer( - in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish) - - self.svtr_block = nn.LayerList([ - Block( - dim=hidden_dims, - num_heads=num_heads, - mixer='Global', - HW=None, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=nn.Swish, - attn_drop=attn_drop_rate, - drop_path=drop_path, - norm_layer='nn.LayerNorm', - epsilon=1e-05, - prenorm=False) for i in range(depth) - ]) - self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6) - self.conv3 = ConvBNLayer( - hidden_dims, in_channels, kernel_size=1, act=nn.Swish) - # last conv-nxn, the input is concat of input tensor and conv3 output tensor - self.conv4 = ConvBNLayer( - 2 * in_channels, in_channels // 8, padding=1, act=nn.Swish) - - self.conv1x1 = ConvBNLayer( - in_channels // 8, dims, kernel_size=1, act=nn.Swish) - self.out_channels = dims - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight) - if isinstance(m, nn.Linear) and m.bias is not None: - zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - zeros_(m.bias) - ones_(m.weight) - - def forward(self, x): - # for use guide - if self.use_guide: - z = x.clone() - z.stop_gradient = True - else: - z = x - # for short cut - h = z - # reduce dim - z = self.conv1(z) - z = self.conv2(z) - # SVTR global block - B, C, H, W = z.shape - z = z.flatten(2).transpose([0, 2, 1]) - for blk in self.svtr_block: - z = blk(z) - z = self.norm(z) - # last stage - z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2]) - z = self.conv3(z) - z = paddle.concat((h, z), axis=1) - z = self.conv1x1(self.conv4(z)) - return z - - -class SequenceEncoder(nn.Layer): - def __init__(self, in_channels, encoder_type, hidden_size=48, 
**kwargs): - super(SequenceEncoder, self).__init__() - self.encoder_reshape = Im2Seq(in_channels) - self.out_channels = self.encoder_reshape.out_channels - self.encoder_type = encoder_type - if encoder_type == 'reshape': - self.only_reshape = True - else: - support_encoder_dict = { - 'reshape': Im2Seq, - 'fc': EncoderWithFC, - 'rnn': EncoderWithRNN, - 'svtr': EncoderWithSVTR - } - assert encoder_type in support_encoder_dict, '{} must in {}'.format( - encoder_type, support_encoder_dict.keys()) - if encoder_type == "svtr": - self.encoder = support_encoder_dict[encoder_type]( - self.encoder_reshape.out_channels, **kwargs) - else: - self.encoder = support_encoder_dict[encoder_type]( - self.encoder_reshape.out_channels, hidden_size) - self.out_channels = self.encoder.out_channels - self.only_reshape = False - - def forward(self, x): - if self.encoder_type != 'svtr': - x = self.encoder_reshape(x) - if not self.only_reshape: - x = self.encoder(x) - return x - else: - x = self.encoder(x) - x = self.encoder_reshape(x) - return x diff --git a/backend/ppocr/modeling/necks/sast_fpn.py b/backend/ppocr/modeling/necks/sast_fpn.py deleted file mode 100644 index 9b602459..00000000 --- a/backend/ppocr/modeling/necks/sast_fpn.py +++ /dev/null @@ -1,284 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
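The recognition neck deleted above follows the usual CRNN recipe: Im2Seq asserts the conv feature map has height 1 and squeezes it to (batch, width, channels), then the configured encoder ('rnn', 'fc', 'svtr', or plain 'reshape') mixes the width dimension as a sequence. A short sketch of the 'rnn' path under assumed sizes:

import paddle
import paddle.nn as nn

feat = paddle.rand([4, 192, 1, 80])            # backbone output, height already pooled to 1
x = feat.squeeze(axis=2).transpose([0, 2, 1])  # Im2Seq: (N, C, 1, W) -> (N, W, C)

lstm = nn.LSTM(192, 48, direction='bidirectional', num_layers=2)
y, _ = lstm(x)       # EncoderWithRNN: bidirectional, so channels double
print(y.shape)       # [4, 80, 96]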
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class DeConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - groups=1, - if_act=True, - act=None, - name=None): - super(DeConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.deconv = nn.Conv2DTranspose( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.deconv(x) - x = self.bn(x) - return x - - -class FPN_Up_Fusion(nn.Layer): - def __init__(self, in_channels): - super(FPN_Up_Fusion, self).__init__() - in_channels = in_channels[::-1] - out_channels = [256, 256, 192, 192, 128] - - self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0') - self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1') - self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2') - self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3') - self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4') - - self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0') - - self.g1_conv = nn.Sequential( - ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'), - DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2') - ) - self.g2_conv = nn.Sequential( - ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'), - DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2') - ) - self.g3_conv = nn.Sequential( - ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'), - DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2') - ) - - self.g4_conv = nn.Sequential( - ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'), - ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2') - ) - - def _add_relu(self, x1, x2): - x = 
paddle.add(x=x1, y=x2) - x = F.relu(x) - return x - - def forward(self, x): - f = x[2:][::-1] - h0 = self.h0_conv(f[0]) - h1 = self.h1_conv(f[1]) - h2 = self.h2_conv(f[2]) - h3 = self.h3_conv(f[3]) - h4 = self.h4_conv(f[4]) - - g0 = self.g0_conv(h0) - g1 = self._add_relu(g0, h1) - g1 = self.g1_conv(g1) - g2 = self.g2_conv(self._add_relu(g1, h2)) - g3 = self.g3_conv(self._add_relu(g2, h3)) - g4 = self.g4_conv(self._add_relu(g3, h4)) - - return g4 - - -class FPN_Down_Fusion(nn.Layer): - def __init__(self, in_channels): - super(FPN_Down_Fusion, self).__init__() - out_channels = [32, 64, 128] - - self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0') - self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1') - self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2') - - self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0') - - self.g1_conv = nn.Sequential( - ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'), - ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2') - ) - - self.g2_conv = nn.Sequential( - ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'), - ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2') - ) - - def forward(self, x): - f = x[:3] - h0 = self.h0_conv(f[0]) - h1 = self.h1_conv(f[1]) - h2 = self.h2_conv(f[2]) - g0 = self.g0_conv(h0) - g1 = paddle.add(x=g0, y=h1) - g1 = F.relu(g1) - g1 = self.g1_conv(g1) - g2 = paddle.add(x=g1, y=h2) - g2 = F.relu(g2) - g2 = self.g2_conv(g2) - return g2 - - -class Cross_Attention(nn.Layer): - def __init__(self, in_channels): - super(Cross_Attention, self).__init__() - self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta') - self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi') - self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g') - - self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight') - self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc') - - self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight') - self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc') - - self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn') - - def _cal_fweight(self, f, shape): - f_theta, f_phi, f_g = f - #flatten - f_theta = paddle.transpose(f_theta, [0, 2, 3, 1]) - f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128]) - f_phi = paddle.transpose(f_phi, [0, 2, 3, 1]) - f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128]) - f_g = paddle.transpose(f_g, [0, 2, 3, 1]) - f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128]) - #correlation - f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1])) - #scale - f_attn = f_attn / (128**0.5) - f_attn = F.softmax(f_attn) - #weighted sum - f_weight = paddle.matmul(f_attn, f_g) - f_weight = paddle.reshape( - f_weight, [shape[0], shape[1], shape[2], 128]) - return f_weight - - def forward(self, f_common): - f_shape = paddle.shape(f_common) - # print('f_shape: ', f_shape) - - f_theta = self.theta_conv(f_common) - f_phi = self.phi_conv(f_common) - f_g = self.g_conv(f_common) - - ######## horizon ######## - fh_weight = 
self._cal_fweight([f_theta, f_phi, f_g], - [f_shape[0], f_shape[2], f_shape[3]]) - fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2]) - fh_weight = self.fh_weight_conv(fh_weight) - #short cut - fh_sc = self.fh_sc_conv(f_common) - f_h = F.relu(fh_weight + fh_sc) - - ######## vertical ######## - fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2]) - fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2]) - fv_g = paddle.transpose(f_g, [0, 1, 3, 2]) - fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g], - [f_shape[0], f_shape[3], f_shape[2]]) - fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1]) - fv_weight = self.fv_weight_conv(fv_weight) - #short cut - fv_sc = self.fv_sc_conv(f_common) - f_v = F.relu(fv_weight + fv_sc) - - ######## merge ######## - f_attn = paddle.concat([f_h, f_v], axis=1) - f_attn = self.f_attn_conv(f_attn) - return f_attn - - -class SASTFPN(nn.Layer): - def __init__(self, in_channels, with_cab=False, **kwargs): - super(SASTFPN, self).__init__() - self.in_channels = in_channels - self.with_cab = with_cab - self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels) - self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels) - self.out_channels = 128 - self.cross_attention = Cross_Attention(self.out_channels) - - def forward(self, x): - #down fpn - f_down = self.FPN_Down_Fusion(x) - - #up fpn - f_up = self.FPN_Up_Fusion(x) - - #fusion - f_common = paddle.add(x=f_down, y=f_up) - f_common = F.relu(f_common) - - if self.with_cab: - # print('enhence f_common with CAB.') - f_common = self.cross_attention(f_common) - - return f_common diff --git a/backend/ppocr/modeling/necks/table_fpn.py b/backend/ppocr/modeling/necks/table_fpn.py deleted file mode 100644 index 734f15af..00000000 --- a/backend/ppocr/modeling/necks/table_fpn.py +++ /dev/null @@ -1,110 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
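The Cross_Attention block deleted above approximates full 2-D self-attention by factorizing it into a horizontal pass over rows and a vertical pass over transposed columns: _cal_fweight flattens the theta/phi/g projections into (batch*rows, width, 128) sequences, applies scaled dot-product attention (divide by 128**0.5, softmax, weighted sum), and a final 1x1 conv fuses the two directions. The horizontal half restated with explicit, hypothetical shapes:

import paddle
import paddle.nn.functional as F

b, c, h, w = 2, 128, 16, 24
f_theta = paddle.rand([b * h, w, c])  # queries: one sequence per image row
f_phi = paddle.rand([b * h, w, c])    # keys
f_g = paddle.rand([b * h, w, c])      # values

attn = paddle.matmul(f_theta, f_phi.transpose([0, 2, 1])) / (c ** 0.5)
attn = F.softmax(attn)                # softmax over the last axis, as in _cal_fweight
f_weight = paddle.matmul(attn, f_g)   # (b*h, w, c) row-attended features
print(f_weight.shape)                 # [32, 24, 128]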
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class TableFPN(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): - super(TableFPN, self).__init__() - self.out_channels = 512 - weight_attr = paddle.nn.initializer.KaimingUniform() - self.in2_conv = nn.Conv2D( - in_channels=in_channels[0], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in3_conv = nn.Conv2D( - in_channels=in_channels[1], - out_channels=self.out_channels, - kernel_size=1, - stride = 1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in4_conv = nn.Conv2D( - in_channels=in_channels[2], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in5_conv = nn.Conv2D( - in_channels=in_channels[3], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p5_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p4_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p3_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p2_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.fuse_conv = nn.Conv2D( - in_channels=self.out_channels * 4, - out_channels=512, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.in5_conv(c5) - in4 = self.in4_conv(c4) - in3 = self.in3_conv(c3) - in2 = self.in2_conv(c2) - - out4 = in4 + F.upsample( - in5, size=in4.shape[2:4], mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, size=in3.shape[2:4], mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, size=in2.shape[2:4], mode="nearest", align_mode=1) # 1/4 - - p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1) - p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1) - p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1) - fuse = paddle.concat([in5, p4, p3, p2], axis=1) - fuse_conv = self.fuse_conv(fuse) * 0.005 - return [c5 + fuse_conv] diff --git a/backend/ppocr/modeling/transforms/__init__.py b/backend/ppocr/modeling/transforms/__init__.py deleted file mode 100755 index 405ab3cc..00000000 --- a/backend/ppocr/modeling/transforms/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['build_transform'] - - -def build_transform(config): - from .tps import TPS - from .stn import STN_ON - - support_dict = ['TPS', 'STN_ON'] - - module_name = config.pop('name') - assert module_name in support_dict, Exception( - 'transform only support {}'.format(support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/transforms/stn.py b/backend/ppocr/modeling/transforms/stn.py deleted file mode 100644 index 6f2bdda0..00000000 --- a/backend/ppocr/modeling/transforms/stn.py +++ /dev/null @@ -1,135 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/stn_head.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import numpy as np - -from .tps_spatial_transformer import TPSSpatialTransformer - - -def conv3x3_block(in_channels, out_channels, stride=1): - n = 3 * 3 * out_channels - w = math.sqrt(2. 
/ n) - conv_layer = nn.Conv2D( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - weight_attr=nn.initializer.Normal( - mean=0.0, std=w), - bias_attr=nn.initializer.Constant(0)) - block = nn.Sequential(conv_layer, nn.BatchNorm2D(out_channels), nn.ReLU()) - return block - - -class STN(nn.Layer): - def __init__(self, in_channels, num_ctrlpoints, activation='none'): - super(STN, self).__init__() - self.in_channels = in_channels - self.num_ctrlpoints = num_ctrlpoints - self.activation = activation - self.stn_convnet = nn.Sequential( - conv3x3_block(in_channels, 32), #32x64 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(32, 64), #16x32 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(64, 128), # 8*16 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(128, 256), # 4*8 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(256, 256), # 2*4, - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(256, 256)) # 1*2 - self.stn_fc1 = nn.Sequential( - nn.Linear( - 2 * 256, - 512, - weight_attr=nn.initializer.Normal(0, 0.001), - bias_attr=nn.initializer.Constant(0)), - nn.BatchNorm1D(512), - nn.ReLU()) - fc2_bias = self.init_stn() - self.stn_fc2 = nn.Linear( - 512, - num_ctrlpoints * 2, - weight_attr=nn.initializer.Constant(0.0), - bias_attr=nn.initializer.Assign(fc2_bias)) - - def init_stn(self): - margin = 0.01 - sampling_num_per_side = int(self.num_ctrlpoints / 2) - ctrl_pts_x = np.linspace(margin, 1. - margin, sampling_num_per_side) - ctrl_pts_y_top = np.ones(sampling_num_per_side) * margin - ctrl_pts_y_bottom = np.ones(sampling_num_per_side) * (1 - margin) - ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - ctrl_points = np.concatenate( - [ctrl_pts_top, ctrl_pts_bottom], axis=0).astype(np.float32) - if self.activation == 'none': - pass - elif self.activation == 'sigmoid': - ctrl_points = -np.log(1. / ctrl_points - 1.) - ctrl_points = paddle.to_tensor(ctrl_points) - fc2_bias = paddle.reshape( - ctrl_points, shape=[ctrl_points.shape[0] * ctrl_points.shape[1]]) - return fc2_bias - - def forward(self, x): - x = self.stn_convnet(x) - batch_size, _, h, w = x.shape - x = paddle.reshape(x, shape=(batch_size, -1)) - img_feat = self.stn_fc1(x) - x = self.stn_fc2(0.1 * img_feat) - if self.activation == 'sigmoid': - x = F.sigmoid(x) - x = paddle.reshape(x, shape=[-1, self.num_ctrlpoints, 2]) - return img_feat, x - - -class STN_ON(nn.Layer): - def __init__(self, in_channels, tps_inputsize, tps_outputsize, - num_control_points, tps_margins, stn_activation): - super(STN_ON, self).__init__() - self.tps = TPSSpatialTransformer( - output_image_size=tuple(tps_outputsize), - num_control_points=num_control_points, - margins=tuple(tps_margins)) - self.stn_head = STN(in_channels=in_channels, - num_ctrlpoints=num_control_points, - activation=stn_activation) - self.tps_inputsize = tps_inputsize - self.out_channels = in_channels - - def forward(self, image): - stn_input = paddle.nn.functional.interpolate( - image, self.tps_inputsize, mode="bilinear", align_corners=True) - stn_img_feat, ctrl_points = self.stn_head(stn_input) - x, _ = self.tps(image, ctrl_points) - return x diff --git a/backend/ppocr/modeling/transforms/tps.py b/backend/ppocr/modeling/transforms/tps.py deleted file mode 100644 index 9bdab0f8..00000000 --- a/backend/ppocr/modeling/transforms/tps.py +++ /dev/null @@ -1,308 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/modules/transformation.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import numpy as np - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = "bn_" + name - self.bn = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class LocalizationNetwork(nn.Layer): - def __init__(self, in_channels, num_fiducial, loc_lr, model_name): - super(LocalizationNetwork, self).__init__() - self.F = num_fiducial - F = num_fiducial - if model_name == "large": - num_filters_list = [64, 128, 256, 512] - fc_dim = 256 - else: - num_filters_list = [16, 32, 64, 128] - fc_dim = 64 - - self.block_list = [] - for fno in range(0, len(num_filters_list)): - num_filters = num_filters_list[fno] - name = "loc_conv%d" % fno - conv = self.add_sublayer( - name, - ConvBNLayer( - in_channels=in_channels, - out_channels=num_filters, - kernel_size=3, - act='relu', - name=name)) - self.block_list.append(conv) - if fno == len(num_filters_list) - 1: - pool = nn.AdaptiveAvgPool2D(1) - else: - pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - in_channels = num_filters - self.block_list.append(pool) - name = "loc_fc1" - stdv = 1.0 / math.sqrt(num_filters_list[-1] * 1.0) - self.fc1 = nn.Linear( - in_channels, - fc_dim, - weight_attr=ParamAttr( - learning_rate=loc_lr, - name=name + "_w", - initializer=nn.initializer.Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name=name + '.b_0'), - name=name) - - # Init fc2 in LocalizationNetwork - initial_bias = self.get_initial_fiducials() - initial_bias = initial_bias.reshape(-1) - name = "loc_fc2" - param_attr = ParamAttr( - learning_rate=loc_lr, - initializer=nn.initializer.Assign(np.zeros([fc_dim, F * 2])), - name=name + "_w") - bias_attr = ParamAttr( - learning_rate=loc_lr, - initializer=nn.initializer.Assign(initial_bias), - name=name + "_b") - self.fc2 = nn.Linear( - fc_dim, - F * 2, - weight_attr=param_attr, - bias_attr=bias_attr, - name=name) - self.out_channels = F * 2 - - def forward(self, x): - """ - Estimating parameters of geometric transformation - Args: - image: input - Return: - batch_C_prime: the matrix of the geometric transformation 
- """ - B = x.shape[0] - i = 0 - for block in self.block_list: - x = block(x) - x = x.squeeze(axis=2).squeeze(axis=2) - x = self.fc1(x) - - x = F.relu(x) - x = self.fc2(x) - x = x.reshape(shape=[-1, self.F, 2]) - return x - - def get_initial_fiducials(self): - """ see RARE paper Fig. 6 (a) """ - F = self.F - ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2)) - ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2)) - ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2)) - ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0) - return initial_bias - - -class GridGenerator(nn.Layer): - def __init__(self, in_channels, num_fiducial): - super(GridGenerator, self).__init__() - self.eps = 1e-6 - self.F = num_fiducial - - name = "ex_fc" - initializer = nn.initializer.Constant(value=0.0) - param_attr = ParamAttr( - learning_rate=0.0, initializer=initializer, name=name + "_w") - bias_attr = ParamAttr( - learning_rate=0.0, initializer=initializer, name=name + "_b") - self.fc = nn.Linear( - in_channels, - 6, - weight_attr=param_attr, - bias_attr=bias_attr, - name=name) - - def forward(self, batch_C_prime, I_r_size): - """ - Generate the grid for the grid_sampler. - Args: - batch_C_prime: the matrix of the geometric transformation - I_r_size: the shape of the input image - Return: - batch_P_prime: the grid for the grid_sampler - """ - C = self.build_C_paddle() - P = self.build_P_paddle(I_r_size) - - inv_delta_C_tensor = self.build_inv_delta_C_paddle(C).astype('float32') - P_hat_tensor = self.build_P_hat_paddle( - C, paddle.to_tensor(P)).astype('float32') - - inv_delta_C_tensor.stop_gradient = True - P_hat_tensor.stop_gradient = True - - batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime) - - batch_C_ex_part_tensor.stop_gradient = True - - batch_C_prime_with_zeros = paddle.concat( - [batch_C_prime, batch_C_ex_part_tensor], axis=1) - batch_T = paddle.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros) - batch_P_prime = paddle.matmul(P_hat_tensor, batch_T) - return batch_P_prime - - def build_C_paddle(self): - """ Return coordinates of fiducial points in I_r; C """ - F = self.F - ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2), dtype='float64') - ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)], dtype='float64') - ctrl_pts_y_bottom = paddle.ones([int(F / 2)], dtype='float64') - ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0) - return C # F x 2 - - def build_P_paddle(self, I_r_size): - I_r_height, I_r_width = I_r_size - I_r_grid_x = (paddle.arange( - -I_r_width, I_r_width, 2, dtype='float64') + 1.0 - ) / paddle.to_tensor(np.array([I_r_width])) - - I_r_grid_y = (paddle.arange( - -I_r_height, I_r_height, 2, dtype='float64') + 1.0 - ) / paddle.to_tensor(np.array([I_r_height])) - - # P: self.I_r_width x self.I_r_height x 2 - P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2) - P = paddle.transpose(P, perm=[1, 0, 2]) - # n (= self.I_r_width x self.I_r_height) x 2 - return P.reshape([-1, 2]) - - def build_inv_delta_C_paddle(self, C): - """ Return inv_delta_C which is needed to calculate T """ - F = self.F - hat_eye = paddle.eye(F, dtype='float64') # F x F - hat_C = paddle.norm( - C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye - hat_C = (hat_C**2) * paddle.log(hat_C) - 
delta_C = paddle.concat( # F+3 x F+3 - [ - paddle.concat( - [paddle.ones( - (F, 1), dtype='float64'), C, hat_C], axis=1), # F x F+3 - paddle.concat( - [ - paddle.zeros( - (2, 3), dtype='float64'), paddle.transpose( - C, perm=[1, 0]) - ], - axis=1), # 2 x F+3 - paddle.concat( - [ - paddle.zeros( - (1, 3), dtype='float64'), paddle.ones( - (1, F), dtype='float64') - ], - axis=1) # 1 x F+3 - ], - axis=0) - inv_delta_C = paddle.inverse(delta_C) - return inv_delta_C # F+3 x F+3 - - def build_P_hat_paddle(self, C, P): - F = self.F - eps = self.eps - n = P.shape[0] # n (= self.I_r_width x self.I_r_height) - # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2 - P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1)) - C_tile = paddle.unsqueeze(C, axis=0) # 1 x F x 2 - P_diff = P_tile - C_tile # n x F x 2 - # rbf_norm: n x F - rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False) - - # rbf: n x F - rbf = paddle.multiply( - paddle.square(rbf_norm), paddle.log(rbf_norm + eps)) - P_hat = paddle.concat( - [paddle.ones( - (n, 1), dtype='float64'), P, rbf], axis=1) - return P_hat # n x F+3 - - def get_expand_tensor(self, batch_C_prime): - B, H, C = batch_C_prime.shape - batch_C_prime = batch_C_prime.reshape([B, H * C]) - batch_C_ex_part_tensor = self.fc(batch_C_prime) - batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2]) - return batch_C_ex_part_tensor - - -class TPS(nn.Layer): - def __init__(self, in_channels, num_fiducial, loc_lr, model_name): - super(TPS, self).__init__() - self.loc_net = LocalizationNetwork(in_channels, num_fiducial, loc_lr, - model_name) - self.grid_generator = GridGenerator(self.loc_net.out_channels, - num_fiducial) - self.out_channels = in_channels - - def forward(self, image): - image.stop_gradient = False - batch_C_prime = self.loc_net(image) - batch_P_prime = self.grid_generator(batch_C_prime, image.shape[2:]) - batch_P_prime = batch_P_prime.reshape( - [-1, image.shape[2], image.shape[3], 2]) - batch_I_r = F.grid_sample(x=image, grid=batch_P_prime) - return batch_I_r diff --git a/backend/ppocr/modeling/transforms/tps_spatial_transformer.py b/backend/ppocr/modeling/transforms/tps_spatial_transformer.py deleted file mode 100644 index cb1cb10a..00000000 --- a/backend/ppocr/modeling/transforms/tps_spatial_transformer.py +++ /dev/null @@ -1,156 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
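Both TPS front-ends removed in this patch (tps.py's GridGenerator above and the ASTER-style TPSSpatialTransformer whose code follows this header) rest on the same thin-plate-spline kernel phi(x1, x2) = r^2 * log(r): it fills the N x N block of a padded (N+3) x (N+3) system whose inverse maps control-point offsets to a dense sampling grid for F.grid_sample. A small NumPy restatement of just the kernel, including the 0 * log(0) fix that compute_partial_repr applies (toy control points; not the deleted code):

import numpy as np

def tps_phi(p, q):
    # phi(x1, x2) = r^2 * log(r) with r = ||x1 - x2||_2, computed as
    # 0.5 * d2 * log(d2) for d2 = r^2, exactly like compute_partial_repr
    d2 = np.sum((p[:, None, :] - q[None, :, :]) ** 2, axis=2)
    with np.errstate(divide='ignore', invalid='ignore'):
        out = 0.5 * d2 * np.log(d2)
    return np.nan_to_num(out)  # substitute the 0 * log(0) NaNs with 0

# four toy control points near the unit-square borders (hypothetical margins)
pts = np.array([[0.1, 0.1], [0.9, 0.1], [0.1, 0.9], [0.9, 0.9]])
K = tps_phi(pts, pts)
print(K.shape)  # (4, 4): the N x N block of the padded forward kernel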
-""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/tps_spatial_transformer.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import numpy as np -import itertools - - -def grid_sample(input, grid, canvas=None): - input.stop_gradient = False - output = F.grid_sample(input, grid) - if canvas is None: - return output - else: - input_mask = paddle.ones(shape=input.shape) - output_mask = F.grid_sample(input_mask, grid) - padded_output = output * output_mask + canvas * (1 - output_mask) - return padded_output - - -# phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2 -def compute_partial_repr(input_points, control_points): - N = input_points.shape[0] - M = control_points.shape[0] - pairwise_diff = paddle.reshape( - input_points, shape=[N, 1, 2]) - paddle.reshape( - control_points, shape=[1, M, 2]) - # original implementation, very slow - # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance - pairwise_diff_square = pairwise_diff * pairwise_diff - pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, - 1] - repr_matrix = 0.5 * pairwise_dist * paddle.log(pairwise_dist) - # fix numerical error for 0 * log(0), substitute all nan with 0 - mask = np.array(repr_matrix != repr_matrix) - repr_matrix[mask] = 0 - return repr_matrix - - -# output_ctrl_pts are specified, according to our task. -def build_output_control_points(num_control_points, margins): - margin_x, margin_y = margins - num_ctrl_pts_per_side = num_control_points // 2 - ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side) - ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y - ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y) - ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - output_ctrl_pts_arr = np.concatenate( - [ctrl_pts_top, ctrl_pts_bottom], axis=0) - output_ctrl_pts = paddle.to_tensor(output_ctrl_pts_arr) - return output_ctrl_pts - - -class TPSSpatialTransformer(nn.Layer): - def __init__(self, - output_image_size=None, - num_control_points=None, - margins=None): - super(TPSSpatialTransformer, self).__init__() - self.output_image_size = output_image_size - self.num_control_points = num_control_points - self.margins = margins - - self.target_height, self.target_width = output_image_size - target_control_points = build_output_control_points(num_control_points, - margins) - N = num_control_points - - # create padded kernel matrix - forward_kernel = paddle.zeros(shape=[N + 3, N + 3]) - target_control_partial_repr = compute_partial_repr( - target_control_points, target_control_points) - target_control_partial_repr = paddle.cast(target_control_partial_repr, - forward_kernel.dtype) - forward_kernel[:N, :N] = target_control_partial_repr - forward_kernel[:N, -3] = 1 - forward_kernel[-3, :N] = 1 - target_control_points = paddle.cast(target_control_points, - forward_kernel.dtype) - forward_kernel[:N, -2:] = target_control_points - forward_kernel[-2:, :N] = paddle.transpose( - target_control_points, perm=[1, 0]) - # compute inverse matrix - inverse_kernel = paddle.inverse(forward_kernel) - - # create target cordinate matrix - HW = self.target_height * self.target_width - target_coordinate = list( - itertools.product( - range(self.target_height), 
range(self.target_width))) - target_coordinate = paddle.to_tensor(target_coordinate) # HW x 2 - Y, X = paddle.split( - target_coordinate, target_coordinate.shape[1], axis=1) - Y = Y / (self.target_height - 1) - X = X / (self.target_width - 1) - target_coordinate = paddle.concat( - [X, Y], axis=1) # convert from (y, x) to (x, y) - target_coordinate_partial_repr = compute_partial_repr( - target_coordinate, target_control_points) - target_coordinate_repr = paddle.concat( - [ - target_coordinate_partial_repr, paddle.ones(shape=[HW, 1]), - target_coordinate - ], - axis=1) - - # register precomputed matrices - self.inverse_kernel = inverse_kernel - self.padding_matrix = paddle.zeros(shape=[3, 2]) - self.target_coordinate_repr = target_coordinate_repr - self.target_control_points = target_control_points - - def forward(self, input, source_control_points): - assert source_control_points.ndimension() == 3 - assert source_control_points.shape[1] == self.num_control_points - assert source_control_points.shape[2] == 2 - batch_size = paddle.shape(source_control_points)[0] - - padding_matrix = paddle.expand( - self.padding_matrix, shape=[batch_size, 3, 2]) - Y = paddle.concat([source_control_points, padding_matrix], 1) - mapping_matrix = paddle.matmul(self.inverse_kernel, Y) - source_coordinate = paddle.matmul(self.target_coordinate_repr, - mapping_matrix) - - grid = paddle.reshape( - source_coordinate, - shape=[-1, self.target_height, self.target_width, 2]) - grid = paddle.clip(grid, 0, - 1) # the source_control_points may be out of [0, 1]. - # the input to grid_sample is normalized [-1, 1], but what we get is [0, 1] - grid = 2.0 * grid - 1.0 - output_maps = grid_sample(input, grid, canvas=None) - return output_maps, source_coordinate diff --git a/backend/ppocr/optimizer/__init__.py b/backend/ppocr/optimizer/__init__.py deleted file mode 100644 index a6bd2ebb..00000000 --- a/backend/ppocr/optimizer/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -import copy -import paddle - -__all__ = ['build_optimizer'] - - -def build_lr_scheduler(lr_config, epochs, step_each_epoch): - from . import learning_rate - lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch}) - lr_name = lr_config.pop('name', 'Const') - lr = getattr(learning_rate, lr_name)(**lr_config)() - return lr - - -def build_optimizer(config, epochs, step_each_epoch, model): - from . 
import regularizer, optimizer - config = copy.deepcopy(config) - # step1 build lr - lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch) - - # step2 build regularization - if 'regularizer' in config and config['regularizer'] is not None: - reg_config = config.pop('regularizer') - reg_name = reg_config.pop('name') - if not hasattr(regularizer, reg_name): - reg_name += 'Decay' - reg = getattr(regularizer, reg_name)(**reg_config)() - elif 'weight_decay' in config: - reg = config.pop('weight_decay') - else: - reg = None - - # step3 build optimizer - optim_name = config.pop('name') - if 'clip_norm' in config: - clip_norm = config.pop('clip_norm') - grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm) - else: - grad_clip = None - optim = getattr(optimizer, optim_name)(learning_rate=lr, - weight_decay=reg, - grad_clip=grad_clip, - **config) - return optim(model), lr diff --git a/backend/ppocr/optimizer/learning_rate.py b/backend/ppocr/optimizer/learning_rate.py deleted file mode 100644 index fe251f36..00000000 --- a/backend/ppocr/optimizer/learning_rate.py +++ /dev/null @@ -1,310 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from paddle.optimizer import lr -from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay - - -class Linear(object): - """ - Linear learning rate decay - Args: - lr (float): The initial learning rate. It is a python float number. - epochs(int): The decay step size. It determines the decay cycle. - end_lr(float, optional): The minimum final learning rate. Default: 0.0001. - power(float, optional): Power of polynomial. Default: 1.0. - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. 
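From the keys `build_optimizer` pops above (`lr`, `regularizer`, `clip_norm`, `name`), an optimizer config plausibly looks like the following; the concrete values are illustrative, not taken from this repository:

```python
optimizer_config = {
    "name": "Adam",                       # resolved from the optimizer module
    "beta1": 0.9,
    "beta2": 0.999,
    "clip_norm": 10.0,                    # becomes paddle.nn.ClipGradByNorm
    "lr": {"name": "Cosine", "learning_rate": 0.001, "warmup_epoch": 2},
    "regularizer": {"name": "L2", "factor": 1e-5},  # "L2" falls back to "L2Decay"
}
```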
- """ - - def __init__(self, - learning_rate, - epochs, - step_each_epoch, - end_lr=0.0, - power=1.0, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Linear, self).__init__() - self.learning_rate = learning_rate - self.epochs = epochs * step_each_epoch - self.end_lr = end_lr - self.power = power - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.PolynomialDecay( - learning_rate=self.learning_rate, - decay_steps=self.epochs, - end_lr=self.end_lr, - power=self.power, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class Cosine(object): - """ - Cosine learning rate decay - lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) - Args: - lr(float): initial learning rate - step_each_epoch(int): steps each epoch - epochs(int): total training epochs - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_each_epoch, - epochs, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Cosine, self).__init__() - self.learning_rate = learning_rate - self.T_max = step_each_epoch * epochs - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.CosineAnnealingDecay( - learning_rate=self.learning_rate, - T_max=self.T_max, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class Step(object): - """ - Piecewise learning rate decay - Args: - step_each_epoch(int): steps each epoch - learning_rate (float): The initial learning rate. It is a python float number. - step_size (int): the interval to update. - gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . - It should be less than 1.0. Default: 0.1. - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_size, - step_each_epoch, - gamma, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Step, self).__init__() - self.step_size = step_each_epoch * step_size - self.learning_rate = learning_rate - self.gamma = gamma - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.StepDecay( - learning_rate=self.learning_rate, - step_size=self.step_size, - gamma=self.gamma, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class Piecewise(object): - """ - Piecewise learning rate decay - Args: - boundaries(list): A list of steps numbers. The type of element in the list is python int. - values(list): A list of learning rate values that will be picked during different epoch boundaries. - The type of element in the list is python float. - last_epoch (int, optional): The index of last epoch. 
Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - step_each_epoch, - decay_epochs, - values, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Piecewise, self).__init__() - self.boundaries = [step_each_epoch * e for e in decay_epochs] - self.values = values - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.PiecewiseDecay( - boundaries=self.boundaries, - values=self.values, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.values[0], - last_epoch=self.last_epoch) - return learning_rate - - -class CyclicalCosine(object): - """ - Cyclical cosine learning rate decay - Args: - learning_rate(float): initial learning rate - step_each_epoch(int): steps each epoch - epochs(int): total training epochs - cycle(int): period of the cosine learning rate - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_each_epoch, - epochs, - cycle, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(CyclicalCosine, self).__init__() - self.learning_rate = learning_rate - self.T_max = step_each_epoch * epochs - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - self.cycle = round(cycle * step_each_epoch) - - def __call__(self): - learning_rate = CyclicalCosineDecay( - learning_rate=self.learning_rate, - T_max=self.T_max, - cycle=self.cycle, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class OneCycle(object): - """ - One Cycle learning rate decay - Args: - max_lr(float): Upper learning rate boundaries - epochs(int): total training epochs - step_each_epoch(int): steps each epoch - anneal_strategy(str): {‘cos’, ‘linear’} Specifies the annealing strategy: “cos” for cosine annealing, “linear” for linear annealing. - Default: ‘cos’ - three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to ‘final_div_factor’ - instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by ‘pct_start’). - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. 
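A small illustration of how `Piecewise` converts epoch boundaries into step boundaries; note that `values` needs one more entry than `boundaries` (the numbers below are made up):

```python
step_each_epoch = 500
decay_epochs = [70, 90]                                     # decay after epochs 70 and 90
boundaries = [step_each_epoch * e for e in decay_epochs]    # [35000, 45000]
values = [0.001, 0.0001, 0.00001]                           # len(values) == len(boundaries) + 1
```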
- """ - - def __init__(self, - max_lr, - epochs, - step_each_epoch, - anneal_strategy='cos', - three_phase=False, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(OneCycle, self).__init__() - self.max_lr = max_lr - self.epochs = epochs - self.steps_per_epoch = step_each_epoch - self.anneal_strategy = anneal_strategy - self.three_phase = three_phase - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = OneCycleDecay( - max_lr=self.max_lr, - epochs=self.epochs, - steps_per_epoch=self.steps_per_epoch, - anneal_strategy=self.anneal_strategy, - three_phase=self.three_phase, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.max_lr, - last_epoch=self.last_epoch) - return learning_rate - - -class Const(object): - """ - Const learning rate decay - Args: - learning_rate(float): initial learning rate - step_each_epoch(int): steps each epoch - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_each_epoch, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Const, self).__init__() - self.learning_rate = learning_rate - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = self.learning_rate - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate diff --git a/backend/ppocr/optimizer/lr_scheduler.py b/backend/ppocr/optimizer/lr_scheduler.py deleted file mode 100644 index f62f1f3b..00000000 --- a/backend/ppocr/optimizer/lr_scheduler.py +++ /dev/null @@ -1,162 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -from paddle.optimizer.lr import LRScheduler - - -class CyclicalCosineDecay(LRScheduler): - def __init__(self, - learning_rate, - T_max, - cycle=1, - last_epoch=-1, - eta_min=0.0, - verbose=False): - """ - Cyclical cosine learning rate decay - A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf - Args: - learning rate(float): learning rate - T_max(int): maximum epoch num - cycle(int): period of the cosine decay - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. 
- eta_min(float): minimum learning rate during training - verbose(bool): whether to print learning rate for each epoch - """ - super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch, - verbose) - self.cycle = cycle - self.eta_min = eta_min - - def get_lr(self): - if self.last_epoch == 0: - return self.base_lr - reletive_epoch = self.last_epoch % self.cycle - lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * \ - (1 + math.cos(math.pi * reletive_epoch / self.cycle)) - return lr - - -class OneCycleDecay(LRScheduler): - """ - One Cycle learning rate decay - A learning rate which can be referred in https://arxiv.org/abs/1708.07120 - Code refered in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR - """ - - def __init__(self, - max_lr, - epochs=None, - steps_per_epoch=None, - pct_start=0.3, - anneal_strategy='cos', - div_factor=25., - final_div_factor=1e4, - three_phase=False, - last_epoch=-1, - verbose=False): - - # Validate total_steps - if epochs <= 0 or not isinstance(epochs, int): - raise ValueError( - "Expected positive integer epochs, but got {}".format(epochs)) - if steps_per_epoch <= 0 or not isinstance(steps_per_epoch, int): - raise ValueError( - "Expected positive integer steps_per_epoch, but got {}".format( - steps_per_epoch)) - self.total_steps = epochs * steps_per_epoch - - self.max_lr = max_lr - self.initial_lr = self.max_lr / div_factor - self.min_lr = self.initial_lr / final_div_factor - - if three_phase: - self._schedule_phases = [ - { - 'end_step': float(pct_start * self.total_steps) - 1, - 'start_lr': self.initial_lr, - 'end_lr': self.max_lr, - }, - { - 'end_step': float(2 * pct_start * self.total_steps) - 2, - 'start_lr': self.max_lr, - 'end_lr': self.initial_lr, - }, - { - 'end_step': self.total_steps - 1, - 'start_lr': self.initial_lr, - 'end_lr': self.min_lr, - }, - ] - else: - self._schedule_phases = [ - { - 'end_step': float(pct_start * self.total_steps) - 1, - 'start_lr': self.initial_lr, - 'end_lr': self.max_lr, - }, - { - 'end_step': self.total_steps - 1, - 'start_lr': self.max_lr, - 'end_lr': self.min_lr, - }, - ] - - # Validate pct_start - if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): - raise ValueError( - "Expected float between 0 and 1 pct_start, but got {}".format( - pct_start)) - - # Validate anneal_strategy - if anneal_strategy not in ['cos', 'linear']: - raise ValueError( - "anneal_strategy must by one of 'cos' or 'linear', instead got {}". - format(anneal_strategy)) - elif anneal_strategy == 'cos': - self.anneal_func = self._annealing_cos - elif anneal_strategy == 'linear': - self.anneal_func = self._annealing_linear - - super(OneCycleDecay, self).__init__(max_lr, last_epoch, verbose) - - def _annealing_cos(self, start, end, pct): - "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." - cos_out = math.cos(math.pi * pct) + 1 - return end + (start - end) / 2.0 * cos_out - - def _annealing_linear(self, start, end, pct): - "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0." - return (end - start) * pct + start - - def get_lr(self): - computed_lr = 0.0 - step_num = self.last_epoch - - if step_num > self.total_steps: - raise ValueError( - "Tried to step {} times. 
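Within each OneCycle phase, the current position is normalized to `pct` in [0, 1] and fed to one of the two annealing functions defined above. Reproduced standalone (the example values assume `max_lr=0.01` with the default `div_factor=25`, so `initial_lr=0.0004`):

```python
import math

def annealing_cos(start, end, pct):
    # cosine anneal from `start` to `end` as pct goes 0.0 -> 1.0
    return end + (start - end) / 2.0 * (math.cos(math.pi * pct) + 1)

def annealing_linear(start, end, pct):
    return (end - start) * pct + start

print(annealing_cos(0.0004, 0.01, 0.5))   # halfway through the warmup phase
print(annealing_linear(0.01, 1e-6, 1.0))  # end of the decay phase
```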
The specified number of total steps is {}" - .format(step_num + 1, self.total_steps)) - start_step = 0 - for i, phase in enumerate(self._schedule_phases): - end_step = phase['end_step'] - if step_num <= end_step or i == len(self._schedule_phases) - 1: - pct = (step_num - start_step) / (end_step - start_step) - computed_lr = self.anneal_func(phase['start_lr'], - phase['end_lr'], pct) - break - start_step = phase['end_step'] - - return computed_lr diff --git a/backend/ppocr/optimizer/optimizer.py b/backend/ppocr/optimizer/optimizer.py deleted file mode 100644 index dd8544e2..00000000 --- a/backend/ppocr/optimizer/optimizer.py +++ /dev/null @@ -1,234 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from paddle import optimizer as optim - - -class Momentum(object): - """ - Simple Momentum optimizer with velocity state. - Args: - learning_rate (float|Variable) - The learning rate used to update parameters. - Can be a float value or a Variable with one float value as data element. - momentum (float) - Momentum factor. - regularization (WeightDecayRegularizer, optional) - The strategy of regularization. - """ - - def __init__(self, - learning_rate, - momentum, - weight_decay=None, - grad_clip=None, - **args): - super(Momentum, self).__init__() - self.learning_rate = learning_rate - self.momentum = momentum - self.weight_decay = weight_decay - self.grad_clip = grad_clip - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.Momentum( - learning_rate=self.learning_rate, - momentum=self.momentum, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - parameters=train_params) - return opt - - -class Adam(object): - def __init__(self, - learning_rate=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - parameter_list=None, - weight_decay=None, - grad_clip=None, - name=None, - lazy_mode=False, - **kwargs): - self.learning_rate = learning_rate - self.beta1 = beta1 - self.beta2 = beta2 - self.epsilon = epsilon - self.parameter_list = parameter_list - self.learning_rate = learning_rate - self.weight_decay = weight_decay - self.grad_clip = grad_clip - self.name = name - self.lazy_mode = lazy_mode - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.Adam( - learning_rate=self.learning_rate, - beta1=self.beta1, - beta2=self.beta2, - epsilon=self.epsilon, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - name=self.name, - lazy_mode=self.lazy_mode, - parameters=train_params) - return opt - - -class RMSProp(object): - """ - Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method. - Args: - learning_rate (float|Variable) - The learning rate used to update parameters. 
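The optimizer classes here are thin two-stage factories: configured first, then called with the model so that only trainable parameters reach the real paddle optimizer. A dependency-free sketch of the pattern (`FakeOpt`, `Param`, `Model` are hypothetical stand-ins):

```python
class FakeOpt:
    def __init__(self, parameters, lr):
        self.parameters, self.lr = parameters, lr

class Momentum:
    def __init__(self, learning_rate, momentum, **kwargs):
        self.learning_rate, self.momentum = learning_rate, momentum
    def __call__(self, model):
        train_params = [p for p in model.parameters() if p.trainable]
        return FakeOpt(train_params, self.learning_rate)

class Param:
    def __init__(self, trainable=True):
        self.trainable = trainable

class Model:
    def parameters(self):
        return [Param(), Param(trainable=False)]

opt = Momentum(learning_rate=0.001, momentum=0.9)(Model())
print(len(opt.parameters))  # 1 -- the frozen parameter is filtered out
```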
- Can be a float value or a Variable with one float value as data element. - momentum (float) - Momentum factor. - rho (float) - rho value in equation. - epsilon (float) - avoid division by zero, default is 1e-6. - regularization (WeightDecayRegularizer, optional) - The strategy of regularization. - """ - - def __init__(self, - learning_rate, - momentum=0.0, - rho=0.95, - epsilon=1e-6, - weight_decay=None, - grad_clip=None, - **args): - super(RMSProp, self).__init__() - self.learning_rate = learning_rate - self.momentum = momentum - self.rho = rho - self.epsilon = epsilon - self.weight_decay = weight_decay - self.grad_clip = grad_clip - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.RMSProp( - learning_rate=self.learning_rate, - momentum=self.momentum, - rho=self.rho, - epsilon=self.epsilon, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - parameters=train_params) - return opt - - -class Adadelta(object): - def __init__(self, - learning_rate=0.001, - epsilon=1e-08, - rho=0.95, - parameter_list=None, - weight_decay=None, - grad_clip=None, - name=None, - **kwargs): - self.learning_rate = learning_rate - self.epsilon = epsilon - self.rho = rho - self.parameter_list = parameter_list - self.learning_rate = learning_rate - self.weight_decay = weight_decay - self.grad_clip = grad_clip - self.name = name - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.Adadelta( - learning_rate=self.learning_rate, - epsilon=self.epsilon, - rho=self.rho, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - name=self.name, - parameters=train_params) - return opt - - -class AdamW(object): - def __init__(self, - learning_rate=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8, - weight_decay=0.01, - multi_precision=False, - grad_clip=None, - no_weight_decay_name=None, - one_dim_param_no_weight_decay=False, - name=None, - lazy_mode=False, - **args): - super().__init__() - self.learning_rate = learning_rate - self.beta1 = beta1 - self.beta2 = beta2 - self.epsilon = epsilon - self.grad_clip = grad_clip - self.weight_decay = 0.01 if weight_decay is None else weight_decay - self.grad_clip = grad_clip - self.name = name - self.lazy_mode = lazy_mode - self.multi_precision = multi_precision - self.no_weight_decay_name_list = no_weight_decay_name.split( - ) if no_weight_decay_name else [] - self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay - - def __call__(self, model): - parameters = [ - param for param in model.parameters() if param.trainable is True - ] - - self.no_weight_decay_param_name_list = [ - p.name for n, p in model.named_parameters() - if any(nd in n for nd in self.no_weight_decay_name_list) - ] - - if self.one_dim_param_no_weight_decay: - self.no_weight_decay_param_name_list += [ - p.name for n, p in model.named_parameters() if len(p.shape) == 1 - ] - - opt = optim.AdamW( - learning_rate=self.learning_rate, - beta1=self.beta1, - beta2=self.beta2, - epsilon=self.epsilon, - parameters=parameters, - weight_decay=self.weight_decay, - multi_precision=self.multi_precision, - grad_clip=self.grad_clip, - name=self.name, - lazy_mode=self.lazy_mode, - apply_decay_param_fun=self._apply_decay_param_fun) - return opt - - def _apply_decay_param_fun(self, name): - return name not in self.no_weight_decay_param_name_list diff --git a/backend/ppocr/optimizer/regularizer.py b/backend/ppocr/optimizer/regularizer.py deleted 
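Further down, `AdamW` implements selective weight decay: parameter names matching `no_weight_decay_name` (plus, optionally, every 1-D parameter) are excluded via `apply_decay_param_fun`, which is consulted per parameter name. A sketch with made-up names:

```python
no_decay = {"norm.weight", "norm.bias", "pos_embed"}   # hypothetical parameter names

def apply_decay_param_fun(name):
    return name not in no_decay        # True -> weight decay applies

print(apply_decay_param_fun("linear.weight"))  # True
print(apply_decay_param_fun("norm.bias"))      # False
```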
file mode 100644 index 2ce68f71..00000000 --- a/backend/ppocr/optimizer/regularizer.py +++ /dev/null @@ -1,51 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle - - -class L1Decay(object): - """ - L1 Weight Decay Regularization, which encourages the weights to be sparse. - Args: - factor(float): regularization coeff. Default:0.0. - """ - - def __init__(self, factor=0.0): - super(L1Decay, self).__init__() - self.coeff = factor - - def __call__(self): - reg = paddle.regularizer.L1Decay(self.coeff) - return reg - - -class L2Decay(object): - """ - L2 Weight Decay Regularization, which helps to prevent the model over-fitting. - Args: - factor(float): regularization coeff. Default:0.0. - """ - - def __init__(self, factor=0.0): - super(L2Decay, self).__init__() - self.coeff = float(factor) - - def __call__(self): - return self.coeff \ No newline at end of file diff --git a/backend/ppocr/postprocess/__init__.py b/backend/ppocr/postprocess/__init__.py deleted file mode 100644 index f50b5f1c..00000000 --- a/backend/ppocr/postprocess/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
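One asymmetry worth noting in the regularizer classes above: `L1Decay` returns a wrapped `paddle.regularizer.L1Decay` object, while `L2Decay` returns the bare coefficient, presumably because paddle optimizers treat a plain float `weight_decay` as implicit L2. A dependency-free sketch of the same pattern:

```python
class L1Decay:
    def __init__(self, factor=0.0):
        self.coeff = factor
    def __call__(self):
        return ("L1", self.coeff)   # stand-in for paddle.regularizer.L1Decay

class L2Decay:
    def __init__(self, factor=0.0):
        self.coeff = float(factor)
    def __call__(self):
        return self.coeff           # bare float: implicit L2 in the optimizer

print(L1Decay(1e-5)(), L2Decay(1e-5)())
```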
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy - -__all__ = ['build_post_process'] - -from .db_postprocess import DBPostProcess, DistillationDBPostProcess -from .east_postprocess import EASTPostProcess -from .sast_postprocess import SASTPostProcess -from .fce_postprocess import FCEPostProcess -from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, \ - DistillationCTCLabelDecode, TableLabelDecode, NRTRLabelDecode, SARLabelDecode, \ - SEEDLabelDecode, PRENLabelDecode -from .cls_postprocess import ClsPostProcess -from .pg_postprocess import PGPostProcess -from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess -from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess - - -def build_post_process(config, global_config=None): - support_dict = [ - 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'FCEPostProcess', - 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', - 'PGPostProcess', 'DistillationCTCLabelDecode', 'TableLabelDecode', - 'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode', - 'SEEDLabelDecode', 'VQASerTokenLayoutLMPostProcess', - 'VQAReTokenLayoutLMPostProcess', 'PRENLabelDecode', - 'DistillationSARLabelDecode' - ] - - if config['name'] == 'PSEPostProcess': - from .pse_postprocess import PSEPostProcess - support_dict.append('PSEPostProcess') - - config = copy.deepcopy(config) - module_name = config.pop('name') - if module_name == "None": - return - if global_config is not None: - config.update(global_config) - assert module_name in support_dict, Exception( - 'post process only support {}'.format(support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/postprocess/cls_postprocess.py b/backend/ppocr/postprocess/cls_postprocess.py deleted file mode 100644 index 9a27ba08..00000000 --- a/backend/ppocr/postprocess/cls_postprocess.py +++ /dev/null @@ -1,42 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
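`build_post_process` above is a whitelist-checked name dispatch: pop `name`, merge `global_config`, assert the name is supported, then instantiate. A minimal sketch, with a dict standing in for the `eval`-based class lookup of the deleted code:

```python
support = {"DBPostProcess": lambda **kw: ("DBPostProcess", kw)}  # stand-in registry

def build_post_process(config, global_config=None):
    config = dict(config)
    name = config.pop("name")
    if name == "None":
        return None
    if global_config:
        config.update(global_config)
    assert name in support, f"post process only support {list(support)}"
    return support[name](**config)

print(build_post_process({"name": "DBPostProcess", "thresh": 0.3}))
```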
-import paddle - - -class ClsPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, label_list=None, key=None, **kwargs): - super(ClsPostProcess, self).__init__() - self.label_list = label_list - self.key = key - - def __call__(self, preds, label=None, *args, **kwargs): - if self.key is not None: - preds = preds[self.key] - - label_list = self.label_list - if label_list is None: - label_list = {idx: idx for idx in range(preds.shape[-1])} - - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - - pred_idxs = preds.argmax(axis=1) - decode_out = [(label_list[idx], preds[i, idx]) - for i, idx in enumerate(pred_idxs)] - if label is None: - return decode_out - label = [(label_list[idx], 1.0) for idx in label] - return decode_out, label diff --git a/backend/ppocr/postprocess/db_postprocess.py b/backend/ppocr/postprocess/db_postprocess.py deleted file mode 100755 index 6542a1bf..00000000 --- a/backend/ppocr/postprocess/db_postprocess.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refered from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import cv2 -import paddle -from shapely.geometry import Polygon -import pyclipper - - -class DBPostProcess(object): - """ - The post process for Differentiable Binarization (DB). 
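The classifier decode above is an argmax over the class axis, mapping winning indices through `label_list` (falling back to `{idx: idx}` when none is given). A NumPy-only version, assuming the usual two-way angle labels of the cls head:

```python
import numpy as np

preds = np.array([[0.1, 0.9], [0.8, 0.2]])
label_list = {0: "0", 1: "180"}     # assumed angle classes
pred_idxs = preds.argmax(axis=1)
decoded = [(label_list[i], preds[n, i]) for n, i in enumerate(pred_idxs)]
print(decoded)  # [('180', 0.9), ('0', 0.8)]
```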
- """ - - def __init__(self, - thresh=0.3, - box_thresh=0.7, - max_candidates=1000, - unclip_ratio=2.0, - use_dilation=False, - score_mode="fast", - **kwargs): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates - self.unclip_ratio = unclip_ratio - self.min_size = 3 - self.score_mode = score_mode - assert score_mode in [ - "slow", "fast" - ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) - - self.dilation_kernel = None if not use_dilation else np.array( - [[1, 1], [1, 1]]) - - def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, - cv2.CHAIN_APPROX_SIMPLE) - if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] - elif len(outs) == 2: - contours, _ = outs[0], outs[1] - - num_contours = min(len(contours), self.max_candidates) - - boxes = [] - scores = [] - for index in range(num_contours): - contour = contours[index] - points, sside = self.get_mini_boxes(contour) - if sside < self.min_size: - continue - points = np.array(points) - if self.score_mode == "fast": - score = self.box_score_fast(pred, points.reshape(-1, 2)) - else: - score = self.box_score_slow(pred, contour) - if self.box_thresh > score: - continue - - box = self.unclip(points).reshape(-1, 1, 2) - box, sside = self.get_mini_boxes(box) - if sside < self.min_size + 2: - continue - box = np.array(box) - - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes.append(box.astype(np.int16)) - scores.append(score) - return np.array(boxes, dtype=np.int16), scores - - def unclip(self, box): - unclip_ratio = self.unclip_ratio - poly = Polygon(box) - distance = poly.area * unclip_ratio / poly.length - offset = pyclipper.PyclipperOffset() - offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - expanded = np.array(offset.Execute(distance)) - return expanded - - def get_mini_boxes(self, contour): - bounding_box = cv2.minAreaRect(contour) - points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) - - index_1, index_2, index_3, index_4 = 0, 1, 2, 3 - if points[1][1] > points[0][1]: - index_1 = 0 - index_4 = 1 - else: - index_1 = 1 - index_4 = 0 - if points[3][1] > points[2][1]: - index_2 = 2 - index_3 = 3 - else: - index_2 = 3 - index_3 = 2 - - box = [ - points[index_1], points[index_2], points[index_3], points[index_4] - ] - return box, min(bounding_box[1]) - - def box_score_fast(self, bitmap, _box): - ''' - box_score_fast: use bbox mean score as the mean score - ''' - h, w = bitmap.shape[:2] - box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - box[:, 0] = box[:, 0] - xmin - box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def box_score_slow(self, bitmap, contour): - ''' - box_score_slow: use polyon mean score as the mean score - ''' - h, w = bitmap.shape[:2] - 
contour = contour.copy() - contour = np.reshape(contour, (-1, 2)) - - xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) - xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) - ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) - ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - - contour[:, 0] = contour[:, 0] - xmin - contour[:, 1] = contour[:, 1] - ymin - - cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def __call__(self, outs_dict, shape_list): - pred = outs_dict['maps'] - if isinstance(pred, paddle.Tensor): - pred = pred.numpy() - pred = pred[:, 0, :, :] - segmentation = pred > self.thresh - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] - if self.dilation_kernel is not None: - mask = cv2.dilate( - np.array(segmentation[batch_index]).astype(np.uint8), - self.dilation_kernel) - else: - mask = segmentation[batch_index] - boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, - src_w, src_h) - - boxes_batch.append({'points': boxes}) - return boxes_batch - - -class DistillationDBPostProcess(object): - def __init__(self, - model_name=["student"], - key=None, - thresh=0.3, - box_thresh=0.6, - max_candidates=1000, - unclip_ratio=1.5, - use_dilation=False, - score_mode="fast", - **kwargs): - self.model_name = model_name - self.key = key - self.post_process = DBPostProcess( - thresh=thresh, - box_thresh=box_thresh, - max_candidates=max_candidates, - unclip_ratio=unclip_ratio, - use_dilation=use_dilation, - score_mode=score_mode) - - def __call__(self, predicts, shape_list): - results = {} - for k in self.model_name: - results[k] = self.post_process(predicts[k], shape_list=shape_list) - return results diff --git a/backend/ppocr/postprocess/east_postprocess.py b/backend/ppocr/postprocess/east_postprocess.py deleted file mode 100755 index c194c81c..00000000 --- a/backend/ppocr/postprocess/east_postprocess.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from .locality_aware_nms import nms_locality -import cv2 -import paddle - -import os -import sys - - -class EASTPostProcess(object): - """ - The post process for EAST. - """ - - def __init__(self, - score_thresh=0.8, - cover_thresh=0.1, - nms_thresh=0.2, - **kwargs): - - self.score_thresh = score_thresh - self.cover_thresh = cover_thresh - self.nms_thresh = nms_thresh - - def restore_rectangle_quad(self, origin, geometry): - """ - Restore rectangle from quadrangle. 
- """ - # quad - origin_concat = np.concatenate( - (origin, origin, origin, origin), axis=1) # (n, 8) - pred_quads = origin_concat - geometry - pred_quads = pred_quads.reshape((-1, 4, 2)) # (n, 4, 2) - return pred_quads - - def detect(self, - score_map, - geo_map, - score_thresh=0.8, - cover_thresh=0.1, - nms_thresh=0.2): - """ - restore text boxes from score map and geo map - """ - - score_map = score_map[0] - geo_map = np.swapaxes(geo_map, 1, 0) - geo_map = np.swapaxes(geo_map, 1, 2) - # filter the score map - xy_text = np.argwhere(score_map > score_thresh) - if len(xy_text) == 0: - return [] - # sort the text boxes via the y axis - xy_text = xy_text[np.argsort(xy_text[:, 0])] - #restore quad proposals - text_box_restored = self.restore_rectangle_quad( - xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) - boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32) - boxes[:, :8] = text_box_restored.reshape((-1, 8)) - boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]] - - try: - import lanms - boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh) - except: - print( - 'you should install lanms by pip3 install lanms-nova to speed up nms_locality' - ) - boxes = nms_locality(boxes.astype(np.float64), nms_thresh) - if boxes.shape[0] == 0: - return [] - # Here we filter some low score boxes by the average score map, - # this is different from the orginal paper. - for i, box in enumerate(boxes): - mask = np.zeros_like(score_map, dtype=np.uint8) - cv2.fillPoly(mask, box[:8].reshape( - (-1, 4, 2)).astype(np.int32) // 4, 1) - boxes[i, 8] = cv2.mean(score_map, mask)[0] - boxes = boxes[boxes[:, 8] > cover_thresh] - return boxes - - def sort_poly(self, p): - """ - Sort polygons. - """ - min_axis = np.argmin(np.sum(p, axis=1)) - p = p[[min_axis, (min_axis + 1) % 4,\ - (min_axis + 2) % 4, (min_axis + 3) % 4]] - if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]): - return p - else: - return p[[0, 3, 2, 1]] - - def __call__(self, outs_dict, shape_list): - score_list = outs_dict['f_score'] - geo_list = outs_dict['f_geo'] - if isinstance(score_list, paddle.Tensor): - score_list = score_list.numpy() - geo_list = geo_list.numpy() - img_num = len(shape_list) - dt_boxes_list = [] - for ino in range(img_num): - score = score_list[ino] - geo = geo_list[ino] - boxes = self.detect( - score_map=score, - geo_map=geo, - score_thresh=self.score_thresh, - cover_thresh=self.cover_thresh, - nms_thresh=self.nms_thresh) - boxes_norm = [] - if len(boxes) > 0: - h, w = score.shape[1:] - src_h, src_w, ratio_h, ratio_w = shape_list[ino] - boxes = boxes[:, :8].reshape((-1, 4, 2)) - boxes[:, :, 0] /= ratio_w - boxes[:, :, 1] /= ratio_h - for i_box, box in enumerate(boxes): - box = self.sort_poly(box.astype(np.int32)) - if np.linalg.norm(box[0] - box[1]) < 5 \ - or np.linalg.norm(box[3] - box[0]) < 5: - continue - boxes_norm.append(box) - dt_boxes_list.append({'points': np.array(boxes_norm)}) - return dt_boxes_list diff --git a/backend/ppocr/postprocess/fce_postprocess.py b/backend/ppocr/postprocess/fce_postprocess.py deleted file mode 100755 index 8e0716f9..00000000 --- a/backend/ppocr/postprocess/fce_postprocess.py +++ /dev/null @@ -1,241 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py -""" - -import cv2 -import paddle -import numpy as np -from numpy.fft import ifft -from ppocr.utils.poly_nms import poly_nms, valid_boundary - - -def fill_hole(input_mask): - h, w = input_mask.shape - canvas = np.zeros((h + 2, w + 2), np.uint8) - canvas[1:h + 1, 1:w + 1] = input_mask.copy() - - mask = np.zeros((h + 4, w + 4), np.uint8) - - cv2.floodFill(canvas, mask, (0, 0), 1) - canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool) - - return ~canvas | input_mask - - -def fourier2poly(fourier_coeff, num_reconstr_points=50): - """ Inverse Fourier transform - Args: - fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1), - with n and k being candidates number and Fourier degree - respectively. - num_reconstr_points (int): Number of reconstructed polygon points. - Returns: - Polygons (ndarray): The reconstructed polygons shaped (n, n') - """ - - a = np.zeros((len(fourier_coeff), num_reconstr_points), dtype='complex') - k = (len(fourier_coeff[0]) - 1) // 2 - - a[:, 0:k + 1] = fourier_coeff[:, k:] - a[:, -k:] = fourier_coeff[:, :k] - - poly_complex = ifft(a) * num_reconstr_points - polygon = np.zeros((len(fourier_coeff), num_reconstr_points, 2)) - polygon[:, :, 0] = poly_complex.real - polygon[:, :, 1] = poly_complex.imag - return polygon.astype('int32').reshape((len(fourier_coeff), -1)) - - -class FCEPostProcess(object): - """ - The post process for FCENet. - """ - - def __init__(self, - scales, - fourier_degree=5, - num_reconstr_points=50, - decoding_type='fcenet', - score_thr=0.3, - nms_thr=0.1, - alpha=1.0, - beta=1.0, - box_type='poly', - **kwargs): - - self.scales = scales - self.fourier_degree = fourier_degree - self.num_reconstr_points = num_reconstr_points - self.decoding_type = decoding_type - self.score_thr = score_thr - self.nms_thr = nms_thr - self.alpha = alpha - self.beta = beta - self.box_type = box_type - - def __call__(self, preds, shape_list): - score_maps = [] - for key, value in preds.items(): - if isinstance(value, paddle.Tensor): - value = value.numpy() - cls_res = value[:, :4, :, :] - reg_res = value[:, 4:, :, :] - score_maps.append([cls_res, reg_res]) - - return self.get_boundary(score_maps, shape_list) - - def resize_boundary(self, boundaries, scale_factor): - """Rescale boundaries via scale_factor. - - Args: - boundaries (list[list[float]]): The boundary list. Each boundary - with size 2k+1 with k>=4. - scale_factor(ndarray): The scale factor of size (4,). - - Returns: - boundaries (list[list[float]]): The scaled boundaries. 
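A quick sanity check for the `fourier2poly` reconstruction above: a single first-harmonic coefficient reconstructs a circle, with the real part giving x and the imaginary part y. This reuses the same index shuffling before the inverse FFT:

```python
import numpy as np
from numpy.fft import ifft

k, n = 1, 8
coeff = np.zeros((1, 2 * k + 1), dtype=complex)
coeff[0, 2] = 10.0                      # c_{+1} -> radius-10 circle
a = np.zeros((1, n), dtype=complex)
a[:, 0:k + 1] = coeff[:, k:]            # non-negative frequencies
a[:, -k:] = coeff[:, :k]                # negative frequencies
poly = ifft(a) * n
print(np.round(poly.real[0]), np.round(poly.imag[0]))   # cos/sin samples of the circle
```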
- """ - boxes = [] - scores = [] - for b in boundaries: - sz = len(b) - valid_boundary(b, True) - scores.append(b[-1]) - b = (np.array(b[:sz - 1]) * - (np.tile(scale_factor[:2], int( - (sz - 1) / 2)).reshape(1, sz - 1))).flatten().tolist() - boxes.append(np.array(b).reshape([-1, 2])) - - return np.array(boxes, dtype=np.float32), scores - - def get_boundary(self, score_maps, shape_list): - assert len(score_maps) == len(self.scales) - boundaries = [] - for idx, score_map in enumerate(score_maps): - scale = self.scales[idx] - boundaries = boundaries + self._get_boundary_single(score_map, - scale) - - # nms - boundaries = poly_nms(boundaries, self.nms_thr) - boundaries, scores = self.resize_boundary( - boundaries, (1 / shape_list[0, 2:]).tolist()[::-1]) - - boxes_batch = [dict(points=boundaries, scores=scores)] - return boxes_batch - - def _get_boundary_single(self, score_map, scale): - assert len(score_map) == 2 - assert score_map[1].shape[1] == 4 * self.fourier_degree + 2 - - return self.fcenet_decode( - preds=score_map, - fourier_degree=self.fourier_degree, - num_reconstr_points=self.num_reconstr_points, - scale=scale, - alpha=self.alpha, - beta=self.beta, - box_type=self.box_type, - score_thr=self.score_thr, - nms_thr=self.nms_thr) - - def fcenet_decode(self, - preds, - fourier_degree, - num_reconstr_points, - scale, - alpha=1.0, - beta=2.0, - box_type='poly', - score_thr=0.3, - nms_thr=0.1): - """Decoding predictions of FCENet to instances. - - Args: - preds (list(Tensor)): The head output tensors. - fourier_degree (int): The maximum Fourier transform degree k. - num_reconstr_points (int): The points number of the polygon - reconstructed from predicted Fourier coefficients. - scale (int): The down-sample scale of the prediction. - alpha (float) : The parameter to calculate final scores. Score_{final} - = (Score_{text region} ^ alpha) - * (Score_{text center region}^ beta) - beta (float) : The parameter to calculate final score. - box_type (str): Boundary encoding type 'poly' or 'quad'. - score_thr (float) : The threshold used to filter out the final - candidates. - nms_thr (float) : The threshold of nms. - - Returns: - boundaries (list[list[float]]): The instance boundary and confidence - list. 
- """ - assert isinstance(preds, list) - assert len(preds) == 2 - assert box_type in ['poly', 'quad'] - - cls_pred = preds[0][0] - tr_pred = cls_pred[0:2] - tcl_pred = cls_pred[2:] - - reg_pred = preds[1][0].transpose([1, 2, 0]) - x_pred = reg_pred[:, :, :2 * fourier_degree + 1] - y_pred = reg_pred[:, :, 2 * fourier_degree + 1:] - - score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta) - tr_pred_mask = (score_pred) > score_thr - tr_mask = fill_hole(tr_pred_mask) - - tr_contours, _ = cv2.findContours( - tr_mask.astype(np.uint8), cv2.RETR_TREE, - cv2.CHAIN_APPROX_SIMPLE) # opencv4 - - mask = np.zeros_like(tr_mask) - boundaries = [] - for cont in tr_contours: - deal_map = mask.copy().astype(np.int8) - cv2.drawContours(deal_map, [cont], -1, 1, -1) - - score_map = score_pred * deal_map - score_mask = score_map > 0 - xy_text = np.argwhere(score_mask) - dxy = xy_text[:, 1] + xy_text[:, 0] * 1j - - x, y = x_pred[score_mask], y_pred[score_mask] - c = x + y * 1j - c[:, fourier_degree] = c[:, fourier_degree] + dxy - c *= scale - - polygons = fourier2poly(c, num_reconstr_points) - score = score_map[score_mask].reshape(-1, 1) - polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr) - - boundaries = boundaries + polygons - - boundaries = poly_nms(boundaries, nms_thr) - - if box_type == 'quad': - new_boundaries = [] - for boundary in boundaries: - poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32) - score = boundary[-1] - points = cv2.boxPoints(cv2.minAreaRect(poly)) - points = np.int0(points) - new_boundaries.append(points.reshape(-1).tolist() + [score]) - boundaries = new_boundaries - - return boundaries diff --git a/backend/ppocr/postprocess/locality_aware_nms.py b/backend/ppocr/postprocess/locality_aware_nms.py deleted file mode 100644 index d305ef68..00000000 --- a/backend/ppocr/postprocess/locality_aware_nms.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Locality aware nms. -This code is refered from: https://github.com/songdejia/EAST/blob/master/locality_aware_nms.py -""" - -import numpy as np -from shapely.geometry import Polygon - - -def intersection(g, p): - """ - Intersection. - """ - g = Polygon(g[:8].reshape((4, 2))) - p = Polygon(p[:8].reshape((4, 2))) - g = g.buffer(0) - p = p.buffer(0) - if not g.is_valid or not p.is_valid: - return 0 - inter = Polygon(g).intersection(Polygon(p)).area - union = g.area + p.area - inter - if union == 0: - return 0 - else: - return inter / union - - -def intersection_iog(g, p): - """ - Intersection_iog. - """ - g = Polygon(g[:8].reshape((4, 2))) - p = Polygon(p[:8].reshape((4, 2))) - if not g.is_valid or not p.is_valid: - return 0 - inter = Polygon(g).intersection(Polygon(p)).area - #union = g.area + p.area - inter - union = p.area - if union == 0: - print("p_area is very small") - return 0 - else: - return inter / union - - -def weighted_merge(g, p): - """ - Weighted merge. - """ - g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8]) - g[8] = (g[8] + p[8]) - return g - - -def standard_nms(S, thres): - """ - Standard nms. - """ - order = np.argsort(S[:, 8])[::-1] - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) - - inds = np.where(ovr <= thres)[0] - order = order[inds + 1] - - return S[keep] - - -def standard_nms_inds(S, thres): - """ - Standard nms, retun inds. 
- """ - order = np.argsort(S[:, 8])[::-1] - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) - - inds = np.where(ovr <= thres)[0] - order = order[inds + 1] - - return keep - - -def nms(S, thres): - """ - nms. - """ - order = np.argsort(S[:, 8])[::-1] - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) - - inds = np.where(ovr <= thres)[0] - order = order[inds + 1] - - return keep - - -def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2): - """ - soft_nms - :para boxes_in, N x 9 (coords + score) - :para threshould, eliminate cases min score(0.001) - :para Nt_thres, iou_threshi - :para sigma, gaussian weght - :method, linear or gaussian - """ - boxes = boxes_in.copy() - N = boxes.shape[0] - if N is None or N < 1: - return np.array([]) - pos, maxpos = 0, 0 - weight = 0.0 - inds = np.arange(N) - tbox, sbox = boxes[0].copy(), boxes[0].copy() - for i in range(N): - maxscore = boxes[i, 8] - maxpos = i - tbox = boxes[i].copy() - ti = inds[i] - pos = i + 1 - #get max box - while pos < N: - if maxscore < boxes[pos, 8]: - maxscore = boxes[pos, 8] - maxpos = pos - pos = pos + 1 - #add max box as a detection - boxes[i, :] = boxes[maxpos, :] - inds[i] = inds[maxpos] - #swap - boxes[maxpos, :] = tbox - inds[maxpos] = ti - tbox = boxes[i].copy() - pos = i + 1 - #NMS iteration - while pos < N: - sbox = boxes[pos].copy() - ts_iou_val = intersection(tbox, sbox) - if ts_iou_val > 0: - if method == 1: - if ts_iou_val > Nt_thres: - weight = 1 - ts_iou_val - else: - weight = 1 - elif method == 2: - weight = np.exp(-1.0 * ts_iou_val**2 / sigma) - else: - if ts_iou_val > Nt_thres: - weight = 0 - else: - weight = 1 - boxes[pos, 8] = weight * boxes[pos, 8] - #if box score falls below thresold, discard the box by - #swaping last box update N - if boxes[pos, 8] < threshold: - boxes[pos, :] = boxes[N - 1, :] - inds[pos] = inds[N - 1] - N = N - 1 - pos = pos - 1 - pos = pos + 1 - - return boxes[:N] - - -def nms_locality(polys, thres=0.3): - """ - locality aware nms of EAST - :param polys: a N*9 numpy array. first 8 coordinates, then prob - :return: boxes after nms - """ - S = [] - p = None - for g in polys: - if p is not None and intersection(g, p) > thres: - p = weighted_merge(g, p) - else: - if p is not None: - S.append(p) - p = g - if p is not None: - S.append(p) - - if len(S) == 0: - return np.array([]) - return standard_nms(np.array(S), thres) - - -if __name__ == '__main__': - # 343,350,448,135,474,143,369,359 - print( - Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]])) - .area) \ No newline at end of file diff --git a/backend/ppocr/postprocess/pg_postprocess.py b/backend/ppocr/postprocess/pg_postprocess.py deleted file mode 100644 index 0b145518..00000000 --- a/backend/ppocr/postprocess/pg_postprocess.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -__dir__ = os.path.dirname(__file__) -sys.path.append(__dir__) -sys.path.append(os.path.join(__dir__, '..')) -from ppocr.utils.e2e_utils.pgnet_pp_utils import PGNet_PostProcess - - -class PGPostProcess(object): - """ - The post process for PGNet. - """ - - def __init__(self, character_dict_path, valid_set, score_thresh, mode, - **kwargs): - self.character_dict_path = character_dict_path - self.valid_set = valid_set - self.score_thresh = score_thresh - self.mode = mode - - # c++ la-nms is faster, but only support python 3.5 - self.is_python35 = False - if sys.version_info.major == 3 and sys.version_info.minor == 5: - self.is_python35 = True - - def __call__(self, outs_dict, shape_list): - post = PGNet_PostProcess(self.character_dict_path, self.valid_set, - self.score_thresh, outs_dict, shape_list) - if self.mode == 'fast': - data = post.pg_postprocess_fast() - else: - data = post.pg_postprocess_slow() - return data diff --git a/backend/ppocr/postprocess/pse_postprocess/__init__.py b/backend/ppocr/postprocess/pse_postprocess/__init__.py deleted file mode 100644 index 680473bf..00000000 --- a/backend/ppocr/postprocess/pse_postprocess/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .pse_postprocess import PSEPostProcess \ No newline at end of file diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/README.md b/backend/ppocr/postprocess/pse_postprocess/pse/README.md deleted file mode 100644 index 6a19d5d1..00000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## 编译 -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/post_processing/pse -```python -python3 setup.py build_ext --inplace -``` diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/__init__.py b/backend/ppocr/postprocess/pse_postprocess/pse/__init__.py deleted file mode 100644 index 1903a914..00000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import os -import subprocess - -python_path = sys.executable - -ori_path = os.getcwd() -os.chdir('ppocr/postprocess/pse_postprocess/pse') -if subprocess.call( - '{} setup.py build_ext --inplace'.format(python_path), shell=True) != 0: - raise RuntimeError( - 'Cannot compile pse: {}, if your system is windows, you need to install all the default components of `desktop development using C++` in visual studio 2019+'. - format(os.path.dirname(os.path.realpath(__file__)))) -os.chdir(ori_path) - -from .pse import pse diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx b/backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx deleted file mode 100644 index b2be49e9..00000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx +++ /dev/null @@ -1,70 +0,0 @@ - -import numpy as np -import cv2 -cimport numpy as np -cimport cython -cimport libcpp -cimport libcpp.pair -cimport libcpp.queue -from libcpp.pair cimport * -from libcpp.queue cimport * - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef np.ndarray[np.int32_t, ndim=2] _pse(np.ndarray[np.uint8_t, ndim=3] kernels, - np.ndarray[np.int32_t, ndim=2] label, - int kernel_num, - int label_num, - float min_area=0): - cdef np.ndarray[np.int32_t, ndim=2] pred - pred = np.zeros((label.shape[0], label.shape[1]), dtype=np.int32) - - for label_idx in range(1, label_num): - if np.sum(label == label_idx) < min_area: - label[label == label_idx] = 0 - - cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] que = \ - queue[libcpp.pair.pair[np.int16_t,np.int16_t]]() - cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] nxt_que = \ - queue[libcpp.pair.pair[np.int16_t,np.int16_t]]() - cdef np.int16_t* dx = [-1, 1, 0, 0] - cdef np.int16_t* dy = [0, 0, -1, 1] - cdef np.int16_t tmpx, tmpy - - points = np.array(np.where(label > 0)).transpose((1, 0)) - for point_idx in range(points.shape[0]): - tmpx, tmpy = points[point_idx, 0], points[point_idx, 1] - que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy)) - pred[tmpx, tmpy] = label[tmpx, tmpy] - - cdef libcpp.pair.pair[np.int16_t,np.int16_t] cur - cdef int cur_label - for kernel_idx in range(kernel_num - 1, -1, -1): - while not que.empty(): - cur = que.front() - que.pop() - cur_label = pred[cur.first, cur.second] - - is_edge = True - for j in range(4): - tmpx = cur.first + dx[j] - tmpy = cur.second + dy[j] - if tmpx < 0 or tmpx >= label.shape[0] or tmpy < 0 or tmpy >= label.shape[1]: - continue - if kernels[kernel_idx, tmpx, tmpy] == 0 or pred[tmpx, tmpy] > 0: - continue - - que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy)) - pred[tmpx, tmpy] = cur_label - is_edge = False - if is_edge: - nxt_que.push(cur) - - que, nxt_que = nxt_que, que - - return pred - -def pse(kernels, min_area): - kernel_num = kernels.shape[0] - label_num, label = cv2.connectedComponents(kernels[-1], connectivity=4) - return _pse(kernels[:-1], label, kernel_num, label_num, min_area) \ No newline at end of file diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/setup.py b/backend/ppocr/postprocess/pse_postprocess/pse/setup.py deleted file mode 100644 index 03746782..00000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -from distutils.core import setup, Extension -from Cython.Build import cythonize -import numpy - -setup(ext_modules=cythonize(Extension( - 'pse', - sources=['pse.pyx'], - language='c++', - include_dirs=[numpy.get_include()], - library_dirs=[], - libraries=[], - extra_compile_args=['-O3'], - extra_link_args=[] -))) diff 
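The Cython `_pse` above implements progressive scale expansion: starting from the smallest kernel's labels, each label is BFS-grown outward through successively larger kernels, 4-connected, first-come-first-served (pixels that cannot grow stay on the frontier for the next kernel). A slow pure-Python sketch of the same loop, with the min-area filtering omitted:

```python
from collections import deque
import numpy as np

def pse_py(kernels, label):
    # kernels: (K, H, W) binary masks, smallest last in growth order
    # label:   (H, W) connected-component labels of the smallest kernel
    pred = np.zeros_like(label)
    que = deque(zip(*np.nonzero(label)))
    for x, y in que:
        pred[x, y] = label[x, y]
    for k in range(kernels.shape[0] - 1, -1, -1):
        nxt = deque()
        while que:
            x, y = que.popleft()
            grown = False
            for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                tx, ty = x + dx, y + dy
                if (0 <= tx < label.shape[0] and 0 <= ty < label.shape[1]
                        and kernels[k, tx, ty] and pred[tx, ty] == 0):
                    pred[tx, ty] = pred[x, y]      # claim the pixel for this label
                    que.append((tx, ty))
                    grown = True
            if not grown:
                nxt.append((x, y))                 # frontier pixel for the next kernel
        que = nxt
    return pred

kernels = np.ones((1, 3, 3), dtype=np.uint8)
label = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.int32)
print(pse_py(kernels, label))   # the centre seed floods the whole kernel
```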
--git a/backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py b/backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py deleted file mode 100755 index 34f1b8c9..00000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py +++ /dev/null @@ -1,118 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import cv2 -import paddle -from paddle.nn import functional as F - -from ppocr.postprocess.pse_postprocess.pse import pse - - -class PSEPostProcess(object): - """ - The post process for PSE. - """ - - def __init__(self, - thresh=0.5, - box_thresh=0.85, - min_area=16, - box_type='quad', - scale=4, - **kwargs): - assert box_type in ['quad', 'poly'], 'Only quad and poly is supported' - self.thresh = thresh - self.box_thresh = box_thresh - self.min_area = min_area - self.box_type = box_type - self.scale = scale - - def __call__(self, outs_dict, shape_list): - pred = outs_dict['maps'] - if not isinstance(pred, paddle.Tensor): - pred = paddle.to_tensor(pred) - pred = F.interpolate( - pred, scale_factor=4 // self.scale, mode='bilinear') - - score = F.sigmoid(pred[:, 0, :, :]) - - kernels = (pred > self.thresh).astype('float32') - text_mask = kernels[:, 0, :, :] - kernels[:, 0:, :, :] = kernels[:, 0:, :, :] * text_mask - - score = score.numpy() - kernels = kernels.numpy().astype(np.uint8) - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - boxes, scores = self.boxes_from_bitmap(score[batch_index], - kernels[batch_index], - shape_list[batch_index]) - - boxes_batch.append({'points': boxes, 'scores': scores}) - return boxes_batch - - def boxes_from_bitmap(self, score, kernels, shape): - label = pse(kernels, self.min_area) - return self.generate_box(score, label, shape) - - def generate_box(self, score, label, shape): - src_h, src_w, ratio_h, ratio_w = shape - label_num = np.max(label) + 1 - - boxes = [] - scores = [] - for i in range(1, label_num): - ind = label == i - points = np.array(np.where(ind)).transpose((1, 0))[:, ::-1] - - if points.shape[0] < self.min_area: - label[ind] = 0 - continue - - score_i = np.mean(score[ind]) - if score_i < self.box_thresh: - label[ind] = 0 - continue - - if self.box_type == 'quad': - rect = cv2.minAreaRect(points) - bbox = cv2.boxPoints(rect) - elif self.box_type == 'poly': - box_height = np.max(points[:, 1]) + 10 - box_width = np.max(points[:, 0]) + 10 - - mask = np.zeros((box_height, box_width), np.uint8) - mask[points[:, 1], points[:, 0]] = 255 - - contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, - cv2.CHAIN_APPROX_SIMPLE) - bbox = np.squeeze(contours[0], 1) - else: - raise NotImplementedError - - bbox[:, 0] = np.clip(np.round(bbox[:, 0] / ratio_w), 0, src_w) - bbox[:, 1] = np.clip(np.round(bbox[:, 1] / ratio_h), 0, 
src_h) - boxes.append(bbox) - scores.append(score_i) - return boxes, scores diff --git a/backend/ppocr/postprocess/rec_postprocess.py b/backend/ppocr/postprocess/rec_postprocess.py deleted file mode 100644 index bf0fd890..00000000 --- a/backend/ppocr/postprocess/rec_postprocess.py +++ /dev/null @@ -1,754 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import paddle -from paddle.nn import functional as F -import re - - -class BaseRecLabelDecode(object): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False): - self.beg_str = "sos" - self.end_str = "eos" - - self.character_str = [] - if character_dict_path is None: - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - else: - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str.append(line) - if use_space_char: - self.character_str.append(" ") - dict_character = list(self.character_str) - - dict_character = self.add_special_char(dict_character) - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def add_special_char(self, dict_character): - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - selection = np.ones(len(text_index[batch_idx]), dtype=bool) - if is_remove_duplicate: - selection[1:] = text_index[batch_idx][1:] != text_index[ - batch_idx][:-1] - for ignored_token in ignored_tokens: - selection &= text_index[batch_idx] != ignored_token - - char_list = [ - self.character[text_id] - for text_id in text_index[batch_idx][selection] - ] - if text_prob is not None: - conf_list = text_prob[batch_idx][selection] - else: - conf_list = [1] * len(selection) - if len(conf_list) == 0: - conf_list = [0] - - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def get_ignored_tokens(self): - return [0] # for ctc blank - - -class CTCLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(CTCLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, tuple) or isinstance(preds, list): - preds = preds[-1] - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) - if label is None: - return text - label = self.decode(label) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['blank'] + dict_character - return dict_character - - -class DistillationCTCLabelDecode(CTCLabelDecode): - """ - Convert - Convert between text-label and text-index - """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - model_name=["student"], - key=None, - multi_head=False, - **kwargs): - super(DistillationCTCLabelDecode, self).__init__(character_dict_path, - use_space_char) - if not isinstance(model_name, list): - model_name = [model_name] - self.model_name = model_name - - self.key = key - self.multi_head = multi_head - - def __call__(self, preds, label=None, *args, **kwargs): - output = dict() - for name in self.model_name: - pred = preds[name] - if self.key is not None: - pred = pred[self.key] - if self.multi_head and isinstance(pred, dict): - pred = pred['ctc'] - output[name] = super().__call__(pred, label=label, *args, **kwargs) - return output - - -class NRTRLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=True, **kwargs): - super(NRTRLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - - if len(preds) == 2: - preds_id = preds[0] - preds_prob = preds[1] - if isinstance(preds_id, paddle.Tensor): - preds_id = preds_id.numpy() - if isinstance(preds_prob, paddle.Tensor): - preds_prob = preds_prob.numpy() - if preds_id[0][0] == 2: - preds_idx = preds_id[:, 1:] - preds_prob = preds_prob[:, 1:] - else: - preds_idx = preds_id - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - else: - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - 
return text, label
-
-    def add_special_char(self, dict_character):
-        dict_character = ['blank', '<unk>', '<s>', '</s>'] + dict_character
-        return dict_character
-
-    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
-        """ convert text-index into text-label. """
-        result_list = []
-        batch_size = len(text_index)
-        for batch_idx in range(batch_size):
-            char_list = []
-            conf_list = []
-            for idx in range(len(text_index[batch_idx])):
-                if text_index[batch_idx][idx] == 3:  # end
-                    break
-                try:
-                    char_list.append(self.character[int(text_index[batch_idx][
-                        idx])])
-                except:
-                    continue
-                if text_prob is not None:
-                    conf_list.append(text_prob[batch_idx][idx])
-                else:
-                    conf_list.append(1)
-            text = ''.join(char_list)
-            result_list.append((text.lower(), np.mean(conf_list).tolist()))
-        return result_list
-
-
-class AttnLabelDecode(BaseRecLabelDecode):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, character_dict_path=None, use_space_char=False,
-                 **kwargs):
-        super(AttnLabelDecode, self).__init__(character_dict_path,
-                                              use_space_char)
-
-    def add_special_char(self, dict_character):
-        self.beg_str = "sos"
-        self.end_str = "eos"
-        dict_character = dict_character
-        dict_character = [self.beg_str] + dict_character + [self.end_str]
-        return dict_character
-
-    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
-        """ convert text-index into text-label. """
-        result_list = []
-        ignored_tokens = self.get_ignored_tokens()
-        [beg_idx, end_idx] = self.get_ignored_tokens()
-        batch_size = len(text_index)
-        for batch_idx in range(batch_size):
-            char_list = []
-            conf_list = []
-            for idx in range(len(text_index[batch_idx])):
-                if text_index[batch_idx][idx] in ignored_tokens:
-                    continue
-                if int(text_index[batch_idx][idx]) == int(end_idx):
-                    break
-                if is_remove_duplicate:
-                    # only for predict
-                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
-                            batch_idx][idx]:
-                        continue
-                char_list.append(self.character[int(text_index[batch_idx][
-                    idx])])
-                if text_prob is not None:
-                    conf_list.append(text_prob[batch_idx][idx])
-                else:
-                    conf_list.append(1)
-            text = ''.join(char_list)
-            result_list.append((text, np.mean(conf_list).tolist()))
-        return result_list
-
-    def __call__(self, preds, label=None, *args, **kwargs):
-        """
-        text = self.decode(text)
-        if label is None:
-            return text
-        else:
-            label = self.decode(label, is_remove_duplicate=False)
-            return text, label
-        """
-        if isinstance(preds, paddle.Tensor):
-            preds = preds.numpy()
-
-        preds_idx = preds.argmax(axis=2)
-        preds_prob = preds.max(axis=2)
-        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
-        if label is None:
-            return text
-        label = self.decode(label, is_remove_duplicate=False)
-        return text, label
-
-    def get_ignored_tokens(self):
-        beg_idx = self.get_beg_end_flag_idx("beg")
-        end_idx = self.get_beg_end_flag_idx("end")
-        return [beg_idx, end_idx]
-
-    def get_beg_end_flag_idx(self, beg_or_end):
-        if beg_or_end == "beg":
-            idx = np.array(self.dict[self.beg_str])
-        elif beg_or_end == "end":
-            idx = np.array(self.dict[self.end_str])
-        else:
-            assert False, "unsupport type %s in get_beg_end_flag_idx" \
-                % beg_or_end
-        return idx
-
-
-class SEEDLabelDecode(BaseRecLabelDecode):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, character_dict_path=None, use_space_char=False,
-                 **kwargs):
-        super(SEEDLabelDecode, self).__init__(character_dict_path,
-                                              use_space_char)
-
-    def add_special_char(self, dict_character):
-        self.padding_str = "padding"
-        self.end_str
= "eos" - self.unknown = "unknown" - dict_character = dict_character + [ - self.end_str, self.padding_str, self.unknown - ] - return dict_character - - def get_ignored_tokens(self): - end_idx = self.get_beg_end_flag_idx("eos") - return [end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "sos": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "eos": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" % beg_or_end - return idx - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - [end_idx] = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if int(text_index[batch_idx][idx]) == int(end_idx): - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - """ - text = self.decode(text) - if label is None: - return text - else: - label = self.decode(label, is_remove_duplicate=False) - return text, label - """ - preds_idx = preds["rec_pred"] - if isinstance(preds_idx, paddle.Tensor): - preds_idx = preds_idx.numpy() - if "rec_pred_scores" in preds: - preds_idx = preds["rec_pred"] - preds_prob = preds["rec_pred_scores"] - else: - preds_idx = preds["rec_pred"].argmax(axis=2) - preds_prob = preds["rec_pred"].max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - -class SRNLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(SRNLabelDecode, self).__init__(character_dict_path, - use_space_char) - self.max_text_length = kwargs.get('max_text_length', 25) - - def __call__(self, preds, label=None, *args, **kwargs): - pred = preds['predict'] - char_num = len(self.character_str) + 2 - if isinstance(pred, paddle.Tensor): - pred = pred.numpy() - pred = np.reshape(pred, [-1, char_num]) - - preds_idx = np.argmax(pred, axis=1) - preds_prob = np.max(pred, axis=1) - - preds_idx = np.reshape(preds_idx, [-1, self.max_text_length]) - - preds_prob = np.reshape(preds_prob, [-1, self.max_text_length]) - - text = self.decode(preds_idx, preds_prob) - - if label is None: - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - return text - label = self.decode(label) - return text, label - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def add_special_char(self, dict_character): - dict_character = dict_character + [self.beg_str, self.end_str] - return dict_character - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class TableLabelDecode(object): - """ """ - - def __init__(self, character_dict_path, **kwargs): - list_character, list_elem = self.load_char_elem_dict( - character_dict_path) - list_character = self.add_special_char(list_character) - list_elem = self.add_special_char(list_elem) - self.dict_character = {} - self.dict_idx_character = {} - for i, char in enumerate(list_character): - self.dict_idx_character[i] = char - self.dict_character[char] = i - self.dict_elem = {} - self.dict_idx_elem = {} - for i, elem in enumerate(list_elem): - self.dict_idx_elem[i] = elem - self.dict_elem[elem] = i - - def load_char_elem_dict(self, character_dict_path): - list_character = [] - list_elem = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split( - "\t") - character_num = int(substr[0]) - elem_num = int(substr[1]) - for cno in range(1, 1 + character_num): - character = lines[cno].decode('utf-8').strip("\n").strip("\r\n") - list_character.append(character) - for eno in range(1 + character_num, 1 + character_num + elem_num): - elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n") - list_elem.append(elem) - return list_character, list_elem - - def add_special_char(self, list_character): - self.beg_str = "sos" - self.end_str = "eos" - list_character = [self.beg_str] + list_character + [self.end_str] - return list_character - - def __call__(self, preds): - structure_probs = preds['structure_probs'] - loc_preds = preds['loc_preds'] - if isinstance(structure_probs, paddle.Tensor): - structure_probs = structure_probs.numpy() - if isinstance(loc_preds, paddle.Tensor): - loc_preds = loc_preds.numpy() - structure_idx = structure_probs.argmax(axis=2) - structure_probs = structure_probs.max(axis=2) - structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode( - structure_idx, structure_probs, 'elem') - res_html_code_list = [] - res_loc_list = [] - batch_num = len(structure_str) - for bno in range(batch_num): - res_loc = [] - for sno in range(len(structure_str[bno])): - text = structure_str[bno][sno] - if text in ['', ' 0 and tmp_elem_idx == end_idx: - break - if tmp_elem_idx in ignored_tokens: - continue - - 
-                char_list.append(current_dict[tmp_elem_idx])
-                elem_pos_list.append(idx)
-                score_list.append(structure_probs[batch_idx, idx])
-                elem_idx_list.append(tmp_elem_idx)
-            result_list.append(char_list)
-            result_pos_list.append(elem_pos_list)
-            result_score_list.append(score_list)
-            result_elem_idx_list.append(elem_idx_list)
-        return result_list, result_pos_list, result_score_list, result_elem_idx_list
-
-    def get_ignored_tokens(self, char_or_elem):
-        beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem)
-        end_idx = self.get_beg_end_flag_idx("end", char_or_elem)
-        return [beg_idx, end_idx]
-
-    def get_beg_end_flag_idx(self, beg_or_end, char_or_elem):
-        if char_or_elem == "char":
-            if beg_or_end == "beg":
-                idx = self.dict_character[self.beg_str]
-            elif beg_or_end == "end":
-                idx = self.dict_character[self.end_str]
-            else:
-                assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \
-                    % beg_or_end
-        elif char_or_elem == "elem":
-            if beg_or_end == "beg":
-                idx = self.dict_elem[self.beg_str]
-            elif beg_or_end == "end":
-                idx = self.dict_elem[self.end_str]
-            else:
-                assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \
-                    % beg_or_end
-        else:
-            assert False, "Unsupport type %s in char_or_elem" \
-                % char_or_elem
-        return idx
-
-
-class SARLabelDecode(BaseRecLabelDecode):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, character_dict_path=None, use_space_char=False,
-                 **kwargs):
-        super(SARLabelDecode, self).__init__(character_dict_path,
-                                             use_space_char)
-
-        self.rm_symbol = kwargs.get('rm_symbol', False)
-
-    def add_special_char(self, dict_character):
-        beg_end_str = "<BOS/EOS>"
-        unknown_str = "<UKN>"
-        padding_str = "<PAD>"
-        dict_character = dict_character + [unknown_str]
-        self.unknown_idx = len(dict_character) - 1
-        dict_character = dict_character + [beg_end_str]
-        self.start_idx = len(dict_character) - 1
-        self.end_idx = len(dict_character) - 1
-        dict_character = dict_character + [padding_str]
-        self.padding_idx = len(dict_character) - 1
-        return dict_character
-
-    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
-        """ convert text-index into text-label.
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if int(text_index[batch_idx][idx]) == int(self.end_idx): - if text_prob is None and idx == 0: - continue - else: - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - if self.rm_symbol: - comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') - text = text.lower() - text = comp.sub('', text) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - def get_ignored_tokens(self): - return [self.padding_idx] - - -class DistillationSARLabelDecode(SARLabelDecode): - """ - Convert - Convert between text-label and text-index - """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - model_name=["student"], - key=None, - multi_head=False, - **kwargs): - super(DistillationSARLabelDecode, self).__init__(character_dict_path, - use_space_char) - if not isinstance(model_name, list): - model_name = [model_name] - self.model_name = model_name - - self.key = key - self.multi_head = multi_head - - def __call__(self, preds, label=None, *args, **kwargs): - output = dict() - for name in self.model_name: - pred = preds[name] - if self.key is not None: - pred = pred[self.key] - if self.multi_head and isinstance(pred, dict): - pred = pred['sar'] - output[name] = super().__call__(pred, label=label, *args, **kwargs) - return output - - -class PRENLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(PRENLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def add_special_char(self, dict_character): - padding_str = '' # 0 - end_str = '' # 1 - unknown_str = '' # 2 - - dict_character = [padding_str, end_str, unknown_str] + dict_character - self.padding_idx = 0 - self.end_idx = 1 - self.unknown_idx = 2 - - return dict_character - - def decode(self, text_index, text_prob=None): - """ convert text-index into text-label. 
""" - result_list = [] - batch_size = len(text_index) - - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] == self.end_idx: - break - if text_index[batch_idx][idx] in \ - [self.padding_idx, self.unknown_idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - - text = ''.join(char_list) - if len(text) > 0: - result_list.append((text, np.mean(conf_list).tolist())) - else: - # here confidence of empty recog result is 1 - result_list.append(('', 1)) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob) - if label is None: - return text - label = self.decode(label) - return text, label diff --git a/backend/ppocr/postprocess/sast_postprocess.py b/backend/ppocr/postprocess/sast_postprocess.py deleted file mode 100755 index bee75c05..00000000 --- a/backend/ppocr/postprocess/sast_postprocess.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -__dir__ = os.path.dirname(__file__) -sys.path.append(__dir__) -sys.path.append(os.path.join(__dir__, '..')) - -import numpy as np -from .locality_aware_nms import nms_locality -import paddle -import cv2 -import time - - -class SASTPostProcess(object): - """ - The post process for SAST. - """ - - def __init__(self, - score_thresh=0.5, - nms_thresh=0.2, - sample_pts_num=2, - shrink_ratio_of_width=0.3, - expand_scale=1.0, - tcl_map_thresh=0.5, - **kwargs): - - self.score_thresh = score_thresh - self.nms_thresh = nms_thresh - self.sample_pts_num = sample_pts_num - self.shrink_ratio_of_width = shrink_ratio_of_width - self.expand_scale = expand_scale - self.tcl_map_thresh = tcl_map_thresh - - # c++ la-nms is faster, but only support python 3.5 - self.is_python35 = False - if sys.version_info.major == 3 and sys.version_info.minor == 5: - self.is_python35 = True - - def point_pair2poly(self, point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - # constract poly - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2) - - def shrink_quad_along_width(self, - quad, - begin_width_ratio=0., - end_width_ratio=1.): - """ - Generate shrink_quad_along_width. 
- """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio, - 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + \ - shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0, - right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map): - """Restore quad.""" - xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh) - xy_text = xy_text[:, ::-1] # (n, 2) - - # Sort the text boxes via the y axis - xy_text = xy_text[np.argsort(xy_text[:, 1])] - - scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] - scores = scores[:, np.newaxis] - - # Restore - point_num = int(tvo_map.shape[-1] / 2) - assert point_num == 4 - tvo_map = tvo_map[xy_text[:, 1], xy_text[:, 0], :] - xy_text_tile = np.tile(xy_text, (1, point_num)) # (n, point_num * 2) - quads = xy_text_tile - tvo_map - - return scores, quads, xy_text - - def quad_area(self, quad): - """ - compute area of a quad. - """ - edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]), - (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]), - (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]), - (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])] - return np.sum(edge) / 2. - - def nms(self, dets): - if self.is_python35: - import lanms - dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh) - else: - dets = nms_locality(dets, self.nms_thresh) - return dets - - def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map): - """ - Cluster pixels in tcl_map based on quads. - """ - instance_count = quads.shape[0] + 1 # contain background - instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32) - if instance_count == 1: - return instance_count, instance_label_map - - # predict text center - xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh) - n = xy_text.shape[0] - xy_text = xy_text[:, ::-1] # (n, 2) - tco = tco_map[xy_text[:, 1], xy_text[:, 0], :] # (n, 2) - pred_tc = xy_text - tco - - # get gt text center - m = quads.shape[0] - gt_tc = np.mean(quads, axis=1) # (m, 2) - - pred_tc_tile = np.tile(pred_tc[:, np.newaxis, :], - (1, m, 1)) # (n, m, 2) - gt_tc_tile = np.tile(gt_tc[np.newaxis, :, :], (n, 1, 1)) # (n, m, 2) - dist_mat = np.linalg.norm(pred_tc_tile - gt_tc_tile, axis=2) # (n, m) - xy_text_assign = np.argmin(dist_mat, axis=1) + 1 # (n,) - - instance_label_map[xy_text[:, 1], xy_text[:, 0]] = xy_text_assign - return instance_count, instance_label_map - - def estimate_sample_pts_num(self, quad, xy_text): - """ - Estimate sample points number. 
- """ - eh = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[1] - quad[2])) / 2.0 - ew = (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) / 2.0 - - dense_sample_pts_num = max(2, int(ew)) - dense_xy_center_line = xy_text[np.linspace( - 0, - xy_text.shape[0] - 1, - dense_sample_pts_num, - endpoint=True, - dtype=np.float32).astype(np.int32)] - - dense_xy_center_line_diff = dense_xy_center_line[ - 1:] - dense_xy_center_line[:-1] - estimate_arc_len = np.sum( - np.linalg.norm( - dense_xy_center_line_diff, axis=1)) - - sample_pts_num = max(2, int(estimate_arc_len / eh)) - return sample_pts_num - - def detect_sast(self, - tcl_map, - tvo_map, - tbo_map, - tco_map, - ratio_w, - ratio_h, - src_w, - src_h, - shrink_ratio_of_width=0.3, - tcl_map_thresh=0.5, - offset_expand=1.0, - out_strid=4.0): - """ - first resize the tcl_map, tvo_map and tbo_map to the input_size, then restore the polys - """ - # restore quad - scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh, - tvo_map) - dets = np.hstack((quads, scores)).astype(np.float32, copy=False) - dets = self.nms(dets) - if dets.shape[0] == 0: - return [] - quads = dets[:, :-1].reshape(-1, 4, 2) - - # Compute quad area - quad_areas = [] - for quad in quads: - quad_areas.append(-self.quad_area(quad)) - - # instance segmentation - # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8) - instance_count, instance_label_map = self.cluster_by_quads_tco( - tcl_map, tcl_map_thresh, quads, tco_map) - - # restore single poly with tcl instance. - poly_list = [] - for instance_idx in range(1, instance_count): - xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1] - quad = quads[instance_idx - 1] - q_area = quad_areas[instance_idx - 1] - if q_area < 5: - continue - - # - len1 = float(np.linalg.norm(quad[0] - quad[1])) - len2 = float(np.linalg.norm(quad[1] - quad[2])) - min_len = min(len1, len2) - if min_len < 3: - continue - - # filter small CC - if xy_text.shape[0] <= 0: - continue - - # filter low confidence instance - xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] - if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1: - # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05: - continue - - # sort xy_text - left_center_pt = np.array( - [[(quad[0, 0] + quad[-1, 0]) / 2.0, - (quad[0, 1] + quad[-1, 1]) / 2.0]]) # (1, 2) - right_center_pt = np.array( - [[(quad[1, 0] + quad[2, 0]) / 2.0, - (quad[1, 1] + quad[2, 1]) / 2.0]]) # (1, 2) - proj_unit_vec = (right_center_pt - left_center_pt) / \ - (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6) - proj_value = np.sum(xy_text * proj_unit_vec, axis=1) - xy_text = xy_text[np.argsort(proj_value)] - - # Sample pts in tcl map - if self.sample_pts_num == 0: - sample_pts_num = self.estimate_sample_pts_num(quad, xy_text) - else: - sample_pts_num = self.sample_pts_num - xy_center_line = xy_text[np.linspace( - 0, - xy_text.shape[0] - 1, - sample_pts_num, - endpoint=True, - dtype=np.float32).astype(np.int32)] - - point_pair_list = [] - for x, y in xy_center_line: - # get corresponding offset - offset = tbo_map[y, x, :].reshape(2, 2) - if offset_expand != 1.0: - offset_length = np.linalg.norm( - offset, axis=1, keepdims=True) - expand_length = np.clip( - offset_length * (offset_expand - 1), - a_min=0.5, - a_max=3.0) - offset_detal = offset / offset_length * expand_length - offset = offset + offset_detal - # original point - ori_yx = np.array([y, x], dtype=np.float32) - point_pair = 
(ori_yx + offset)[:, ::-1] * out_strid / np.array( - [ratio_w, ratio_h]).reshape(-1, 2) - point_pair_list.append(point_pair) - - # ndarry: (x, 2), expand poly along width - detected_poly = self.point_pair2poly(point_pair_list) - detected_poly = self.expand_poly_along_width(detected_poly, - shrink_ratio_of_width) - detected_poly[:, 0] = np.clip( - detected_poly[:, 0], a_min=0, a_max=src_w) - detected_poly[:, 1] = np.clip( - detected_poly[:, 1], a_min=0, a_max=src_h) - poly_list.append(detected_poly) - - return poly_list - - def __call__(self, outs_dict, shape_list): - score_list = outs_dict['f_score'] - border_list = outs_dict['f_border'] - tvo_list = outs_dict['f_tvo'] - tco_list = outs_dict['f_tco'] - if isinstance(score_list, paddle.Tensor): - score_list = score_list.numpy() - border_list = border_list.numpy() - tvo_list = tvo_list.numpy() - tco_list = tco_list.numpy() - - img_num = len(shape_list) - poly_lists = [] - for ino in range(img_num): - p_score = score_list[ino].transpose((1, 2, 0)) - p_border = border_list[ino].transpose((1, 2, 0)) - p_tvo = tvo_list[ino].transpose((1, 2, 0)) - p_tco = tco_list[ino].transpose((1, 2, 0)) - src_h, src_w, ratio_h, ratio_w = shape_list[ino] - - poly_list = self.detect_sast( - p_score, - p_tvo, - p_border, - p_tco, - ratio_w, - ratio_h, - src_w, - src_h, - shrink_ratio_of_width=self.shrink_ratio_of_width, - tcl_map_thresh=self.tcl_map_thresh, - offset_expand=self.expand_scale) - poly_lists.append({'points': np.array(poly_list)}) - - return poly_lists diff --git a/backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py b/backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py deleted file mode 100644 index 1d55d13d..00000000 --- a/backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import paddle - - -class VQAReTokenLayoutLMPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, **kwargs): - super(VQAReTokenLayoutLMPostProcess, self).__init__() - - def __call__(self, preds, label=None, *args, **kwargs): - if label is not None: - return self._metric(preds, label) - else: - return self._infer(preds, *args, **kwargs) - - def _metric(self, preds, label): - return preds['pred_relations'], label[6], label[5] - - def _infer(self, preds, *args, **kwargs): - ser_results = kwargs['ser_results'] - entity_idx_dict_batch = kwargs['entity_idx_dict_batch'] - pred_relations = preds['pred_relations'] - - # merge relations and ocr info - results = [] - for pred_relation, ser_result, entity_idx_dict in zip( - pred_relations, ser_results, entity_idx_dict_batch): - result = [] - used_tail_id = [] - for relation in pred_relation: - if relation['tail_id'] in used_tail_id: - continue - used_tail_id.append(relation['tail_id']) - ocr_info_head = ser_result[entity_idx_dict[relation['head_id']]] - ocr_info_tail = ser_result[entity_idx_dict[relation['tail_id']]] - result.append((ocr_info_head, ocr_info_tail)) - results.append(result) - return results diff --git a/backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py b/backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py deleted file mode 100644 index 782cdea6..00000000 --- a/backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import numpy as np -import paddle -from ppocr.utils.utility import load_vqa_bio_label_maps - - -class VQASerTokenLayoutLMPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, class_path, **kwargs): - super(VQASerTokenLayoutLMPostProcess, self).__init__() - label2id_map, self.id2label_map = load_vqa_bio_label_maps(class_path) - - self.label2id_map_for_draw = dict() - for key in label2id_map: - if key.startswith("I-"): - self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]] - else: - self.label2id_map_for_draw[key] = label2id_map[key] - - self.id2label_map_for_show = dict() - for key in self.label2id_map_for_draw: - val = self.label2id_map_for_draw[key] - if key == "O": - self.id2label_map_for_show[val] = key - if key.startswith("B-") or key.startswith("I-"): - self.id2label_map_for_show[val] = key[2:] - else: - self.id2label_map_for_show[val] = key - - def __call__(self, preds, batch=None, *args, **kwargs): - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - - if batch is not None: - return self._metric(preds, batch[1]) - else: - return self._infer(preds, **kwargs) - - def _metric(self, preds, label): - pred_idxs = preds.argmax(axis=2) - decode_out_list = [[] for _ in range(pred_idxs.shape[0])] - label_decode_out_list = [[] for _ in range(pred_idxs.shape[0])] - - for i in range(pred_idxs.shape[0]): - for j in range(pred_idxs.shape[1]): - if label[i, j] != -100: - label_decode_out_list[i].append(self.id2label_map[label[i, - j]]) - decode_out_list[i].append(self.id2label_map[pred_idxs[i, - j]]) - return decode_out_list, label_decode_out_list - - def _infer(self, preds, attention_masks, segment_offset_ids, ocr_infos): - results = [] - - for pred, attention_mask, segment_offset_id, ocr_info in zip( - preds, attention_masks, segment_offset_ids, ocr_infos): - pred = np.argmax(pred, axis=1) - pred = [self.id2label_map[idx] for idx in pred] - - for idx in range(len(segment_offset_id)): - if idx == 0: - start_id = 0 - else: - start_id = segment_offset_id[idx - 1] - - end_id = segment_offset_id[idx] - - curr_pred = pred[start_id:end_id] - curr_pred = [self.label2id_map_for_draw[p] for p in curr_pred] - - if len(curr_pred) <= 0: - pred_id = 0 - else: - counts = np.bincount(curr_pred) - pred_id = np.argmax(counts) - ocr_info[idx]["pred_id"] = int(pred_id) - ocr_info[idx]["pred"] = self.id2label_map_for_show[int(pred_id)] - results.append(ocr_info) - return results diff --git a/backend/ppocr/utils/__init__.py b/backend/ppocr/utils/__init__.py deleted file mode 100755 index abf198b9..00000000 --- a/backend/ppocr/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/backend/ppocr/utils/dict/ar_dict.txt b/backend/ppocr/utils/dict/ar_dict.txt deleted file mode 100644 index fc638029..00000000 --- a/backend/ppocr/utils/dict/ar_dict.txt +++ /dev/null @@ -1,117 +0,0 @@ -a -r -b -i -c -_ -m -g -/ -1 -0 -I -L -S -V -R -C -2 -v -l -6 -3 -9 -. -j -p -ا -ل -م -ر -ج -و -ح -ي -ة -5 -8 -7 -أ -ب -ض -4 -ك -س -ه -ث -ن -ط -ع -ت -غ -خ -ف -ئ -ز -إ -د -ص -ظ -ذ -ش -ى -ق -ؤ -آ -ء -s -e -n -w -t -u -z -d -A -N -G -h -o -E -T -H -O -B -y -F -U -J -X -W -P -Z -M -k -q -Y -Q -D -f -K -x -' -% -- -# -@ -! -& -$ -, -: -é -? -+ -É -( - diff --git a/backend/ppocr/utils/dict/arabic_dict.txt b/backend/ppocr/utils/dict/arabic_dict.txt deleted file mode 100644 index 916d421c..00000000 --- a/backend/ppocr/utils/dict/arabic_dict.txt +++ /dev/null @@ -1,161 +0,0 @@ -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ء -آ -أ -ؤ -إ -ئ -ا -ب -ة -ت -ث -ج -ح -خ -د -ذ -ر -ز -س -ش -ص -ض -ط -ظ -ع -غ -ف -ق -ك -ل -م -ن -ه -و -ى -ي -ً -ٌ -ٍ -َ -ُ -ِ -ّ -ْ -ٓ -ٔ -ٰ -ٱ -ٹ -پ -چ -ڈ -ڑ -ژ -ک -ڭ -گ -ں -ھ -ۀ -ہ -ۂ -ۃ -ۆ -ۇ -ۈ -ۋ -ی -ې -ے -ۓ -ە -١ -٢ -٣ -٤ -٥ -٦ -٧ -٨ -٩ diff --git a/backend/ppocr/utils/dict/be_dict.txt b/backend/ppocr/utils/dict/be_dict.txt deleted file mode 100644 index f8458baa..00000000 --- a/backend/ppocr/utils/dict/be_dict.txt +++ /dev/null @@ -1,145 +0,0 @@ -b -e -_ -i -m -g -/ -2 -0 -I -L -S -V -R -C -1 -v -a -l -6 -9 -4 -3 -. -j -p -п -а -з -б -у -г -н -ц -ь -8 -м -л -і -о -ў -ы -7 -5 -М -х -с -р -ф -я -е -д -ж -ю -ч -й -к -Д -в -Б -т -І -ш -ё -э -К -Л -Н -А -Ж -Г -В -П -З -Е -О -Р -С -У -Ё -Й -Т -Ч -Э -Ц -Ю -Ш -Ф -Х -Я -Ь -Ы -Ў -s -c -n -w -M -o -t -T -E -A -B -u -h -y -k -r -H -d -Y -O -U -F -f -x -D -G -N -K -P -z -J -X -W -Z -Q -% -- -q -@ -' -! -# -& -, -: -$ -( -? -é -+ -É - diff --git a/backend/ppocr/utils/dict/bg_dict.txt b/backend/ppocr/utils/dict/bg_dict.txt deleted file mode 100644 index 84713c37..00000000 --- a/backend/ppocr/utils/dict/bg_dict.txt +++ /dev/null @@ -1,140 +0,0 @@ -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ь -ю -я - diff --git a/backend/ppocr/utils/dict/ch_dict.txt b/backend/ppocr/utils/dict/ch_dict.txt deleted file mode 100644 index 84b885d8..00000000 --- a/backend/ppocr/utils/dict/ch_dict.txt +++ /dev/null @@ -1,6623 +0,0 @@ -' -疗 -绚 -诚 -娇 -溜 -题 -贿 -者 -廖 -更 -纳 -加 -奉 -公 -一 -就 -汴 -计 -与 -路 -房 -原 -妇 -2 -0 -8 -- -7 -其 -> -: -] -, -, -骑 -刈 -全 -消 -昏 -傈 -安 -久 -钟 -嗅 -不 -影 -处 -驽 -蜿 -资 -关 -椤 -地 -瘸 -专 -问 -忖 -票 -嫉 -炎 -韵 -要 -月 -田 -节 -陂 -鄙 -捌 -备 -拳 -伺 -眼 -网 -盎 -大 -傍 -心 -东 -愉 -汇 -蹿 -科 -每 -业 -里 -航 -晏 -字 -平 -录 -先 -1 -3 -彤 -鲶 -产 -稍 -督 -腴 -有 -象 -岳 -注 -绍 -在 -泺 -文 -定 -核 -名 -水 -过 -理 -让 -偷 -率 -等 -这 -发 -” -为 -含 -肥 -酉 -相 -鄱 -七 -编 -猥 -锛 -日 -镀 -蒂 -掰 -倒 -辆 -栾 -栗 -综 -涩 -州 -雌 -滑 -馀 -了 -机 -块 -司 -宰 -甙 -兴 -矽 -抚 -保 -用 -沧 -秩 -如 -收 -息 -滥 -页 -疑 -埠 -! -! 
-姥 -异 -橹 -钇 -向 -下 -跄 -的 -椴 -沫 -国 -绥 -獠 -报 -开 -民 -蜇 -何 -分 -凇 -长 -讥 -藏 -掏 -施 -羽 -中 -讲 -派 -嘟 -人 -提 -浼 -间 -世 -而 -古 -多 -倪 -唇 -饯 -控 -庚 -首 -赛 -蜓 -味 -断 -制 -觉 -技 -替 -艰 -溢 -潮 -夕 -钺 -外 -摘 -枋 -动 -双 -单 -啮 -户 -枇 -确 -锦 -曜 -杜 -或 -能 -效 -霜 -盒 -然 -侗 -电 -晁 -放 -步 -鹃 -新 -杖 -蜂 -吒 -濂 -瞬 -评 -总 -隍 -对 -独 -合 -也 -是 -府 -青 -天 -诲 -墙 -组 -滴 -级 -邀 -帘 -示 -已 -时 -骸 -仄 -泅 -和 -遨 -店 -雇 -疫 -持 -巍 -踮 -境 -只 -亨 -目 -鉴 -崤 -闲 -体 -泄 -杂 -作 -般 -轰 -化 -解 -迂 -诿 -蛭 -璀 -腾 -告 -版 -服 -省 -师 -小 -规 -程 -线 -海 -办 -引 -二 -桧 -牌 -砺 -洄 -裴 -修 -图 -痫 -胡 -许 -犊 -事 -郛 -基 -柴 -呼 -食 -研 -奶 -律 -蛋 -因 -葆 -察 -戏 -褒 -戒 -再 -李 -骁 -工 -貂 -油 -鹅 -章 -啄 -休 -场 -给 -睡 -纷 -豆 -器 -捎 -说 -敏 -学 -会 -浒 -设 -诊 -格 -廓 -查 -来 -霓 -室 -溆 -¢ -诡 -寥 -焕 -舜 -柒 -狐 -回 -戟 -砾 -厄 -实 -翩 -尿 -五 -入 -径 -惭 -喹 -股 -宇 -篝 -| -; -美 -期 -云 -九 -祺 -扮 -靠 -锝 -槌 -系 -企 -酰 -阊 -暂 -蚕 -忻 -豁 -本 -羹 -执 -条 -钦 -H -獒 -限 -进 -季 -楦 -于 -芘 -玖 -铋 -茯 -未 -答 -粘 -括 -样 -精 -欠 -矢 -甥 -帷 -嵩 -扣 -令 -仔 -风 -皈 -行 -支 -部 -蓉 -刮 -站 -蜡 -救 -钊 -汗 -松 -嫌 -成 -可 -. -鹤 -院 -从 -交 -政 -怕 -活 -调 -球 -局 -验 -髌 -第 -韫 -谗 -串 -到 -圆 -年 -米 -/ -* -友 -忿 -检 -区 -看 -自 -敢 -刃 -个 -兹 -弄 -流 -留 -同 -没 -齿 -星 -聆 -轼 -湖 -什 -三 -建 -蛔 -儿 -椋 -汕 -震 -颧 -鲤 -跟 -力 -情 -璺 -铨 -陪 -务 -指 -族 -训 -滦 -鄣 -濮 -扒 -商 -箱 -十 -召 -慷 -辗 -所 -莞 -管 -护 -臭 -横 -硒 -嗓 -接 -侦 -六 -露 -党 -馋 -驾 -剖 -高 -侬 -妪 -幂 -猗 -绺 -骐 -央 -酐 -孝 -筝 -课 -徇 -缰 -门 -男 -西 -项 -句 -谙 -瞒 -秃 -篇 -教 -碲 -罚 -声 -呐 -景 -前 -富 -嘴 -鳌 -稀 -免 -朋 -啬 -睐 -去 -赈 -鱼 -住 -肩 -愕 -速 -旁 -波 -厅 -健 -茼 -厥 -鲟 -谅 -投 -攸 -炔 -数 -方 -击 -呋 -谈 -绩 -别 -愫 -僚 -躬 -鹧 -胪 -炳 -招 -喇 -膨 -泵 -蹦 -毛 -结 -5 -4 -谱 -识 -陕 -粽 -婚 -拟 -构 -且 -搜 -任 -潘 -比 -郢 -妨 -醪 -陀 -桔 -碘 -扎 -选 -哈 -骷 -楷 -亿 -明 -缆 -脯 -监 -睫 -逻 -婵 -共 -赴 -淝 -凡 -惦 -及 -达 -揖 -谩 -澹 -减 -焰 -蛹 -番 -祁 -柏 -员 -禄 -怡 -峤 -龙 -白 -叽 -生 -闯 -起 -细 -装 -谕 -竟 -聚 -钙 -上 -导 -渊 -按 -艾 -辘 -挡 -耒 -盹 -饪 -臀 -记 -邮 -蕙 -受 -各 -医 -搂 -普 -滇 -朗 -茸 -带 -翻 -酚 -( -光 -堤 -墟 -蔷 -万 -幻 -〓 -瑙 -辈 -昧 -盏 -亘 -蛀 -吉 -铰 -请 -子 -假 -闻 -税 -井 -诩 -哨 -嫂 -好 -面 -琐 -校 -馊 -鬣 -缂 -营 -访 -炖 -占 -农 -缀 -否 -经 -钚 -棵 -趟 -张 -亟 -吏 -茶 -谨 -捻 -论 -迸 -堂 -玉 -信 -吧 -瞠 -乡 -姬 -寺 -咬 -溏 -苄 -皿 -意 -赉 -宝 -尔 -钰 -艺 -特 -唳 -踉 -都 -荣 -倚 -登 -荐 -丧 -奇 -涵 -批 -炭 -近 -符 -傩 -感 -道 -着 -菊 -虹 -仲 -众 -懈 -濯 -颞 -眺 -南 -释 -北 -缝 -标 -既 -茗 -整 -撼 -迤 -贲 -挎 -耱 -拒 -某 -妍 -卫 -哇 -英 -矶 -藩 -治 -他 -元 -领 -膜 -遮 -穗 -蛾 -飞 -荒 -棺 -劫 -么 -市 -火 -温 -拈 -棚 -洼 -转 -果 -奕 -卸 -迪 -伸 -泳 -斗 -邡 -侄 -涨 -屯 -萋 -胭 -氡 -崮 -枞 -惧 -冒 -彩 -斜 -手 -豚 -随 -旭 -淑 -妞 -形 -菌 -吲 -沱 -争 -驯 -歹 -挟 -兆 -柱 -传 -至 -包 -内 -响 -临 -红 -功 -弩 -衡 -寂 -禁 -老 -棍 -耆 -渍 -织 -害 -氵 -渑 -布 -载 -靥 -嗬 -虽 -苹 -咨 -娄 -库 -雉 -榜 -帜 -嘲 -套 -瑚 -亲 -簸 -欧 -边 -6 -腿 -旮 -抛 -吹 -瞳 -得 -镓 -梗 -厨 -继 -漾 -愣 -憨 -士 -策 -窑 -抑 -躯 -襟 -脏 -参 -贸 -言 -干 -绸 -鳄 -穷 -藜 -音 -折 -详 -) -举 -悍 -甸 -癌 -黎 -谴 -死 -罩 -迁 -寒 -驷 -袖 -媒 -蒋 -掘 -模 -纠 -恣 -观 -祖 -蛆 -碍 -位 -稿 -主 -澧 -跌 -筏 -京 -锏 -帝 -贴 -证 -糠 -才 -黄 -鲸 -略 -炯 -饱 -四 -出 -园 -犀 -牧 -容 -汉 -杆 -浈 -汰 -瑷 -造 -虫 -瘩 -怪 -驴 -济 -应 -花 -沣 -谔 -夙 -旅 -价 -矿 -以 -考 -s -u -呦 -晒 -巡 -茅 -准 -肟 -瓴 -詹 -仟 -褂 -译 -桌 -混 -宁 -怦 -郑 -抿 -些 -余 -鄂 -饴 -攒 -珑 -群 -阖 -岔 -琨 -藓 -预 -环 -洮 -岌 -宀 -杲 -瀵 -最 -常 -囡 -周 -踊 -女 -鼓 -袭 -喉 -简 -范 -薯 -遐 -疏 -粱 -黜 -禧 -法 -箔 -斤 -遥 -汝 -奥 -直 -贞 -撑 -置 -绱 -集 -她 -馅 -逗 -钧 -橱 -魉 -[ -恙 -躁 -唤 -9 -旺 -膘 -待 -脾 -惫 -购 -吗 -依 -盲 -度 -瘿 -蠖 -俾 -之 -镗 -拇 -鲵 -厝 -簧 -续 -款 -展 -啃 -表 -剔 -品 -钻 -腭 -损 -清 -锶 -统 -涌 -寸 -滨 -贪 -链 -吠 -冈 -伎 -迥 -咏 -吁 -览 -防 -迅 -失 -汾 -阔 -逵 -绀 -蔑 -列 -川 -凭 -努 -熨 -揪 -利 -俱 -绉 -抢 -鸨 -我 -即 -责 -膦 -易 -毓 -鹊 -刹 -玷 -岿 -空 -嘞 -绊 -排 -术 -估 -锷 -违 -们 -苟 -铜 -播 -肘 -件 -烫 -审 -鲂 -广 -像 -铌 -惰 -铟 -巳 -胍 -鲍 -康 -憧 -色 -恢 -想 -拷 -尤 -疳 -知 -S -Y -F -D -A -峄 -裕 -帮 -握 -搔 -氐 -氘 -难 -墒 -沮 -雨 -叁 -缥 -悴 -藐 -湫 -娟 -苑 -稠 -颛 -簇 -后 -阕 -闭 -蕤 -缚 -怎 -佞 -码 -嘤 -蔡 -痊 -舱 -螯 -帕 -赫 -昵 -升 -烬 -岫 -、 -疵 -蜻 -髁 -蕨 -隶 -烛 -械 -丑 -盂 -梁 -强 -鲛 -由 -拘 -揉 -劭 -龟 -撤 -钩 -呕 -孛 -费 -妻 -漂 -求 -阑 -崖 -秤 -甘 -通 -深 -补 -赃 -坎 -床 -啪 -承 -吼 -量 -暇 -钼 -烨 -阂 -擎 -脱 -逮 -称 -P -神 -属 -矗 -华 -届 -狍 -葑 -汹 -育 -患 -窒 -蛰 -佼 -静 -槎 -运 -鳗 -庆 -逝 -曼 -疱 -克 -代 -官 -此 -麸 -耧 -蚌 -晟 -例 -础 -榛 -副 -测 -唰 -缢 -迹 -灬 -霁 -身 -岁 -赭 
-扛 -又 -菡 -乜 -雾 -板 -读 -陷 -徉 -贯 -郁 -虑 -变 -钓 -菜 -圾 -现 -琢 -式 -乐 -维 -渔 -浜 -左 -吾 -脑 -钡 -警 -T -啵 -拴 -偌 -漱 -湿 -硕 -止 -骼 -魄 -积 -燥 -联 -踢 -玛 -则 -窿 -见 -振 -畿 -送 -班 -钽 -您 -赵 -刨 -印 -讨 -踝 -籍 -谡 -舌 -崧 -汽 -蔽 -沪 -酥 -绒 -怖 -财 -帖 -肱 -私 -莎 -勋 -羔 -霸 -励 -哼 -帐 -将 -帅 -渠 -纪 -婴 -娩 -岭 -厘 -滕 -吻 -伤 -坝 -冠 -戊 -隆 -瘁 -介 -涧 -物 -黍 -并 -姗 -奢 -蹑 -掣 -垸 -锴 -命 -箍 -捉 -病 -辖 -琰 -眭 -迩 -艘 -绌 -繁 -寅 -若 -毋 -思 -诉 -类 -诈 -燮 -轲 -酮 -狂 -重 -反 -职 -筱 -县 -委 -磕 -绣 -奖 -晋 -濉 -志 -徽 -肠 -呈 -獐 -坻 -口 -片 -碰 -几 -村 -柿 -劳 -料 -获 -亩 -惕 -晕 -厌 -号 -罢 -池 -正 -鏖 -煨 -家 -棕 -复 -尝 -懋 -蜥 -锅 -岛 -扰 -队 -坠 -瘾 -钬 -@ -卧 -疣 -镇 -譬 -冰 -彷 -频 -黯 -据 -垄 -采 -八 -缪 -瘫 -型 -熹 -砰 -楠 -襁 -箐 -但 -嘶 -绳 -啤 -拍 -盥 -穆 -傲 -洗 -盯 -塘 -怔 -筛 -丿 -台 -恒 -喂 -葛 -永 -¥ -烟 -酒 -桦 -书 -砂 -蚝 -缉 -态 -瀚 -袄 -圳 -轻 -蛛 -超 -榧 -遛 -姒 -奘 -铮 -右 -荽 -望 -偻 -卡 -丶 -氰 -附 -做 -革 -索 -戚 -坨 -桷 -唁 -垅 -榻 -岐 -偎 -坛 -莨 -山 -殊 -微 -骇 -陈 -爨 -推 -嗝 -驹 -澡 -藁 -呤 -卤 -嘻 -糅 -逛 -侵 -郓 -酌 -德 -摇 -※ -鬃 -被 -慨 -殡 -羸 -昌 -泡 -戛 -鞋 -河 -宪 -沿 -玲 -鲨 -翅 -哽 -源 -铅 -语 -照 -邯 -址 -荃 -佬 -顺 -鸳 -町 -霭 -睾 -瓢 -夸 -椁 -晓 -酿 -痈 -咔 -侏 -券 -噎 -湍 -签 -嚷 -离 -午 -尚 -社 -锤 -背 -孟 -使 -浪 -缦 -潍 -鞅 -军 -姹 -驶 -笑 -鳟 -鲁 -》 -孽 -钜 -绿 -洱 -礴 -焯 -椰 -颖 -囔 -乌 -孔 -巴 -互 -性 -椽 -哞 -聘 -昨 -早 -暮 -胶 -炀 -隧 -低 -彗 -昝 -铁 -呓 -氽 -藉 -喔 -癖 -瑗 -姨 -权 -胱 -韦 -堑 -蜜 -酋 -楝 -砝 -毁 -靓 -歙 -锲 -究 -屋 -喳 -骨 -辨 -碑 -武 -鸠 -宫 -辜 -烊 -适 -坡 -殃 -培 -佩 -供 -走 -蜈 -迟 -翼 -况 -姣 -凛 -浔 -吃 -飘 -债 -犟 -金 -促 -苛 -崇 -坂 -莳 -畔 -绂 -兵 -蠕 -斋 -根 -砍 -亢 -欢 -恬 -崔 -剁 -餐 -榫 -快 -扶 -‖ -濒 -缠 -鳜 -当 -彭 -驭 -浦 -篮 -昀 -锆 -秸 -钳 -弋 -娣 -瞑 -夷 -龛 -苫 -拱 -致 -% -嵊 -障 -隐 -弑 -初 -娓 -抉 -汩 -累 -蓖 -" -唬 -助 -苓 -昙 -押 -毙 -破 -城 -郧 -逢 -嚏 -獭 -瞻 -溱 -婿 -赊 -跨 -恼 -璧 -萃 -姻 -貉 -灵 -炉 -密 -氛 -陶 -砸 -谬 -衔 -点 -琛 -沛 -枳 -层 -岱 -诺 -脍 -榈 -埂 -征 -冷 -裁 -打 -蹴 -素 -瘘 -逞 -蛐 -聊 -激 -腱 -萘 -踵 -飒 -蓟 -吆 -取 -咙 -簋 -涓 -矩 -曝 -挺 -揣 -座 -你 -史 -舵 -焱 -尘 -苏 -笈 -脚 -溉 -榨 -诵 -樊 -邓 -焊 -义 -庶 -儋 -蟋 -蒲 -赦 -呷 -杞 -诠 -豪 -还 -试 -颓 -茉 -太 -除 -紫 -逃 -痴 -草 -充 -鳕 -珉 -祗 -墨 -渭 -烩 -蘸 -慕 -璇 -镶 -穴 -嵘 -恶 -骂 -险 -绋 -幕 -碉 -肺 -戳 -刘 -潞 -秣 -纾 -潜 -銮 -洛 -须 -罘 -销 -瘪 -汞 -兮 -屉 -r -林 -厕 -质 -探 -划 -狸 -殚 -善 -煊 -烹 -〒 -锈 -逯 -宸 -辍 -泱 -柚 -袍 -远 -蹋 -嶙 -绝 -峥 -娥 -缍 -雀 -徵 -认 -镱 -谷 -= -贩 -勉 -撩 -鄯 -斐 -洋 -非 -祚 -泾 -诒 -饿 -撬 -威 -晷 -搭 -芍 -锥 -笺 -蓦 -候 -琊 -档 -礁 -沼 -卵 -荠 -忑 -朝 -凹 -瑞 -头 -仪 -弧 -孵 -畏 -铆 -突 -衲 -车 -浩 -气 -茂 -悖 -厢 -枕 -酝 -戴 -湾 -邹 -飚 -攘 -锂 -写 -宵 -翁 -岷 -无 -喜 -丈 -挑 -嗟 -绛 -殉 -议 -槽 -具 -醇 -淞 -笃 -郴 -阅 -饼 -底 -壕 -砚 -弈 -询 -缕 -庹 -翟 -零 -筷 -暨 -舟 -闺 -甯 -撞 -麂 -茌 -蔼 -很 -珲 -捕 -棠 -角 -阉 -媛 -娲 -诽 -剿 -尉 -爵 -睬 -韩 -诰 -匣 -危 -糍 -镯 -立 -浏 -阳 -少 -盆 -舔 -擘 -匪 -申 -尬 -铣 -旯 -抖 -赘 -瓯 -居 -ˇ -哮 -游 -锭 -茏 -歌 -坏 -甚 -秒 -舞 -沙 -仗 -劲 -潺 -阿 -燧 -郭 -嗖 -霏 -忠 -材 -奂 -耐 -跺 -砀 -输 -岖 -媳 -氟 -极 -摆 -灿 -今 -扔 -腻 -枝 -奎 -药 -熄 -吨 -话 -q -额 -慑 -嘌 -协 -喀 -壳 -埭 -视 -著 -於 -愧 -陲 -翌 -峁 -颅 -佛 -腹 -聋 -侯 -咎 -叟 -秀 -颇 -存 -较 -罪 -哄 -岗 -扫 -栏 -钾 -羌 -己 -璨 -枭 -霉 -煌 -涸 -衿 -键 -镝 -益 -岢 -奏 -连 -夯 -睿 -冥 -均 -糖 -狞 -蹊 -稻 -爸 -刿 -胥 -煜 -丽 -肿 -璃 -掸 -跚 -灾 -垂 -樾 -濑 -乎 -莲 -窄 -犹 -撮 -战 -馄 -软 -络 -显 -鸢 -胸 -宾 -妲 -恕 -埔 -蝌 -份 -遇 -巧 -瞟 -粒 -恰 -剥 -桡 -博 -讯 -凯 -堇 -阶 -滤 -卖 -斌 -骚 -彬 -兑 -磺 -樱 -舷 -两 -娱 -福 -仃 -差 -找 -桁 -÷ -净 -把 -阴 -污 -戬 -雷 -碓 -蕲 -楚 -罡 -焖 -抽 -妫 -咒 -仑 -闱 -尽 -邑 -菁 -爱 -贷 -沥 -鞑 -牡 -嗉 -崴 -骤 -塌 -嗦 -订 -拮 -滓 -捡 -锻 -次 -坪 -杩 -臃 -箬 -融 -珂 -鹗 -宗 -枚 -降 -鸬 -妯 -阄 -堰 -盐 -毅 -必 -杨 -崃 -俺 -甬 -状 -莘 -货 -耸 -菱 -腼 -铸 -唏 -痤 -孚 -澳 -懒 -溅 -翘 -疙 -杷 -淼 -缙 -骰 -喊 -悉 -砻 -坷 -艇 -赁 -界 -谤 -纣 -宴 -晃 -茹 -归 -饭 -梢 -铡 -街 -抄 -肼 -鬟 -苯 -颂 -撷 -戈 -炒 -咆 -茭 -瘙 -负 -仰 -客 -琉 -铢 -封 -卑 -珥 -椿 -镧 -窨 -鬲 -寿 -御 -袤 -铃 -萎 -砖 -餮 -脒 -裳 -肪 -孕 -嫣 -馗 -嵇 -恳 -氯 -江 -石 -褶 -冢 -祸 -阻 -狈 -羞 -银 -靳 -透 -咳 -叼 -敷 -芷 -啥 -它 -瓤 -兰 -痘 -懊 -逑 -肌 -往 -捺 -坊 -甩 -呻 -〃 -沦 -忘 -膻 -祟 -菅 -剧 -崆 -智 -坯 -臧 -霍 -墅 -攻 -眯 -倘 -拢 -骠 -铐 -庭 -岙 -瓠 -′ -缺 -泥 -迢 -捶 -? -? 
-郏 -喙 -掷 -沌 -纯 -秘 -种 -听 -绘 -固 -螨 -团 -香 -盗 -妒 -埚 -蓝 -拖 -旱 -荞 -铀 -血 -遏 -汲 -辰 -叩 -拽 -幅 -硬 -惶 -桀 -漠 -措 -泼 -唑 -齐 -肾 -念 -酱 -虚 -屁 -耶 -旗 -砦 -闵 -婉 -馆 -拭 -绅 -韧 -忏 -窝 -醋 -葺 -顾 -辞 -倜 -堆 -辋 -逆 -玟 -贱 -疾 -董 -惘 -倌 -锕 -淘 -嘀 -莽 -俭 -笏 -绑 -鲷 -杈 -择 -蟀 -粥 -嗯 -驰 -逾 -案 -谪 -褓 -胫 -哩 -昕 -颚 -鲢 -绠 -躺 -鹄 -崂 -儒 -俨 -丝 -尕 -泌 -啊 -萸 -彰 -幺 -吟 -骄 -苣 -弦 -脊 -瑰 -〈 -诛 -镁 -析 -闪 -剪 -侧 -哟 -框 -螃 -守 -嬗 -燕 -狭 -铈 -缮 -概 -迳 -痧 -鲲 -俯 -售 -笼 -痣 -扉 -挖 -满 -咋 -援 -邱 -扇 -歪 -便 -玑 -绦 -峡 -蛇 -叨 -〖 -泽 -胃 -斓 -喋 -怂 -坟 -猪 -该 -蚬 -炕 -弥 -赞 -棣 -晔 -娠 -挲 -狡 -创 -疖 -铕 -镭 -稷 -挫 -弭 -啾 -翔 -粉 -履 -苘 -哦 -楼 -秕 -铂 -土 -锣 -瘟 -挣 -栉 -习 -享 -桢 -袅 -磨 -桂 -谦 -延 -坚 -蔚 -噗 -署 -谟 -猬 -钎 -恐 -嬉 -雒 -倦 -衅 -亏 -璩 -睹 -刻 -殿 -王 -算 -雕 -麻 -丘 -柯 -骆 -丸 -塍 -谚 -添 -鲈 -垓 -桎 -蚯 -芥 -予 -飕 -镦 -谌 -窗 -醚 -菀 -亮 -搪 -莺 -蒿 -羁 -足 -J -真 -轶 -悬 -衷 -靛 -翊 -掩 -哒 -炅 -掐 -冼 -妮 -l -谐 -稚 -荆 -擒 -犯 -陵 -虏 -浓 -崽 -刍 -陌 -傻 -孜 -千 -靖 -演 -矜 -钕 -煽 -杰 -酗 -渗 -伞 -栋 -俗 -泫 -戍 -罕 -沾 -疽 -灏 -煦 -芬 -磴 -叱 -阱 -榉 -湃 -蜀 -叉 -醒 -彪 -租 -郡 -篷 -屎 -良 -垢 -隗 -弱 -陨 -峪 -砷 -掴 -颁 -胎 -雯 -绵 -贬 -沐 -撵 -隘 -篙 -暖 -曹 -陡 -栓 -填 -臼 -彦 -瓶 -琪 -潼 -哪 -鸡 -摩 -啦 -俟 -锋 -域 -耻 -蔫 -疯 -纹 -撇 -毒 -绶 -痛 -酯 -忍 -爪 -赳 -歆 -嘹 -辕 -烈 -册 -朴 -钱 -吮 -毯 -癜 -娃 -谀 -邵 -厮 -炽 -璞 -邃 -丐 -追 -词 -瓒 -忆 -轧 -芫 -谯 -喷 -弟 -半 -冕 -裙 -掖 -墉 -绮 -寝 -苔 -势 -顷 -褥 -切 -衮 -君 -佳 -嫒 -蚩 -霞 -佚 -洙 -逊 -镖 -暹 -唛 -& -殒 -顶 -碗 -獗 -轭 -铺 -蛊 -废 -恹 -汨 -崩 -珍 -那 -杵 -曲 -纺 -夏 -薰 -傀 -闳 -淬 -姘 -舀 -拧 -卷 -楂 -恍 -讪 -厩 -寮 -篪 -赓 -乘 -灭 -盅 -鞣 -沟 -慎 -挂 -饺 -鼾 -杳 -树 -缨 -丛 -絮 -娌 -臻 -嗳 -篡 -侩 -述 -衰 -矛 -圈 -蚜 -匕 -筹 -匿 -濞 -晨 -叶 -骋 -郝 -挚 -蚴 -滞 -增 -侍 -描 -瓣 -吖 -嫦 -蟒 -匾 -圣 -赌 -毡 -癞 -恺 -百 -曳 -需 -篓 -肮 -庖 -帏 -卿 -驿 -遗 -蹬 -鬓 -骡 -歉 -芎 -胳 -屐 -禽 -烦 -晌 -寄 -媾 -狄 -翡 -苒 -船 -廉 -终 -痞 -殇 -々 -畦 -饶 -改 -拆 -悻 -萄 -£ -瓿 -乃 -訾 -桅 -匮 -溧 -拥 -纱 -铍 -骗 -蕃 -龋 -缬 -父 -佐 -疚 -栎 -醍 -掳 -蓄 -x -惆 -颜 -鲆 -榆 -〔 -猎 -敌 -暴 -谥 -鲫 -贾 -罗 -玻 -缄 -扦 -芪 -癣 -落 -徒 -臾 -恿 -猩 -托 -邴 -肄 -牵 -春 -陛 -耀 -刊 -拓 -蓓 -邳 -堕 -寇 -枉 -淌 -啡 -湄 -兽 -酷 -萼 -碚 -濠 -萤 -夹 -旬 -戮 -梭 -琥 -椭 -昔 -勺 -蜊 -绐 -晚 -孺 -僵 -宣 -摄 -冽 -旨 -萌 -忙 -蚤 -眉 -噼 -蟑 -付 -契 -瓜 -悼 -颡 -壁 -曾 -窕 -颢 -澎 -仿 -俑 -浑 -嵌 -浣 -乍 -碌 -褪 -乱 -蔟 -隙 -玩 -剐 -葫 -箫 -纲 -围 -伐 -决 -伙 -漩 -瑟 -刑 -肓 -镳 -缓 -蹭 -氨 -皓 -典 -畲 -坍 -铑 -檐 -塑 -洞 -倬 -储 -胴 -淳 -戾 -吐 -灼 -惺 -妙 -毕 -珐 -缈 -虱 -盖 -羰 -鸿 -磅 -谓 -髅 -娴 -苴 -唷 -蚣 -霹 -抨 -贤 -唠 -犬 -誓 -逍 -庠 -逼 -麓 -籼 -釉 -呜 -碧 -秧 -氩 -摔 -霄 -穸 -纨 -辟 -妈 -映 -完 -牛 -缴 -嗷 -炊 -恩 -荔 -茆 -掉 -紊 -慌 -莓 -羟 -阙 -萁 -磐 -另 -蕹 -辱 -鳐 -湮 -吡 -吩 -唐 -睦 -垠 -舒 -圜 -冗 -瞿 -溺 -芾 -囱 -匠 -僳 -汐 -菩 -饬 -漓 -黑 -霰 -浸 -濡 -窥 -毂 -蒡 -兢 -驻 -鹉 -芮 -诙 -迫 -雳 -厂 -忐 -臆 -猴 -鸣 -蚪 -栈 -箕 -羡 -渐 -莆 -捍 -眈 -哓 -趴 -蹼 -埕 -嚣 -骛 -宏 -淄 -斑 -噜 -严 -瑛 -垃 -椎 -诱 -压 -庾 -绞 -焘 -廿 -抡 -迄 -棘 -夫 -纬 -锹 -眨 -瞌 -侠 -脐 -竞 -瀑 -孳 -骧 -遁 -姜 -颦 -荪 -滚 -萦 -伪 -逸 -粳 -爬 -锁 -矣 -役 -趣 -洒 -颔 -诏 -逐 -奸 -甭 -惠 -攀 -蹄 -泛 -尼 -拼 -阮 -鹰 -亚 -颈 -惑 -勒 -〉 -际 -肛 -爷 -刚 -钨 -丰 -养 -冶 -鲽 -辉 -蔻 -画 -覆 -皴 -妊 -麦 -返 -醉 -皂 -擀 -〗 -酶 -凑 -粹 -悟 -诀 -硖 -港 -卜 -z -杀 -涕 -± -舍 -铠 -抵 -弛 -段 -敝 -镐 -奠 -拂 -轴 -跛 -袱 -e -t -沉 -菇 -俎 -薪 -峦 -秭 -蟹 -历 -盟 -菠 -寡 -液 -肢 -喻 -染 -裱 -悱 -抱 -氙 -赤 -捅 -猛 -跑 -氮 -谣 -仁 -尺 -辊 -窍 -烙 -衍 -架 -擦 -倏 -璐 -瑁 -币 -楞 -胖 -夔 -趸 -邛 -惴 -饕 -虔 -蝎 -§ -哉 -贝 -宽 -辫 -炮 -扩 -饲 -籽 -魏 -菟 -锰 -伍 -猝 -末 -琳 -哚 -蛎 -邂 -呀 -姿 -鄞 -却 -歧 -仙 -恸 -椐 -森 -牒 -寤 -袒 -婆 -虢 -雅 -钉 -朵 -贼 -欲 -苞 -寰 -故 -龚 -坭 -嘘 -咫 -礼 -硷 -兀 -睢 -汶 -’ -铲 -烧 -绕 -诃 -浃 -钿 -哺 -柜 -讼 -颊 -璁 -腔 -洽 -咐 -脲 -簌 -筠 -镣 -玮 -鞠 -谁 -兼 -姆 -挥 -梯 -蝴 -谘 -漕 -刷 -躏 -宦 -弼 -b -垌 -劈 -麟 -莉 -揭 -笙 -渎 -仕 -嗤 -仓 -配 -怏 -抬 -错 -泯 -镊 -孰 -猿 -邪 -仍 -秋 -鼬 -壹 -歇 -吵 -炼 -< -尧 -射 -柬 -廷 -胧 -霾 -凳 -隋 -肚 -浮 -梦 -祥 -株 -堵 -退 -L -鹫 -跎 -凶 -毽 -荟 -炫 -栩 -玳 -甜 -沂 -鹿 -顽 -伯 -爹 -赔 -蛴 -徐 -匡 -欣 -狰 -缸 -雹 -蟆 -疤 -默 -沤 -啜 -痂 -衣 -禅 -w -i -h -辽 -葳 -黝 -钗 -停 -沽 -棒 -馨 -颌 -肉 -吴 -硫 -悯 -劾 -娈 -马 -啧 -吊 -悌 -镑 -峭 -帆 -瀣 -涉 -咸 -疸 -滋 -泣 -翦 -拙 -癸 -钥 -蜒 -+ -尾 -庄 -凝 -泉 -婢 -渴 -谊 -乞 -陆 -锉 -糊 -鸦 -淮 -I -B -N -晦 -弗 -乔 -庥 -葡 -尻 -席 -橡 -傣 -渣 -拿 -惩 -麋 -斛 -缃 -矮 -蛏 -岘 -鸽 -姐 -膏 -催 -奔 -镒 -喱 -蠡 -摧 -钯 -胤 -柠 -拐 -璋 -鸥 -卢 -荡 -倾 -^ -_ -珀 -逄 -萧 -塾 -掇 -贮 -笆 -聂 -圃 -冲 -嵬 -M -滔 -笕 -值 
-炙 -偶 -蜱 -搐 -梆 -汪 -蔬 -腑 -鸯 -蹇 -敞 -绯 -仨 -祯 -谆 -梧 -糗 -鑫 -啸 -豺 -囹 -猾 -巢 -柄 -瀛 -筑 -踌 -沭 -暗 -苁 -鱿 -蹉 -脂 -蘖 -牢 -热 -木 -吸 -溃 -宠 -序 -泞 -偿 -拜 -檩 -厚 -朐 -毗 -螳 -吞 -媚 -朽 -担 -蝗 -橘 -畴 -祈 -糟 -盱 -隼 -郜 -惜 -珠 -裨 -铵 -焙 -琚 -唯 -咚 -噪 -骊 -丫 -滢 -勤 -棉 -呸 -咣 -淀 -隔 -蕾 -窈 -饨 -挨 -煅 -短 -匙 -粕 -镜 -赣 -撕 -墩 -酬 -馁 -豌 -颐 -抗 -酣 -氓 -佑 -搁 -哭 -递 -耷 -涡 -桃 -贻 -碣 -截 -瘦 -昭 -镌 -蔓 -氚 -甲 -猕 -蕴 -蓬 -散 -拾 -纛 -狼 -猷 -铎 -埋 -旖 -矾 -讳 -囊 -糜 -迈 -粟 -蚂 -紧 -鲳 -瘢 -栽 -稼 -羊 -锄 -斟 -睁 -桥 -瓮 -蹙 -祉 -醺 -鼻 -昱 -剃 -跳 -篱 -跷 -蒜 -翎 -宅 -晖 -嗑 -壑 -峻 -癫 -屏 -狠 -陋 -袜 -途 -憎 -祀 -莹 -滟 -佶 -溥 -臣 -约 -盛 -峰 -磁 -慵 -婪 -拦 -莅 -朕 -鹦 -粲 -裤 -哎 -疡 -嫖 -琵 -窟 -堪 -谛 -嘉 -儡 -鳝 -斩 -郾 -驸 -酊 -妄 -胜 -贺 -徙 -傅 -噌 -钢 -栅 -庇 -恋 -匝 -巯 -邈 -尸 -锚 -粗 -佟 -蛟 -薹 -纵 -蚊 -郅 -绢 -锐 -苗 -俞 -篆 -淆 -膀 -鲜 -煎 -诶 -秽 -寻 -涮 -刺 -怀 -噶 -巨 -褰 -魅 -灶 -灌 -桉 -藕 -谜 -舸 -薄 -搀 -恽 -借 -牯 -痉 -渥 -愿 -亓 -耘 -杠 -柩 -锔 -蚶 -钣 -珈 -喘 -蹒 -幽 -赐 -稗 -晤 -莱 -泔 -扯 -肯 -菪 -裆 -腩 -豉 -疆 -骜 -腐 -倭 -珏 -唔 -粮 -亡 -润 -慰 -伽 -橄 -玄 -誉 -醐 -胆 -龊 -粼 -塬 -陇 -彼 -削 -嗣 -绾 -芽 -妗 -垭 -瘴 -爽 -薏 -寨 -龈 -泠 -弹 -赢 -漪 -猫 -嘧 -涂 -恤 -圭 -茧 -烽 -屑 -痕 -巾 -赖 -荸 -凰 -腮 -畈 -亵 -蹲 -偃 -苇 -澜 -艮 -换 -骺 -烘 -苕 -梓 -颉 -肇 -哗 -悄 -氤 -涠 -葬 -屠 -鹭 -植 -竺 -佯 -诣 -鲇 -瘀 -鲅 -邦 -移 -滁 -冯 -耕 -癔 -戌 -茬 -沁 -巩 -悠 -湘 -洪 -痹 -锟 -循 -谋 -腕 -鳃 -钠 -捞 -焉 -迎 -碱 -伫 -急 -榷 -奈 -邝 -卯 -辄 -皲 -卟 -醛 -畹 -忧 -稳 -雄 -昼 -缩 -阈 -睑 -扌 -耗 -曦 -涅 -捏 -瞧 -邕 -淖 -漉 -铝 -耦 -禹 -湛 -喽 -莼 -琅 -诸 -苎 -纂 -硅 -始 -嗨 -傥 -燃 -臂 -赅 -嘈 -呆 -贵 -屹 -壮 -肋 -亍 -蚀 -卅 -豹 -腆 -邬 -迭 -浊 -} -童 -螂 -捐 -圩 -勐 -触 -寞 -汊 -壤 -荫 -膺 -渌 -芳 -懿 -遴 -螈 -泰 -蓼 -蛤 -茜 -舅 -枫 -朔 -膝 -眙 -避 -梅 -判 -鹜 -璜 -牍 -缅 -垫 -藻 -黔 -侥 -惚 -懂 -踩 -腰 -腈 -札 -丞 -唾 -慈 -顿 -摹 -荻 -琬 -~ -斧 -沈 -滂 -胁 -胀 -幄 -莜 -Z -匀 -鄄 -掌 -绰 -茎 -焚 -赋 -萱 -谑 -汁 -铒 -瞎 -夺 -蜗 -野 -娆 -冀 -弯 -篁 -懵 -灞 -隽 -芡 -脘 -俐 -辩 -芯 -掺 -喏 -膈 -蝈 -觐 -悚 -踹 -蔗 -熠 -鼠 -呵 -抓 -橼 -峨 -畜 -缔 -禾 -崭 -弃 -熊 -摒 -凸 -拗 -穹 -蒙 -抒 -祛 -劝 -闫 -扳 -阵 -醌 -踪 -喵 -侣 -搬 -仅 -荧 -赎 -蝾 -琦 -买 -婧 -瞄 -寓 -皎 -冻 -赝 -箩 -莫 -瞰 -郊 -笫 -姝 -筒 -枪 -遣 -煸 -袋 -舆 -痱 -涛 -母 -〇 -启 -践 -耙 -绲 -盘 -遂 -昊 -搞 -槿 -诬 -纰 -泓 -惨 -檬 -亻 -越 -C -o -憩 -熵 -祷 -钒 -暧 -塔 -阗 -胰 -咄 -娶 -魔 -琶 -钞 -邻 -扬 -杉 -殴 -咽 -弓 -〆 -髻 -】 -吭 -揽 -霆 -拄 -殖 -脆 -彻 -岩 -芝 -勃 -辣 -剌 -钝 -嘎 -甄 -佘 -皖 -伦 -授 -徕 -憔 -挪 -皇 -庞 -稔 -芜 -踏 -溴 -兖 -卒 -擢 -饥 -鳞 -煲 -‰ -账 -颗 -叻 -斯 -捧 -鳍 -琮 -讹 -蛙 -纽 -谭 -酸 -兔 -莒 -睇 -伟 -觑 -羲 -嗜 -宜 -褐 -旎 -辛 -卦 -诘 -筋 -鎏 -溪 -挛 -熔 -阜 -晰 -鳅 -丢 -奚 -灸 -呱 -献 -陉 -黛 -鸪 -甾 -萨 -疮 -拯 -洲 -疹 -辑 -叙 -恻 -谒 -允 -柔 -烂 -氏 -逅 -漆 -拎 -惋 -扈 -湟 -纭 -啕 -掬 -擞 -哥 -忽 -涤 -鸵 -靡 -郗 -瓷 -扁 -廊 -怨 -雏 -钮 -敦 -E -懦 -憋 -汀 -拚 -啉 -腌 -岸 -f -痼 -瞅 -尊 -咀 -眩 -飙 -忌 -仝 -迦 -熬 -毫 -胯 -篑 -茄 -腺 -凄 -舛 -碴 -锵 -诧 -羯 -後 -漏 -汤 -宓 -仞 -蚁 -壶 -谰 -皑 -铄 -棰 -罔 -辅 -晶 -苦 -牟 -闽 -\ -烃 -饮 -聿 -丙 -蛳 -朱 -煤 -涔 -鳖 -犁 -罐 -荼 -砒 -淦 -妤 -黏 -戎 -孑 -婕 -瑾 -戢 -钵 -枣 -捋 -砥 -衩 -狙 -桠 -稣 -阎 -肃 -梏 -诫 -孪 -昶 -婊 -衫 -嗔 -侃 -塞 -蜃 -樵 -峒 -貌 -屿 -欺 -缫 -阐 -栖 -诟 -珞 -荭 -吝 -萍 -嗽 -恂 -啻 -蜴 -磬 -峋 -俸 -豫 -谎 -徊 -镍 -韬 -魇 -晴 -U -囟 -猜 -蛮 -坐 -囿 -伴 -亭 -肝 -佗 -蝠 -妃 -胞 -滩 -榴 -氖 -垩 -苋 -砣 -扪 -馏 -姓 -轩 -厉 -夥 -侈 -禀 -垒 -岑 -赏 -钛 -辐 -痔 -披 -纸 -碳 -“ -坞 -蠓 -挤 -荥 -沅 -悔 -铧 -帼 -蒌 -蝇 -a -p -y -n -g -哀 -浆 -瑶 -凿 -桶 -馈 -皮 -奴 -苜 -佤 -伶 -晗 -铱 -炬 -优 -弊 -氢 -恃 -甫 -攥 -端 -锌 -灰 -稹 -炝 -曙 -邋 -亥 -眶 -碾 -拉 -萝 -绔 -捷 -浍 -腋 -姑 -菖 -凌 -涞 -麽 -锢 -桨 -潢 -绎 -镰 -殆 -锑 -渝 -铬 -困 -绽 -觎 -匈 -糙 -暑 -裹 -鸟 -盔 -肽 -迷 -綦 -『 -亳 -佝 -俘 -钴 -觇 -骥 -仆 -疝 -跪 -婶 -郯 -瀹 -唉 -脖 -踞 -针 -晾 -忒 -扼 -瞩 -叛 -椒 -疟 -嗡 -邗 -肆 -跆 -玫 -忡 -捣 -咧 -唆 -艄 -蘑 -潦 -笛 -阚 -沸 -泻 -掊 -菽 -贫 -斥 -髂 -孢 -镂 -赂 -麝 -鸾 -屡 -衬 -苷 -恪 -叠 -希 -粤 -爻 -喝 -茫 -惬 -郸 -绻 -庸 -撅 -碟 -宄 -妹 -膛 -叮 -饵 -崛 -嗲 -椅 -冤 -搅 -咕 -敛 -尹 -垦 -闷 -蝉 -霎 -勰 -败 -蓑 -泸 -肤 -鹌 -幌 -焦 -浠 -鞍 -刁 -舰 -乙 -竿 -裔 -。 -茵 -函 -伊 -兄 -丨 -娜 -匍 -謇 -莪 -宥 -似 -蝽 -翳 -酪 -翠 -粑 -薇 -祢 -骏 -赠 -叫 -Q -噤 -噻 -竖 -芗 -莠 -潭 -俊 -羿 -耜 -O -郫 -趁 -嗪 -囚 -蹶 -芒 -洁 -笋 -鹑 -敲 -硝 -啶 -堡 -渲 -揩 -』 -携 -宿 -遒 -颍 -扭 -棱 -割 -萜 -蔸 -葵 -琴 -捂 -饰 -衙 -耿 -掠 -募 -岂 -窖 -涟 -蔺 -瘤 -柞 -瞪 -怜 -匹 -距 -楔 -炜 -哆 -秦 -缎 -幼 -茁 -绪 -痨 -恨 -楸 -娅 -瓦 -桩 -雪 -嬴 -伏 -榔 -妥 -铿 -拌 -眠 -雍 -缇 -‘ -卓 -搓 -哌 -觞 -噩 -屈 -哧 -髓 -咦 -巅 -娑 -侑 -淫 -膳 -祝 -勾 -姊 -莴 
-胄 -疃 -薛 -蜷 -胛 -巷 -芙 -芋 -熙 -闰 -勿 -窃 -狱 -剩 -钏 -幢 -陟 -铛 -慧 -靴 -耍 -k -浙 -浇 -飨 -惟 -绗 -祜 -澈 -啼 -咪 -磷 -摞 -诅 -郦 -抹 -跃 -壬 -吕 -肖 -琏 -颤 -尴 -剡 -抠 -凋 -赚 -泊 -津 -宕 -殷 -倔 -氲 -漫 -邺 -涎 -怠 -$ -垮 -荬 -遵 -俏 -叹 -噢 -饽 -蜘 -孙 -筵 -疼 -鞭 -羧 -牦 -箭 -潴 -c -眸 -祭 -髯 -啖 -坳 -愁 -芩 -驮 -倡 -巽 -穰 -沃 -胚 -怒 -凤 -槛 -剂 -趵 -嫁 -v -邢 -灯 -鄢 -桐 -睽 -檗 -锯 -槟 -婷 -嵋 -圻 -诗 -蕈 -颠 -遭 -痢 -芸 -怯 -馥 -竭 -锗 -徜 -恭 -遍 -籁 -剑 -嘱 -苡 -龄 -僧 -桑 -潸 -弘 -澶 -楹 -悲 -讫 -愤 -腥 -悸 -谍 -椹 -呢 -桓 -葭 -攫 -阀 -翰 -躲 -敖 -柑 -郎 -笨 -橇 -呃 -魁 -燎 -脓 -葩 -磋 -垛 -玺 -狮 -沓 -砜 -蕊 -锺 -罹 -蕉 -翱 -虐 -闾 -巫 -旦 -茱 -嬷 -枯 -鹏 -贡 -芹 -汛 -矫 -绁 -拣 -禺 -佃 -讣 -舫 -惯 -乳 -趋 -疲 -挽 -岚 -虾 -衾 -蠹 -蹂 -飓 -氦 -铖 -孩 -稞 -瑜 -壅 -掀 -勘 -妓 -畅 -髋 -W -庐 -牲 -蓿 -榕 -练 -垣 -唱 -邸 -菲 -昆 -婺 -穿 -绡 -麒 -蚱 -掂 -愚 -泷 -涪 -漳 -妩 -娉 -榄 -讷 -觅 -旧 -藤 -煮 -呛 -柳 -腓 -叭 -庵 -烷 -阡 -罂 -蜕 -擂 -猖 -咿 -媲 -脉 -【 -沏 -貅 -黠 -熏 -哲 -烁 -坦 -酵 -兜 -× -潇 -撒 -剽 -珩 -圹 -乾 -摸 -樟 -帽 -嗒 -襄 -魂 -轿 -憬 -锡 -〕 -喃 -皆 -咖 -隅 -脸 -残 -泮 -袂 -鹂 -珊 -囤 -捆 -咤 -误 -徨 -闹 -淙 -芊 -淋 -怆 -囗 -拨 -梳 -渤 -R -G -绨 -蚓 -婀 -幡 -狩 -麾 -谢 -唢 -裸 -旌 -伉 -纶 -裂 -驳 -砼 -咛 -澄 -樨 -蹈 -宙 -澍 -倍 -貔 -操 -勇 -蟠 -摈 -砧 -虬 -够 -缁 -悦 -藿 -撸 -艹 -摁 -淹 -豇 -虎 -榭 -ˉ -吱 -d -° -喧 -荀 -踱 -侮 -奋 -偕 -饷 -犍 -惮 -坑 -璎 -徘 -宛 -妆 -袈 -倩 -窦 -昂 -荏 -乖 -K -怅 -撰 -鳙 -牙 -袁 -酞 -X -痿 -琼 -闸 -雁 -趾 -荚 -虻 -涝 -《 -杏 -韭 -偈 -烤 -绫 -鞘 -卉 -症 -遢 -蓥 -诋 -杭 -荨 -匆 -竣 -簪 -辙 -敕 -虞 -丹 -缭 -咩 -黟 -m -淤 -瑕 -咂 -铉 -硼 -茨 -嶂 -痒 -畸 -敬 -涿 -粪 -窘 -熟 -叔 -嫔 -盾 -忱 -裘 -憾 -梵 -赡 -珙 -咯 -娘 -庙 -溯 -胺 -葱 -痪 -摊 -荷 -卞 -乒 -髦 -寐 -铭 -坩 -胗 -枷 -爆 -溟 -嚼 -羚 -砬 -轨 -惊 -挠 -罄 -竽 -菏 -氧 -浅 -楣 -盼 -枢 -炸 -阆 -杯 -谏 -噬 -淇 -渺 -俪 -秆 -墓 -泪 -跻 -砌 -痰 -垡 -渡 -耽 -釜 -讶 -鳎 -煞 -呗 -韶 -舶 -绷 -鹳 -缜 -旷 -铊 -皱 -龌 -檀 -霖 -奄 -槐 -艳 -蝶 -旋 -哝 -赶 -骞 -蚧 -腊 -盈 -丁 -` -蜚 -矸 -蝙 -睨 -嚓 -僻 -鬼 -醴 -夜 -彝 -磊 -笔 -拔 -栀 -糕 -厦 -邰 -纫 -逭 -纤 -眦 -膊 -馍 -躇 -烯 -蘼 -冬 -诤 -暄 -骶 -哑 -瘠 -」 -臊 -丕 -愈 -咱 -螺 -擅 -跋 -搏 -硪 -谄 -笠 -淡 -嘿 -骅 -谧 -鼎 -皋 -姚 -歼 -蠢 -驼 -耳 -胬 -挝 -涯 -狗 -蒽 -孓 -犷 -凉 -芦 -箴 -铤 -孤 -嘛 -坤 -V -茴 -朦 -挞 -尖 -橙 -诞 -搴 -碇 -洵 -浚 -帚 -蜍 -漯 -柘 -嚎 -讽 -芭 -荤 -咻 -祠 -秉 -跖 -埃 -吓 -糯 -眷 -馒 -惹 -娼 -鲑 -嫩 -讴 -轮 -瞥 -靶 -褚 -乏 -缤 -宋 -帧 -删 -驱 -碎 -扑 -俩 -俄 -偏 -涣 -竹 -噱 -皙 -佰 -渚 -唧 -斡 -# -镉 -刀 -崎 -筐 -佣 -夭 -贰 -肴 -峙 -哔 -艿 -匐 -牺 -镛 -缘 -仡 -嫡 -劣 -枸 -堀 -梨 -簿 -鸭 -蒸 -亦 -稽 -浴 -{ -衢 -束 -槲 -j -阁 -揍 -疥 -棋 -潋 -聪 -窜 -乓 -睛 -插 -冉 -阪 -苍 -搽 -「 -蟾 -螟 -幸 -仇 -樽 -撂 -慢 -跤 -幔 -俚 -淅 -覃 -觊 -溶 -妖 -帛 -侨 -曰 -妾 -泗 -· -: -瀘 -風 -Ë -( -) -∶ -紅 -紗 -瑭 -雲 -頭 -鶏 -財 -許 -• -¥ -樂 -焗 -麗 -— -; -滙 -東 -榮 -繪 -興 -… -門 -業 -π -楊 -國 -顧 -é -盤 -寳 -Λ -龍 -鳳 -島 -誌 -緣 -結 -銭 -萬 -勝 -祎 -璟 -優 -歡 -臨 -時 -購 -= -★ -藍 -昇 -鐵 -觀 -勅 -農 -聲 -畫 -兿 -術 -發 -劉 -記 -專 -耑 -園 -書 -壴 -種 -Ο -● -褀 -號 -銀 -匯 -敟 -锘 -葉 -橪 -廣 -進 -蒄 -鑽 -阝 -祙 -貢 -鍋 -豊 -夬 -喆 -團 -閣 -開 -燁 -賓 -館 -酡 -沔 -順 -+ -硚 -劵 -饸 -陽 -車 -湓 -復 -萊 -氣 -軒 -華 -堃 -迮 -纟 -戶 -馬 -學 -裡 -電 -嶽 -獨 -マ -シ -サ -ジ -燘 -袪 -環 -❤ -臺 -灣 -専 -賣 -孖 -聖 -攝 -線 -▪ -α -傢 -俬 -夢 -達 -莊 -喬 -貝 -薩 -劍 -羅 -壓 -棛 -饦 -尃 -璈 -囍 -醫 -G -I -A -# -N -鷄 -髙 -嬰 -啓 -約 -隹 -潔 -賴 -藝 -~ -寶 -籣 -麺 -  -嶺 -√ -義 -網 -峩 -長 -∧ -魚 -機 -構 -② -鳯 -偉 -L -B -㙟 -畵 -鴿 -' -詩 -溝 -嚞 -屌 -藔 -佧 -玥 -蘭 -織 -1 -3 -9 -0 -7 -點 -砭 -鴨 -鋪 -銘 -廳 -弍 -‧ -創 -湯 -坶 -℃ -卩 -骝 -& -烜 -荘 -當 -潤 -扞 -係 -懷 -碶 -钅 -蚨 -讠 -☆ -叢 -爲 -埗 -涫 -塗 -→ -楽 -現 -鯨 -愛 -瑪 -鈺 -忄 -悶 -藥 -飾 -樓 -視 -孬 -ㆍ -燚 -苪 -師 -① -丼 -锽 -│ -韓 -標 -è -兒 -閏 -匋 -張 -漢 -Ü -髪 -會 -閑 -檔 -習 -裝 -の -峯 -菘 -輝 -И -雞 -釣 -億 -浐 -K -O -R -8 -H -E -P -T -W -D -S -C -M -F -姌 -饹 -» -晞 -廰 -ä -嵯 -鷹 -負 -飲 -絲 -冚 -楗 -澤 -綫 -區 -❋ -← -質 -靑 -揚 -③ -滬 -統 -産 -協 -﹑ -乸 -畐 -經 -運 -際 -洺 -岽 -為 -粵 -諾 -崋 -豐 -碁 -ɔ -V -2 -6 -齋 -誠 -訂 -´ -勑 -雙 -陳 -無 -í -泩 -媄 -夌 -刂 -i -c -t -o -r -a -嘢 -耄 -燴 -暃 -壽 -媽 -靈 -抻 -體 -唻 -É -冮 -甹 -鎮 -錦 -ʌ -蜛 -蠄 -尓 -駕 -戀 -飬 -逹 -倫 -貴 -極 -Я -Й -寬 -磚 -嶪 -郎 -職 -| -間 -n -d -剎 -伈 -課 -飛 -橋 -瘊 -№ -譜 -骓 -圗 -滘 -縣 -粿 -咅 -養 -濤 -彳 -® -% -Ⅱ -啰 -㴪 -見 -矞 -薬 -糁 -邨 -鲮 -顔 -罱 -З -選 -話 -贏 -氪 -俵 -競 -瑩 -繡 -枱 -β -綉 -á -獅 -爾 -™ -麵 -戋 -淩 -徳 -個 -劇 -場 -務 -簡 -寵 -h -實 -膠 -轱 -圖 -築 -嘣 -樹 -㸃 -營 -耵 -孫 -饃 -鄺 -飯 -麯 -遠 -輸 -坫 -孃 -乚 
-閃 -鏢 -㎡ -題 -廠 -關 -↑ -爺 -將 -軍 -連 -篦 -覌 -參 -箸 -- -窠 -棽 -寕 -夀 -爰 -歐 -呙 -閥 -頡 -熱 -雎 -垟 -裟 -凬 -勁 -帑 -馕 -夆 -疌 -枼 -馮 -貨 -蒤 -樸 -彧 -旸 -靜 -龢 -暢 -㐱 -鳥 -珺 -鏡 -灡 -爭 -堷 -廚 -Ó -騰 -診 -┅ -蘇 -褔 -凱 -頂 -豕 -亞 -帥 -嘬 -⊥ -仺 -桖 -複 -饣 -絡 -穂 -顏 -棟 -納 -▏ -濟 -親 -設 -計 -攵 -埌 -烺 -ò -頤 -燦 -蓮 -撻 -節 -講 -濱 -濃 -娽 -洳 -朿 -燈 -鈴 -護 -膚 -铔 -過 -補 -Z -U -5 -4 -坋 -闿 -䖝 -餘 -缐 -铞 -貿 -铪 -桼 -趙 -鍊 -[ -㐂 -垚 -菓 -揸 -捲 -鐘 -滏 -𣇉 -爍 -輪 -燜 -鴻 -鮮 -動 -鹞 -鷗 -丄 -慶 -鉌 -翥 -飮 -腸 -⇋ -漁 -覺 -來 -熘 -昴 -翏 -鲱 -圧 -鄉 -萭 -頔 -爐 -嫚 -г -貭 -類 -聯 -幛 -輕 -訓 -鑒 -夋 -锨 -芃 -珣 -䝉 -扙 -嵐 -銷 -處 -ㄱ -語 -誘 -苝 -歸 -儀 -燒 -楿 -內 -粢 -葒 -奧 -麥 -礻 -滿 -蠔 -穵 -瞭 -態 -鱬 -榞 -硂 -鄭 -黃 -煙 -祐 -奓 -逺 -* -瑄 -獲 -聞 -薦 -讀 -這 -樣 -決 -問 -啟 -們 -執 -説 -轉 -單 -隨 -唘 -帶 -倉 -庫 -還 -贈 -尙 -皺 -■ -餅 -產 -○ -∈ -報 -狀 -楓 -賠 -琯 -嗮 -禮 -` -傳 -> -≤ -嗞 -Φ -≥ -換 -咭 -∣ -↓ -曬 -ε -応 -寫 -″ -終 -様 -純 -費 -療 -聨 -凍 -壐 -郵 -ü -黒 -∫ -製 -塊 -調 -軽 -確 -撃 -級 -馴 -Ⅲ -涇 -繹 -數 -碼 -證 -狒 -処 -劑 -< -晧 -賀 -衆 -] -櫥 -兩 -陰 -絶 -對 -鯉 -憶 -◎ -p -e -Y -蕒 -煖 -頓 -測 -試 -鼽 -僑 -碩 -妝 -帯 -≈ -鐡 -舖 -權 -喫 -倆 -ˋ -該 -悅 -ā -俫 -. -f -s -b -m -k -g -u -j -貼 -淨 -濕 -針 -適 -備 -l -/ -給 -謢 -強 -觸 -衛 -與 -⊙ -$ -緯 -變 -⑴ -⑵ -⑶ -㎏ -殺 -∩ -幚 -─ -價 -▲ -離 -ú -ó -飄 -烏 -関 -閟 -﹝ -﹞ -邏 -輯 -鍵 -驗 -訣 -導 -歷 -屆 -層 -▼ -儱 -錄 -熳 -ē -艦 -吋 -錶 -辧 -飼 -顯 -④ -禦 -販 -気 -対 -枰 -閩 -紀 -幹 -瞓 -貊 -淚 -△ -眞 -墊 -Ω -獻 -褲 -縫 -緑 -亜 -鉅 -餠 -{ -} -◆ -蘆 -薈 -█ -◇ -溫 -彈 -晳 -粧 -犸 -穩 -訊 -崬 -凖 -熥 -П -舊 -條 -紋 -圍 -Ⅳ -筆 -尷 -難 -雜 -錯 -綁 -識 -頰 -鎖 -艶 -□ -殁 -殼 -⑧ -├ -▕ -鵬 -ǐ -ō -ǒ -糝 -綱 -▎ -μ -盜 -饅 -醬 -籤 -蓋 -釀 -鹽 -據 -à -ɡ -辦 -◥ -彐 -┌ -婦 -獸 -鲩 -伱 -ī -蒟 -蒻 -齊 -袆 -腦 -寧 -凈 -妳 -煥 -詢 -偽 -謹 -啫 -鯽 -騷 -鱸 -損 -傷 -鎻 -髮 -買 -冏 -儥 -両 -﹢ -∞ -載 -喰 -z -羙 -悵 -燙 -曉 -員 -組 -徹 -艷 -痠 -鋼 -鼙 -縮 -細 -嚒 -爯 -≠ -維 -" -鱻 -壇 -厍 -帰 -浥 -犇 -薡 -軎 -² -應 -醜 -刪 -緻 -鶴 -賜 -噁 -軌 -尨 -镔 -鷺 -槗 -彌 -葚 -濛 -請 -溇 -緹 -賢 -訪 -獴 -瑅 -資 -縤 -陣 -蕟 -栢 -韻 -祼 -恁 -伢 -謝 -劃 -涑 -總 -衖 -踺 -砋 -凉 -籃 -駿 -苼 -瘋 -昽 -紡 -驊 -腎 -﹗ -響 -杋 -剛 -嚴 -禪 -歓 -槍 -傘 -檸 -檫 -炣 -勢 -鏜 -鎢 -銑 -尐 -減 -奪 -惡 -θ -僮 -婭 -臘 -ū -ì -殻 -鉄 -∑ -蛲 -焼 -緖 -續 -紹 -懮 \ No newline at end of file diff --git a/backend/ppocr/utils/dict/chinese_cht_dict.txt b/backend/ppocr/utils/dict/chinese_cht_dict.txt deleted file mode 100644 index cc1aa472..00000000 --- a/backend/ppocr/utils/dict/chinese_cht_dict.txt +++ /dev/null @@ -1,8421 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¥ -® -° -± -² -´ -· -» -É -Ë -Ó -× -Ü -à -á -ä -è -é -ì -í -ò -ó -÷ -ú -ü -ā -ē -ī -ō -ū -ǐ -ǒ -ɔ -ɡ -ʌ -ˋ -Λ -Ο -Φ -Ω -α -β -ε -θ -μ -π -З -И -Й -П -Я -г -— -‖ -‘ -’ -“ -” -• -… -‧ -′ -″ -※ -℃ -№ -™ -Ⅱ -Ⅲ -Ⅳ -← -↑ -→ -↓ -⇋ -∈ -∑ -√ -∞ -∣ -∧ -∩ -∫ -∶ -≈ -≠ -≤ -≥ -⊙ -⊥ -① -② -③ -④ -⑧ -⑴ -⑵ -⑶ -─ -│ -┅ -┌ -├ -█ -▎ -▏ -▕ -■ -□ -▪ -▲ -△ -▼ -◆ -◇ -○ -◎ -● -◥ -★ -☆ -❋ -❤ -  -、 -。 -〇 -〉 -《 -》 -「 -」 -『 -』 -【 -】 -〔 -〕 -〖 -〗 -の -サ -シ -ジ -マ -ㄱ -ㆍ -㎏ -㎡ -㐂 -㐱 -㙟 -㴪 -㸃 -䖝 -䝉 -䰾 -䲁 -一 -丁 -七 -丄 -丈 -三 -上 -下 -丌 -不 -与 -丏 -丐 -丑 -且 -丕 -世 -丘 -丙 -丞 -丟 -両 -並 -丨 -丫 -中 -丰 -串 -丶 -丸 -丹 -主 -丼 -丿 -乂 -乃 -久 -么 -之 -乍 -乎 -乏 -乒 -乓 -乖 -乗 -乘 -乙 -乚 -乜 -九 -乞 -也 -乩 -乭 -乳 -乸 -乹 -乾 -亀 -亂 -亅 -了 -予 -亊 -事 -二 -亍 -云 -互 -亓 -五 -井 -亘 -些 -亜 -亞 -亟 -亠 -亡 -亢 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -亰 -亳 -亶 -亹 -人 -亻 -什 -仁 -仂 -仃 -仄 -仇 -仉 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -仛 -仝 -仞 -仟 -仡 -代 -令 -以 -仨 -仫 -仮 -仰 -仲 -仳 -仵 -件 -仺 -任 -仼 -份 -仿 -企 -伃 -伈 -伉 -伊 -伋 -伍 -伎 -伏 -伐 -休 -伕 -伙 -伝 -伢 -伯 -估 -伱 -伴 -伶 -伷 -伸 -伺 -似 -伽 -伾 -佀 -佁 -佃 -但 -佇 -佈 -佉 -佋 -位 -低 -住 -佐 -佑 -体 -佔 -何 -佗 -佘 -余 -佚 -佛 -作 -佝 -佞 -佟 -你 -佣 -佤 -佧 -佩 -佬 -佯 -佰 -佳 -併 -佶 -佹 -佺 -佼 -佾 -使 -侁 -侃 -侄 -侅 -來 -侈 -侊 -例 -侍 -侏 -侑 -侖 -侗 -侘 -侚 -供 -依 -侞 -価 -侮 -侯 -侵 -侶 -侷 -侹 -便 -俁 -係 -促 -俄 -俅 -俊 -俋 -俌 -俍 -俎 -俏 -俐 -俑 -俗 -俘 -俚 -俛 -保 -俞 -俟 -俠 -信 -俬 -修 -俯 -俱 -俳 -俴 -俵 -俶 -俸 -俺 -俽 -俾 -倆 -倈 -倉 -個 -倌 -倍 -們 -倒 -倓 -倔 -倖 -倗 -倘 -候 -倚 -倜 -倞 -借 -倡 -倢 -倣 -値 -倦 -倧 -倩 -倪 -倫 -倬 -倭 -倮 -倻 -值 -偁 -偃 -假 -偈 -偉 -偊 -偌 -偍 -偎 -偏 -偓 -偕 -做 -停 -健 -偪 -偲 -側 -偵 -偶 -偷 -偸 -偽 -傀 -傃 -傅 -傈 -傉 -傍 -傑 -傒 -傕 -傖 -傘 -備 -傜 -傢 -傣 -催 -傭 -傲 -傳 -債 -傷 -傻 -傾 -僅 -僉 -僊 -働 -像 -僑 -僔 -僕 -僖 -僙 -僚 -僜 -僡 -僧 -僩 -僭 -僮 -僰 -僱 -僳 -僴 -僵 -價 -僻 -儀 -儁 -儂 -億 -儆 -儇 -儈 -儉 -儋 -儐 -儒 -儔 -儕 -儘 -儚 -儞 -償 -儡 -儥 -儦 -優 -儫 -儱 -儲 -儷 -儺 -儻 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -児 -兒 -兔 -兕 -兗 -兜 -入 -內 -全 -兩 -兪 -八 -公 -六 -兮 -共 -兵 -其 -具 -典 -兼 -兿 -冀 -冂 -円 -冇 -冉 -冊 -再 -冏 -冑 -冒 -冕 -冖 -冗 -冚 -冠 -冢 -冤 -冥 -冧 -冨 -冪 -冫 -冬 -冮 -冰 -冴 -冶 -冷 -冼 -冽 -凃 -凄 -准 -凈 -凋 -凌 -凍 -凖 -凜 -凝 -凞 -几 -凡 -処 -凪 -凬 -凰 -凱 -凳 -凵 -凶 -凸 -凹 -出 -函 -刀 -刁 -刂 -刃 -刄 -分 -切 -刈 -刊 -刎 -刑 -划 -列 -初 -判 -別 -刦 -刧 -刨 -利 -刪 -刮 -到 -制 -刷 -券 -刺 -刻 -刼 -剁 -剃 -則 -削 -剋 -剌 -前 -剎 -剏 -剔 -剖 -剛 -剝 -剡 -剣 -剩 -剪 -剮 -副 -割 -創 -剿 -劃 -劄 -劇 -劈 -劉 -劊 -劌 -劍 -劑 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劬 -劭 -劵 -効 -劼 -劾 -勁 -勃 -勅 -勇 -勉 -勐 -勑 -勒 -勔 -動 -勖 -勗 -勘 -務 -勛 -勝 -勞 -募 -勢 -勣 -勤 -勦 -勰 -勱 -勲 -勳 -勵 -勷 -勸 -勺 -勻 -勾 -勿 -匂 -匄 -包 -匆 -匈 -匋 -匍 -匏 -匐 -匕 -化 -北 -匙 -匚 -匝 -匠 -匡 -匣 -匪 -匯 -匱 -匸 -匹 -匾 -匿 -區 -十 -千 -卅 -升 -午 -卉 -半 -卋 -卍 -卐 -卑 -卒 -卓 -協 -南 -博 -卜 -卞 -卟 -占 -卡 -卣 -卦 -卧 -卩 -卬 -卮 -卯 -印 -危 -卲 -即 -卵 -卷 -卸 -卹 -卺 -卻 -卽 -卿 -厄 -厓 -厔 -厙 -厚 -厝 -原 -厥 -厭 -厰 -厲 -厴 -厶 -去 -參 -叄 -又 -叉 -及 -友 -反 -収 -叔 -叕 -取 -受 -叛 -叟 -叡 -叢 -口 -古 -句 -另 -叨 -叩 -只 -叫 -召 -叭 -叮 -可 -台 -叱 -史 -右 -叵 -司 -叻 -叼 -吁 -吃 -各 -吆 -合 -吉 -吊 -吋 -同 -名 -后 -吏 -吐 -向 -吒 -吔 -吖 -君 -吝 -吞 -吟 -吠 -吡 -吥 -否 -吧 -吩 -含 -吮 -吱 -吲 -吳 -吵 -吶 -吸 -吹 -吻 -吼 -吾 -呀 -呂 -呃 -呈 -呉 -告 -呋 -呎 -呢 -呤 -呦 -周 -呱 -味 -呵 -呷 -呸 -呼 -命 -呾 -咀 -咁 -咂 -咄 -咅 -咆 -咋 -和 -咎 -咑 -咒 -咔 -咕 -咖 -咗 -咘 -咚 -咟 -咤 -咥 -咧 -咨 -咩 -咪 -咫 -咬 -咭 -咯 -咱 -咲 -咳 -咸 -咻 -咼 -咽 -咾 -咿 -哀 -品 -哂 -哄 -哆 -哇 -哈 -哉 -哌 -哎 -哏 -哐 -哖 -哚 -哞 -員 -哥 -哦 -哨 -哩 -哪 -哭 -哮 -哱 -哲 -哺 -哼 -唃 -唄 -唆 -唇 -唉 -唏 -唐 -唑 -唔 -唘 -唧 -唫 -唬 -唭 -售 -唯 -唱 -唳 -唵 -唷 -唸 -唻 -唾 -啁 -啃 -啄 -商 -啉 -啊 -啍 -問 -啓 -啖 -啚 -啜 -啞 -啟 -啡 -啣 -啤 -啥 -啦 -啪 -啫 -啯 -啰 -啱 -啲 -啵 -啶 -啷 -啻 -啼 -啾 -喀 -喂 -喃 -善 -喆 -喇 -喈 -喉 -喊 -喋 -喏 -喔 -喘 -喙 -喚 -喜 -喝 -喢 -喦 -喧 -喪 -喫 -喬 -單 -喰 -喱 -喲 -喳 -喵 -喹 -喻 -喼 -嗄 -嗅 -嗆 -嗇 -嗊 -嗎 -嗑 -嗒 -嗓 -嗔 -嗖 -嗚 -嗜 -嗝 -嗞 -嗡 -嗢 -嗣 -嗦 -嗨 -嗩 -嗪 -嗮 -嗯 -嗲 -嗶 -嗹 -嗽 -嘀 -嘅 -嘆 -嘉 -嘌 -嘍 -嘎 -嘏 -嘔 -嘗 -嘚 -嘛 -嘜 -嘞 -嘟 -嘢 -嘣 -嘥 -嘧 -嘩 -嘬 -嘮 -嘯 -嘰 -嘲 -嘴 
-嘶 -嘸 -嘹 -嘻 -嘿 -噁 -噌 -噍 -噏 -噓 -噗 -噝 -噠 -噢 -噤 -噥 -噦 -器 -噩 -噪 -噬 -噯 -噰 -噲 -噴 -噶 -噸 -噹 -噻 -嚇 -嚈 -嚎 -嚏 -嚐 -嚒 -嚓 -嚕 -嚗 -嚙 -嚞 -嚟 -嚤 -嚦 -嚧 -嚨 -嚩 -嚮 -嚳 -嚴 -嚶 -嚷 -嚼 -嚿 -囀 -囂 -囃 -囉 -囊 -囍 -囑 -囒 -囓 -囗 -囚 -四 -囝 -回 -因 -囡 -団 -囤 -囧 -囪 -囮 -囯 -困 -囲 -図 -囶 -囷 -囹 -固 -囿 -圂 -圃 -圄 -圈 -圉 -國 -圍 -圏 -園 -圓 -圖 -圗 -團 -圜 -土 -圧 -在 -圩 -圪 -圭 -圯 -地 -圳 -圻 -圾 -址 -均 -坊 -坋 -坌 -坍 -坎 -坐 -坑 -坖 -坡 -坣 -坤 -坦 -坨 -坩 -坪 -坫 -坬 -坭 -坮 -坯 -坳 -坵 -坶 -坷 -坻 -垂 -垃 -垈 -型 -垍 -垓 -垕 -垚 -垛 -垞 -垟 -垠 -垢 -垣 -垮 -垯 -垰 -垵 -垸 -垻 -垿 -埃 -埅 -埇 -埈 -埋 -埌 -城 -埏 -埒 -埔 -埕 -埗 -埜 -域 -埠 -埡 -埤 -埧 -埨 -埪 -埭 -埮 -埴 -埵 -執 -培 -基 -埻 -埼 -堀 -堂 -堃 -堅 -堆 -堇 -堈 -堉 -堊 -堍 -堖 -堝 -堡 -堤 -堦 -堪 -堮 -堯 -堰 -報 -場 -堵 -堷 -堺 -塀 -塅 -塆 -塊 -塋 -塌 -塍 -塏 -塑 -塔 -塗 -塘 -塙 -塜 -塞 -塡 -塢 -塤 -塨 -塩 -填 -塬 -塭 -塰 -塱 -塲 -塵 -塹 -塽 -塾 -墀 -境 -墅 -墉 -墊 -墎 -墓 -増 -墘 -墜 -增 -墟 -墡 -墣 -墨 -墩 -墫 -墬 -墮 -墱 -墳 -墺 -墼 -墾 -壁 -壄 -壆 -壇 -壋 -壌 -壎 -壐 -壑 -壓 -壔 -壕 -壘 -壙 -壞 -壟 -壠 -壢 -壤 -壩 -士 -壬 -壯 -壱 -壴 -壹 -壺 -壽 -夀 -夆 -変 -夊 -夋 -夌 -夏 -夔 -夕 -外 -夙 -多 -夜 -夠 -夢 -夤 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -夯 -失 -夷 -夾 -奀 -奄 -奇 -奈 -奉 -奎 -奏 -奐 -契 -奓 -奔 -奕 -套 -奘 -奚 -奠 -奢 -奣 -奧 -奩 -奪 -奫 -奭 -奮 -女 -奴 -奶 -她 -好 -妀 -妁 -如 -妃 -妄 -妊 -妍 -妏 -妑 -妒 -妓 -妖 -妙 -妝 -妞 -妠 -妤 -妥 -妧 -妨 -妭 -妮 -妯 -妲 -妳 -妸 -妹 -妺 -妻 -妾 -姀 -姁 -姃 -姆 -姈 -姉 -姊 -始 -姌 -姍 -姐 -姑 -姒 -姓 -委 -姚 -姜 -姝 -姣 -姥 -姦 -姨 -姪 -姫 -姬 -姮 -姵 -姶 -姸 -姻 -姿 -威 -娃 -娉 -娋 -娌 -娍 -娎 -娑 -娖 -娘 -娛 -娜 -娟 -娠 -娣 -娥 -娩 -娫 -娳 -娶 -娸 -娼 -娽 -婀 -婁 -婆 -婉 -婊 -婑 -婕 -婚 -婢 -婦 -婧 -婪 -婭 -婯 -婷 -婺 -婻 -婼 -婿 -媃 -媄 -媊 -媐 -媒 -媓 -媖 -媗 -媚 -媛 -媜 -媞 -媧 -媭 -媯 -媲 -媳 -媺 -媼 -媽 -媾 -媿 -嫁 -嫂 -嫄 -嫈 -嫉 -嫌 -嫖 -嫘 -嫚 -嫡 -嫣 -嫦 -嫩 -嫪 -嫲 -嫳 -嫵 -嫺 -嫻 -嬅 -嬈 -嬉 -嬋 -嬌 -嬗 -嬛 -嬝 -嬡 -嬤 -嬨 -嬪 -嬬 -嬭 -嬰 -嬴 -嬸 -嬾 -嬿 -孀 -孃 -孆 -孋 -孌 -子 -孑 -孔 -孕 -孖 -字 -存 -孚 -孛 -孜 -孝 -孟 -孢 -季 -孤 -孩 -孫 -孬 -孮 -孰 -孳 -孵 -學 -孺 -孻 -孽 -孿 -宀 -它 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宓 -宕 -宗 -官 -宙 -定 -宛 -宜 -実 -客 -宣 -室 -宥 -宦 -宧 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寀 -寁 -寂 -寄 -寅 -密 -寇 -寈 -寊 -富 -寐 -寒 -寓 -寔 -寕 -寖 -寗 -寘 -寛 -寜 -寞 -察 -寡 -寢 -寤 -寥 -實 -寧 -寨 -審 -寫 -寬 -寮 -寯 -寰 -寳 -寵 -寶 -寸 -寺 -対 -封 -専 -尃 -射 -將 -專 -尉 -尊 -尋 -對 -導 -小 -尐 -少 -尓 -尕 -尖 -尗 -尙 -尚 -尢 -尤 -尨 -尪 -尬 -就 -尷 -尹 -尺 -尻 -尼 -尾 -尿 -局 -屁 -屄 -居 -屆 -屇 -屈 -屋 -屌 -屍 -屎 -屏 -屐 -屑 -屓 -展 -屚 -屜 -屠 -屢 -層 -履 -屬 -屭 -屯 -山 -屹 -屺 -屻 -岀 -岈 -岌 -岐 -岑 -岔 -岡 -岢 -岣 -岧 -岩 -岪 -岫 -岬 -岰 -岱 -岳 -岵 -岷 -岸 -岻 -峁 -峅 -峇 -峋 -峍 -峒 -峘 -峙 -峚 -峠 -峨 -峩 -峪 -峭 -峯 -峰 -峴 -島 -峻 -峼 -峽 -崁 -崆 -崇 -崈 -崋 -崍 -崎 -崐 -崑 -崒 -崔 -崖 -崗 -崘 -崙 -崚 -崛 -崞 -崟 -崠 -崢 -崤 -崧 -崩 -崬 -崮 -崱 -崴 -崵 -崶 -崽 -嵇 -嵊 -嵋 -嵌 -嵎 -嵐 -嵒 -嵕 -嵖 -嵗 -嵙 -嵛 -嵜 -嵨 -嵩 -嵬 -嵮 -嵯 -嵰 -嵴 -嵻 -嵿 -嶁 -嶂 -嶃 -嶄 -嶇 -嶋 -嶌 -嶍 -嶒 -嶔 -嶗 -嶝 -嶠 -嶢 -嶦 -嶧 -嶪 -嶬 -嶰 -嶲 -嶴 -嶷 -嶸 -嶺 -嶼 -嶽 -巂 -巄 -巆 -巋 -巌 -巍 -巎 -巑 -巒 -巔 -巖 -巘 -巛 -川 -州 -巡 -巢 -工 -左 -巧 -巨 -巫 -差 -巰 -己 -已 -巳 -巴 -巶 -巷 -巻 -巽 -巾 -巿 -市 -布 -帆 -希 -帑 -帔 -帕 -帖 -帘 -帙 -帚 -帛 -帝 -帡 -帢 -帥 -師 -席 -帯 -帰 -帳 -帶 -帷 -常 -帽 -幀 -幃 -幄 -幅 -幌 -幔 -幕 -幗 -幚 -幛 -幟 -幡 -幢 -幣 -幪 -幫 -干 -平 -年 -幵 -幷 -幸 -幹 -幺 -幻 -幼 -幽 -幾 -庀 -庁 -広 -庇 -床 -序 -底 -庖 -店 -庚 -府 -庠 -庢 -庥 -度 -座 -庫 -庭 -庲 -庵 -庶 -康 -庸 -庹 -庼 -庾 -廁 -廂 -廄 -廆 -廈 -廉 -廊 -廋 -廌 -廍 -廑 -廓 -廔 -廕 -廖 -廙 -廚 -廝 -廞 -廟 -廠 -廡 -廢 -廣 -廧 -廨 -廩 -廬 -廰 -廱 -廳 -延 -廷 -廸 -建 -廻 -廼 -廿 -弁 -弄 -弅 -弇 -弈 -弉 -弊 -弋 -弍 -式 -弐 -弒 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弢 -弦 -弧 -弨 -弩 -弭 -弱 -張 -強 -弸 -弼 -弾 -彀 -彄 -彅 -彆 -彈 -彊 -彌 -彎 -彐 -彔 -彖 -彗 -彘 -彙 -彜 -彞 -彠 -彡 -形 -彣 -彤 -彥 -彧 -彩 -彪 -彫 -彬 -彭 -彰 -影 -彳 -彷 -役 -彼 -彿 -往 -征 -徂 -待 -徇 -很 -徉 -徊 -律 -後 -徐 -徑 -徒 -得 -徘 -徙 -徜 -從 -徠 -御 -徧 -徨 -復 -循 -徫 -徬 -徭 -微 -徳 -徴 -徵 -德 -徸 -徹 -徽 -心 -忄 -必 -忉 -忌 -忍 -忐 -忑 -忒 -志 -忘 -忙 -応 -忝 -忞 -忠 -快 -忬 -忯 -忱 -忳 -念 -忻 -忽 -忿 -怍 -怎 -怒 -怕 -怖 -怙 -怛 -思 -怠 -怡 -急 -怦 -性 -怨 -怪 -怯 -怵 -恁 -恂 -恃 -恆 -恊 -恍 -恐 -恕 -恙 -恢 -恣 -恤 -恥 -恨 -恩 -恪 -恬 -恭 -息 -恰 -恵 -恿 -悄 -悅 -悆 -悉 -悌 -悍 -悔 -悖 -悚 -悛 -悝 -悞 -悟 -悠 -患 -悧 -您 -悪 -悰 -悲 -悳 -悵 -悶 -悸 -悼 -情 -惆 -惇 -惑 -惔 -惕 -惘 -惚 -惜 -惟 -惠 -惡 -惣 -惦 -惰 -惱 -惲 -想 -惶 -惹 -惺 -愁 -愃 -愆 -愈 -愉 -愍 -意 -愐 -愒 -愔 -愕 -愚 -愛 -愜 -感 -愣 -愧 -愨 -愫 -愭 -愴 
-愷 -愼 -愾 -愿 -慄 -慈 -態 -慌 -慎 -慕 -慘 -慚 -慜 -慟 -慢 -慣 -慥 -慧 -慨 -慮 -慰 -慳 -慵 -慶 -慷 -慾 -憂 -憊 -憋 -憍 -憎 -憐 -憑 -憓 -憕 -憙 -憚 -憤 -憧 -憨 -憩 -憫 -憬 -憲 -憶 -憺 -憻 -憾 -懂 -懃 -懇 -懈 -應 -懋 -懌 -懍 -懐 -懣 -懦 -懮 -懲 -懵 -懶 -懷 -懸 -懺 -懼 -懽 -懾 -懿 -戀 -戇 -戈 -戊 -戌 -戍 -戎 -成 -我 -戒 -戔 -戕 -或 -戙 -戚 -戛 -戟 -戡 -戢 -戥 -戦 -戩 -截 -戮 -戰 -戱 -戲 -戳 -戴 -戶 -戸 -戻 -戽 -戾 -房 -所 -扁 -扆 -扇 -扈 -扉 -手 -扌 -才 -扎 -扒 -打 -扔 -托 -扙 -扛 -扞 -扣 -扥 -扦 -扭 -扮 -扯 -扳 -扶 -批 -扼 -找 -承 -技 -抃 -抄 -抇 -抉 -把 -抑 -抒 -抓 -投 -抖 -抗 -折 -抦 -披 -抬 -抱 -抵 -抹 -抻 -押 -抽 -抿 -拂 -拆 -拇 -拈 -拉 -拋 -拌 -拍 -拎 -拏 -拐 -拒 -拓 -拔 -拖 -拗 -拘 -拙 -拚 -招 -拜 -拝 -拡 -括 -拭 -拮 -拯 -拱 -拳 -拴 -拷 -拺 -拼 -拽 -拾 -拿 -持 -指 -按 -挎 -挑 -挖 -挙 -挨 -挪 -挫 -振 -挲 -挵 -挹 -挺 -挻 -挾 -捂 -捆 -捉 -捌 -捍 -捎 -捏 -捐 -捒 -捕 -捜 -捦 -捧 -捨 -捩 -捫 -捭 -捱 -捲 -捶 -捷 -捺 -捻 -掀 -掂 -掃 -掄 -掇 -授 -掉 -掌 -掏 -掐 -排 -掖 -掘 -掙 -掛 -掞 -掟 -掠 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掰 -掾 -揀 -揄 -揆 -揉 -揍 -描 -提 -插 -揔 -揖 -揚 -換 -握 -揪 -揭 -揮 -援 -揸 -揺 -損 -搏 -搐 -搓 -搔 -搖 -搗 -搜 -搞 -搠 -搢 -搪 -搬 -搭 -搳 -搴 -搵 -搶 -搽 -搾 -摂 -摒 -摔 -摘 -摜 -摞 -摟 -摠 -摧 -摩 -摭 -摯 -摳 -摴 -摵 -摶 -摸 -摹 -摺 -摻 -摽 -撃 -撇 -撈 -撐 -撒 -撓 -撕 -撖 -撙 -撚 -撞 -撣 -撤 -撥 -撩 -撫 -撬 -播 -撮 -撰 -撲 -撳 -撻 -撼 -撾 -撿 -擀 -擁 -擂 -擅 -擇 -擊 -擋 -操 -擎 -擒 -擔 -擘 -據 -擠 -擢 -擥 -擦 -擬 -擯 -擰 -擱 -擲 -擴 -擷 -擺 -擼 -擾 -攀 -攏 -攔 -攖 -攘 -攜 -攝 -攞 -攢 -攣 -攤 -攪 -攫 -攬 -支 -攴 -攵 -收 -攷 -攸 -改 -攻 -攽 -放 -政 -故 -效 -敍 -敎 -敏 -救 -敔 -敕 -敖 -敗 -敘 -教 -敝 -敞 -敟 -敢 -散 -敦 -敫 -敬 -敭 -敲 -整 -敵 -敷 -數 -敻 -敾 -斂 -斃 -文 -斌 -斎 -斐 -斑 -斕 -斖 -斗 -料 -斛 -斜 -斝 -斟 -斡 -斤 -斥 -斧 -斬 -斯 -新 -斷 -方 -於 -施 -斿 -旁 -旂 -旃 -旄 -旅 -旉 -旋 -旌 -旎 -族 -旖 -旗 -旙 -旛 -旡 -既 -日 -旦 -旨 -早 -旬 -旭 -旱 -旲 -旳 -旺 -旻 -旼 -旽 -旾 -旿 -昀 -昂 -昃 -昆 -昇 -昉 -昊 -昌 -昍 -明 -昏 -昐 -易 -昔 -昕 -昚 -昛 -昜 -昝 -昞 -星 -映 -昡 -昣 -昤 -春 -昧 -昨 -昪 -昫 -昭 -是 -昰 -昱 -昴 -昵 -昶 -昺 -晁 -時 -晃 -晈 -晉 -晊 -晏 -晗 -晙 -晚 -晛 -晝 -晞 -晟 -晤 -晦 -晧 -晨 -晩 -晪 -晫 -晭 -普 -景 -晰 -晳 -晴 -晶 -晷 -晸 -智 -晾 -暃 -暄 -暅 -暇 -暈 -暉 -暊 -暌 -暎 -暏 -暐 -暑 -暕 -暖 -暗 -暘 -暝 -暟 -暠 -暢 -暦 -暨 -暫 -暮 -暱 -暲 -暴 -暸 -暹 -暻 -暾 -曄 -曅 -曆 -曇 -曉 -曌 -曔 -曖 -曙 -曜 -曝 -曠 -曦 -曧 -曨 -曩 -曬 -曮 -曰 -曲 -曳 -更 -曶 -曷 -書 -曹 -曺 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朊 -朋 -服 -朏 -朐 -朓 -朔 -朕 -朖 -朗 -望 -朝 -期 -朦 -朧 -木 -未 -末 -本 -札 -朱 -朴 -朵 -朶 -朽 -朿 -杁 -杉 -杋 -杌 -李 -杏 -材 -村 -杓 -杖 -杙 -杜 -杞 -束 -杠 -杣 -杤 -杧 -杬 -杭 -杯 -東 -杲 -杳 -杴 -杵 -杷 -杻 -杼 -松 -板 -极 -枇 -枉 -枋 -枏 -析 -枕 -枖 -林 -枚 -枛 -果 -枝 -枠 -枡 -枯 -枰 -枱 -枲 -枳 -架 -枷 -枸 -枹 -枼 -柁 -柃 -柄 -柉 -柊 -柎 -柏 -某 -柑 -柒 -染 -柔 -柘 -柚 -柜 -柝 -柞 -柟 -查 -柩 -柬 -柯 -柰 -柱 -柳 -柴 -柵 -柶 -柷 -査 -柾 -柿 -栃 -栄 -栐 -栒 -栓 -栜 -栝 -栞 -校 -栢 -栨 -栩 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桀 -桁 -桂 -桃 -桄 -桅 -框 -案 -桉 -桌 -桎 -桐 -桑 -桓 -桔 -桕 -桖 -桙 -桜 -桝 -桫 -桱 -桲 -桴 -桶 -桷 -桼 -桿 -梀 -梁 -梂 -梃 -梅 -梆 -梉 -梏 -梓 -梔 -梗 -梘 -條 -梟 -梠 -梢 -梣 -梧 -梨 -梫 -梭 -梯 -械 -梱 -梳 -梵 -梶 -梽 -棄 -棆 -棉 -棋 -棍 -棐 -棒 -棓 -棕 -棖 -棗 -棘 -棚 -棛 -棟 -棠 -棡 -棣 -棧 -棨 -棩 -棪 -棫 -森 -棱 -棲 -棵 -棶 -棹 -棺 -棻 -棼 -棽 -椅 -椆 -椇 -椋 -植 -椎 -椏 -椒 -椙 -椥 -椪 -椰 -椲 -椴 -椵 -椹 -椽 -椿 -楂 -楊 -楓 -楔 -楗 -楙 -楚 -楝 -楞 -楠 -楡 -楢 -楣 -楤 -楦 -楧 -楨 -楫 -業 -楮 -楯 -楳 -極 -楷 -楸 -楹 -楽 -楿 -概 -榆 -榊 -榍 -榎 -榑 -榔 -榕 -榖 -榗 -榘 -榛 -榜 -榞 -榢 -榣 -榤 -榦 -榧 -榨 -榫 -榭 -榮 -榲 -榴 -榷 -榻 -榿 -槀 -槁 -槃 -槊 -構 -槌 -槍 -槎 -槐 -槓 -槔 -槗 -様 -槙 -槤 -槩 -槭 -槰 -槱 -槲 -槳 -槺 -槻 -槼 -槽 -槿 -樀 -樁 -樂 -樅 -樆 -樊 -樋 -樑 -樓 -樗 -樘 -標 -樞 -樟 -模 -樣 -樨 -権 -樫 -樵 -樸 -樹 -樺 -樻 -樽 -樾 -橄 -橇 -橈 -橋 -橐 -橒 -橓 -橘 -橙 -橚 -機 -橡 -橢 -橪 -橫 -橿 -檀 -檄 -檇 -檉 -檊 -檎 -檐 -檔 -檗 -檜 -檞 -檠 -檡 -檢 -檣 -檦 -檨 -檫 -檬 -檯 -檳 -檵 -檸 -檻 -檽 -櫂 -櫃 -櫆 -櫈 -櫓 -櫚 -櫛 -櫞 -櫟 -櫥 -櫨 -櫪 -櫱 -櫸 -櫻 -櫾 -櫿 -欄 -欉 -權 -欏 -欒 -欖 -欞 -欠 -次 -欣 -欥 -欲 -欸 -欹 -欺 -欽 -款 -歆 -歇 -歉 -歊 -歌 -歎 -歐 -歓 -歙 -歛 -歡 -止 -正 -此 -步 -武 -歧 -歩 -歪 -歲 -歳 -歴 -歷 -歸 -歹 -死 -歿 -殂 -殃 -殄 -殆 -殉 -殊 -殑 -殖 -殘 -殛 -殞 -殟 -殤 -殭 -殮 -殯 -殲 -殳 -段 -殷 -殺 -殻 -殼 -殿 -毀 -毅 -毆 -毉 -毋 -毌 -母 -毎 -每 -毐 -毒 -毓 -比 -毖 -毗 -毘 -毛 -毫 -毬 -毯 -毴 -毸 -毽 -毿 -氂 -氈 -氍 -氏 -氐 -民 -氓 -氖 -気 -氘 -氙 -氚 -氛 -氟 -氣 -氦 -氧 -氨 -氪 -氫 -氬 -氮 -氯 -氰 -水 -氵 -氷 -永 -氹 -氻 -氽 -氾 -汀 -汁 -求 -汊 -汎 -汐 -汕 -汗 -汛 -汜 -汝 -汞 -江 -池 -污 -汧 -汨 -汩 -汪 -汭 -汰 -汲 -汴 -汶 -決 -汽 -汾 -沁 -沂 -沃 -沄 -沅 
-沆 -沇 -沈 -沉 -沌 -沍 -沏 -沐 -沒 -沓 -沔 -沖 -沘 -沙 -沚 -沛 -沜 -沢 -沨 -沫 -沭 -沮 -沯 -沱 -河 -沸 -油 -沺 -治 -沼 -沽 -沾 -沿 -況 -泂 -泄 -泆 -泇 -泉 -泊 -泌 -泐 -泓 -泔 -法 -泖 -泗 -泚 -泛 -泠 -泡 -波 -泣 -泥 -泩 -泫 -泮 -泯 -泰 -泱 -泳 -泵 -洄 -洋 -洌 -洎 -洗 -洙 -洛 -洞 -洢 -洣 -洤 -津 -洨 -洩 -洪 -洮 -洱 -洲 -洳 -洵 -洸 -洹 -洺 -活 -洽 -派 -流 -浄 -浙 -浚 -浛 -浜 -浞 -浟 -浠 -浡 -浣 -浤 -浥 -浦 -浩 -浪 -浮 -浯 -浴 -浵 -海 -浸 -浹 -涅 -涇 -消 -涉 -涌 -涎 -涑 -涓 -涔 -涕 -涙 -涪 -涫 -涮 -涯 -液 -涵 -涸 -涼 -涿 -淄 -淅 -淆 -淇 -淋 -淌 -淍 -淎 -淏 -淑 -淓 -淖 -淘 -淙 -淚 -淛 -淝 -淞 -淠 -淡 -淤 -淥 -淦 -淨 -淩 -淪 -淫 -淬 -淮 -淯 -淰 -深 -淳 -淵 -淶 -混 -淸 -淹 -淺 -添 -淼 -淽 -渃 -清 -済 -渉 -渋 -渕 -渙 -渚 -減 -渝 -渟 -渠 -渡 -渣 -渤 -渥 -渦 -渫 -測 -渭 -港 -渲 -渴 -游 -渺 -渼 -渽 -渾 -湃 -湄 -湉 -湊 -湍 -湓 -湔 -湖 -湘 -湛 -湜 -湞 -湟 -湣 -湥 -湧 -湫 -湮 -湯 -湳 -湴 -湼 -満 -溁 -溇 -溈 -溉 -溋 -溎 -溏 -源 -準 -溙 -溜 -溝 -溟 -溢 -溥 -溦 -溧 -溪 -溫 -溯 -溱 -溲 -溴 -溵 -溶 -溺 -溼 -滀 -滁 -滂 -滄 -滅 -滇 -滈 -滉 -滋 -滌 -滎 -滏 -滑 -滓 -滔 -滕 -滘 -滙 -滝 -滬 -滯 -滲 -滴 -滷 -滸 -滹 -滻 -滽 -滾 -滿 -漁 -漂 -漆 -漇 -漈 -漎 -漏 -漓 -演 -漕 -漚 -漠 -漢 -漣 -漩 -漪 -漫 -漬 -漯 -漱 -漲 -漳 -漴 -漵 -漷 -漸 -漼 -漾 -漿 -潁 -潑 -潔 -潘 -潛 -潞 -潟 -潢 -潤 -潭 -潮 -潯 -潰 -潲 -潺 -潼 -潽 -潾 -潿 -澀 -澁 -澂 -澄 -澆 -澇 -澈 -澉 -澋 -澌 -澍 -澎 -澔 -澗 -澠 -澡 -澣 -澤 -澥 -澧 -澪 -澮 -澯 -澱 -澳 -澶 -澹 -澻 -激 -濁 -濂 -濃 -濉 -濊 -濋 -濕 -濘 -濙 -濛 -濞 -濟 -濠 -濡 -濤 -濫 -濬 -濮 -濯 -濰 -濱 -濲 -濶 -濺 -濼 -濾 -瀁 -瀅 -瀆 -瀉 -瀍 -瀏 -瀑 -瀔 -瀕 -瀘 -瀚 -瀛 -瀝 -瀞 -瀟 -瀠 -瀣 -瀦 -瀧 -瀨 -瀬 -瀰 -瀲 -瀴 -瀶 -瀹 -瀾 -灃 -灊 -灌 -灑 -灘 -灝 -灞 -灡 -灣 -灤 -灧 -火 -灰 -灴 -灸 -灼 -災 -炁 -炅 -炆 -炊 -炎 -炒 -炔 -炕 -炘 -炙 -炟 -炣 -炤 -炫 -炬 -炭 -炮 -炯 -炱 -炲 -炳 -炷 -炸 -為 -炻 -烈 -烉 -烊 -烋 -烏 -烒 -烔 -烘 -烙 -烜 -烝 -烤 -烯 -烱 -烴 -烷 -烹 -烺 -烽 -焃 -焄 -焉 -焊 -焌 -焓 -焗 -焙 -焚 -焜 -焞 -無 -焦 -焯 -焰 -焱 -焴 -然 -焻 -焼 -焿 -煇 -煉 -煊 -煌 -煎 -煐 -煒 -煔 -煕 -煖 -煙 -煚 -煜 -煞 -煠 -煤 -煥 -煦 -照 -煨 -煩 -煬 -煮 -煲 -煳 -煵 -煶 -煸 -煽 -熄 -熅 -熇 -熈 -熊 -熏 -熒 -熔 -熖 -熗 -熘 -熙 -熜 -熟 -熠 -熤 -熥 -熨 -熬 -熯 -熱 -熲 -熳 -熵 -熹 -熺 -熼 -熾 -熿 -燁 -燃 -燄 -燈 -燉 -燊 -燎 -燏 -燐 -燒 -燔 -燕 -燘 -燙 -燚 -燜 -燝 -營 -燥 -燦 -燧 -燫 -燬 -燭 -燮 -燴 -燹 -燻 -燼 -燾 -燿 -爀 -爆 -爌 -爍 -爐 -爔 -爚 -爛 -爝 -爨 -爪 -爬 -爭 -爯 -爰 -爲 -爵 -父 -爸 -爹 -爺 -爻 -爽 -爾 -爿 -牁 -牂 -牆 -片 -版 -牌 -牒 -牕 -牖 -牘 -牙 -牛 -牝 -牟 -牠 -牡 -牢 -牧 -物 -牯 -牲 -特 -牻 -牼 -牽 -犀 -犁 -犂 -犇 -犍 -犎 -犖 -犛 -犢 -犧 -犨 -犬 -犯 -犰 -犴 -犽 -狀 -狂 -狄 -狍 -狎 -狐 -狒 -狓 -狗 -狙 -狛 -狟 -狠 -狡 -狦 -狨 -狩 -狳 -狶 -狷 -狸 -狹 -狻 -狼 -猁 -猄 -猇 -猊 -猗 -猙 -猛 -猜 -猝 -猞 -猢 -猥 -猨 -猩 -猳 -猴 -猶 -猷 -猺 -猻 -猾 -猿 -獁 -獃 -獄 -獅 -獇 -獎 -獏 -獐 -獒 -獠 -獢 -獣 -獨 -獬 -獮 -獯 -獰 -獲 -獴 -獵 -獷 -獸 -獺 -獻 -獼 -獾 -玀 -玄 -玆 -率 -玉 -王 -玎 -玏 -玓 -玕 -玖 -玗 -玘 -玙 -玟 -玠 -玡 -玢 -玥 -玧 -玨 -玩 -玫 -玭 -玲 -玳 -玶 -玷 -玹 -玻 -玾 -珀 -珂 -珅 -珈 -珉 -珊 -珌 -珍 -珎 -珏 -珖 -珙 -珝 -珞 -珠 -珡 -珣 -珤 -珥 -珦 -珧 -珩 -珪 -班 -珮 -珵 -珹 -珺 -珽 -現 -琁 -球 -琄 -琅 -理 -琇 -琉 -琊 -琍 -琎 -琚 -琛 -琡 -琢 -琤 -琥 -琦 -琨 -琪 -琬 -琮 -琯 -琰 -琱 -琳 -琴 -琵 -琶 -琹 -琺 -琿 -瑀 -瑁 -瑂 -瑄 -瑅 -瑆 -瑈 -瑊 -瑋 -瑑 -瑒 -瑕 -瑗 -瑙 -瑚 -瑛 -瑜 -瑝 -瑞 -瑟 -瑠 -瑢 -瑣 -瑤 -瑥 -瑧 -瑨 -瑩 -瑪 -瑭 -瑯 -瑰 -瑱 -瑳 -瑴 -瑺 -瑾 -璀 -璁 -璃 -璄 -璆 -璇 -璈 -璉 -璋 -璌 -璐 -璕 -璘 -璙 -璚 -璜 -璞 -璟 -璠 -璡 -璣 -璥 -璦 -璧 -璨 -璩 -璪 -璫 -璬 -璮 -環 -璱 -璵 -璸 -璹 -璽 -璿 -瓈 -瓊 -瓌 -瓏 -瓑 -瓔 -瓖 -瓘 -瓚 -瓛 -瓜 -瓞 -瓠 -瓢 -瓣 -瓤 -瓦 -瓮 -瓴 -瓶 -瓷 -瓿 -甂 -甄 -甌 -甍 -甑 -甕 -甘 -甙 -甚 -甜 -生 -甡 -產 -産 -甥 -甦 -用 -甩 -甪 -甫 -甬 -甯 -田 -由 -甲 -申 -男 -甸 -甹 -町 -甾 -畀 -畇 -畈 -畊 -畋 -界 -畎 -畏 -畐 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -畦 -畧 -番 -畫 -畬 -畯 -異 -畲 -畳 -畵 -當 -畷 -畸 -畹 -畿 -疃 -疆 -疇 -疊 -疋 -疌 -疍 -疏 -疑 -疒 -疕 -疙 -疚 -疝 -疣 -疤 -疥 -疫 -疲 -疳 -疵 -疸 -疹 -疼 -疽 -疾 -痂 -病 -症 -痊 -痍 -痔 -痕 -痘 -痙 -痛 -痞 -痟 -痠 -痢 -痣 -痤 -痧 -痩 -痰 -痱 -痲 -痴 -痹 -痺 -痿 -瘀 -瘁 -瘊 -瘋 -瘍 -瘓 -瘙 -瘜 -瘞 -瘟 -瘠 -瘡 -瘢 -瘤 -瘦 -瘧 -瘩 -瘰 -瘴 -瘺 -癀 -療 -癆 -癇 -癌 -癒 -癖 -癘 -癜 -癟 -癡 -癢 -癤 -癥 -癩 -癬 -癭 -癮 -癯 -癰 -癱 -癲 -癸 -発 -登 -發 -白 -百 -皂 -的 -皆 -皇 -皈 -皋 -皎 -皐 -皓 -皖 -皙 -皚 -皛 -皝 -皞 -皮 -皰 -皴 -皷 -皸 -皺 -皿 -盂 -盃 -盅 -盆 -盈 -益 -盋 -盌 -盎 -盒 -盔 -盛 -盜 -盞 -盟 -盡 -監 -盤 -盥 -盦 -盧 -盨 -盩 -盪 -盫 -目 -盯 -盱 -盲 -直 -盷 -相 -盹 -盺 -盼 -盾 -眀 -省 -眉 -看 -県 -眙 -眛 -眜 -眞 -真 -眠 -眥 -眨 -眩 -眭 -眯 -眵 -眶 -眷 -眸 -眺 -眼 -眾 -着 -睇 -睛 -睜 -睞 -睡 -睢 -督 -睥 -睦 -睨 -睪 -睫 -睭 -睹 -睺 -睽 -睾 -睿 -瞄 -瞅 -瞋 -瞌 -瞎 -瞑 -瞓 -瞞 -瞢 -瞥 -瞧 -瞪 -瞫 -瞬 -瞭 
-瞰 -瞳 -瞻 -瞼 -瞽 -瞿 -矇 -矍 -矗 -矚 -矛 -矜 -矞 -矢 -矣 -知 -矧 -矩 -短 -矮 -矯 -石 -矸 -矽 -砂 -砋 -砌 -砍 -砒 -研 -砝 -砢 -砥 -砦 -砧 -砩 -砫 -砭 -砮 -砯 -砰 -砲 -砳 -破 -砵 -砷 -砸 -砼 -硂 -硃 -硅 -硇 -硏 -硐 -硒 -硓 -硚 -硜 -硝 -硤 -硨 -硫 -硬 -硭 -硯 -硼 -碁 -碇 -碉 -碌 -碎 -碑 -碓 -碕 -碗 -碘 -碚 -碟 -碡 -碣 -碧 -碩 -碪 -碭 -碰 -碲 -碳 -碴 -碶 -碸 -確 -碻 -碼 -碽 -碾 -磁 -磅 -磊 -磋 -磐 -磔 -磕 -磘 -磙 -磚 -磜 -磡 -磨 -磪 -磬 -磯 -磱 -磲 -磵 -磷 -磺 -磻 -磾 -礁 -礄 -礎 -礐 -礑 -礒 -礙 -礠 -礦 -礪 -礫 -礬 -礮 -礱 -礴 -示 -礻 -礽 -社 -祀 -祁 -祂 -祆 -祇 -祈 -祉 -祋 -祏 -祐 -祓 -祕 -祖 -祗 -祙 -祚 -祛 -祜 -祝 -神 -祟 -祠 -祥 -祧 -票 -祭 -祹 -祺 -祼 -祿 -禁 -禃 -禇 -禍 -禎 -福 -禑 -禓 -禔 -禕 -禘 -禛 -禟 -禠 -禤 -禦 -禧 -禨 -禩 -禪 -禮 -禰 -禱 -禵 -禹 -禺 -禼 -禽 -禾 -禿 -秀 -私 -秈 -秉 -秋 -科 -秒 -秕 -秘 -租 -秠 -秣 -秤 -秦 -秧 -秩 -秭 -秳 -秸 -移 -稀 -稅 -稈 -稉 -程 -稍 -稑 -稔 -稗 -稘 -稙 -稚 -稜 -稞 -稟 -稠 -種 -稱 -稲 -稷 -稹 -稺 -稻 -稼 -稽 -稾 -稿 -穀 -穂 -穆 -穈 -穉 -穌 -積 -穎 -穗 -穟 -穠 -穡 -穢 -穣 -穩 -穫 -穰 -穴 -穵 -究 -穹 -空 -穿 -突 -窄 -窅 -窈 -窋 -窒 -窕 -窖 -窗 -窘 -窟 -窠 -窣 -窨 -窩 -窪 -窮 -窯 -窰 -窶 -窺 -窿 -竄 -竅 -竇 -竈 -竊 -立 -竑 -站 -竜 -竟 -章 -竣 -童 -竦 -竩 -竭 -端 -競 -竹 -竺 -竻 -竿 -笄 -笆 -笈 -笏 -笑 -笘 -笙 -笛 -笞 -笠 -笥 -符 -笨 -笩 -笪 -第 -笭 -笮 -笯 -笱 -笳 -笹 -筅 -筆 -等 -筊 -筋 -筌 -筍 -筏 -筐 -筒 -答 -策 -筘 -筠 -筥 -筦 -筧 -筬 -筭 -筱 -筲 -筳 -筵 -筶 -筷 -筻 -箆 -箇 -箋 -箍 -箏 -箐 -箑 -箒 -箔 -箕 -算 -箜 -管 -箬 -箭 -箱 -箴 -箸 -節 -篁 -範 -篆 -篇 -築 -篊 -篋 -篌 -篔 -篙 -篝 -篠 -篡 -篤 -篥 -篦 -篩 -篪 -篭 -篯 -篳 -篷 -簀 -簃 -簇 -簉 -簋 -簍 -簑 -簕 -簗 -簞 -簠 -簡 -簧 -簪 -簫 -簷 -簸 -簹 -簺 -簽 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籙 -籛 -籜 -籝 -籟 -籠 -籣 -籤 -籥 -籪 -籬 -籮 -籲 -米 -籽 -籾 -粄 -粉 -粍 -粑 -粒 -粕 -粗 -粘 -粟 -粢 -粥 -粦 -粧 -粩 -粱 -粲 -粳 -粵 -粹 -粼 -粽 -精 -粿 -糀 -糅 -糊 -糌 -糍 -糎 -糕 -糖 -糙 -糜 -糝 -糞 -糟 -糠 -糢 -糧 -糬 -糯 -糰 -糴 -糶 -糸 -糹 -糺 -系 -糾 -紀 -紂 -約 -紅 -紆 -紇 -紈 -紉 -紊 -紋 -納 -紐 -紑 -紓 -純 -紕 -紗 -紘 -紙 -級 -紛 -紜 -紝 -紞 -素 -紡 -索 -紫 -紮 -累 -細 -紱 -紲 -紳 -紵 -紹 -紺 -紿 -終 -絃 -組 -絆 -経 -絎 -結 -絕 -絛 -絜 -絞 -絡 -絢 -給 -絨 -絪 -絮 -統 -絲 -絳 -絵 -絶 -絹 -絺 -綁 -綃 -綈 -綉 -綎 -綏 -經 -綖 -継 -続 -綜 -綝 -綞 -綠 -綢 -綣 -綦 -綧 -綫 -綬 -維 -綮 -綰 -綱 -網 -綳 -綴 -綸 -綺 -綻 -綽 -綾 -綿 -緁 -緃 -緄 -緈 -緊 -緋 -総 -緑 -緒 -緖 -緘 -線 -緜 -緝 -緞 -締 -緡 -緣 -緤 -編 -緩 -緬 -緯 -緱 -緲 -練 -緹 -緻 -縂 -縄 -縈 -縉 -縊 -縕 -縛 -縝 -縞 -縠 -縡 -縣 -縤 -縫 -縮 -縯 -縱 -縴 -縵 -縷 -縹 -縻 -總 -績 -繁 -繃 -繆 -繇 -繒 -織 -繕 -繖 -繙 -繚 -繞 -繡 -繩 -繪 -繫 -繭 -繰 -繳 -繹 -繻 -繼 -繽 -繾 -纁 -纂 -纈 -續 -纍 -纏 -纓 -纔 -纕 -纖 -纘 -纛 -纜 -缐 -缶 -缸 -缺 -缽 -罃 -罄 -罅 -罈 -罉 -罌 -罍 -罐 -罔 -罕 -罘 -罟 -罡 -罨 -罩 -罪 -置 -罰 -罱 -署 -罳 -罵 -罶 -罷 -罹 -罽 -羂 -羅 -羆 -羈 -羊 -羋 -羌 -美 -羔 -羕 -羗 -羙 -羚 -羞 -羡 -羣 -群 -羥 -羧 -羨 -義 -羯 -羰 -羱 -羲 -羸 -羹 -羽 -羿 -翀 -翁 -翂 -翃 -翅 -翊 -翌 -翎 -翏 -習 -翔 -翕 -翙 -翜 -翟 -翠 -翡 -翥 -翦 -翩 -翬 -翮 -翰 -翱 -翳 -翹 -翻 -翼 -耀 -老 -考 -耄 -者 -耆 -而 -耍 -耎 -耐 -耑 -耒 -耔 -耕 -耗 -耘 -耙 -耜 -耦 -耨 -耬 -耳 -耵 -耶 -耷 -耽 -耿 -聃 -聆 -聊 -聒 -聖 -聘 -聚 -聞 -聟 -聨 -聯 -聰 -聱 -聲 -聳 -聴 -聶 -職 -聽 -聾 -聿 -肄 -肅 -肆 -肇 -肉 -肋 -肌 -肏 -肖 -肘 -肚 -肛 -肜 -肝 -肟 -股 -肢 -肥 -肩 -肪 -肫 -肯 -肱 -育 -肸 -肹 -肺 -肼 -肽 -胂 -胃 -胄 -胅 -胇 -胊 -背 -胍 -胎 -胖 -胗 -胙 -胚 -胛 -胝 -胞 -胡 -胤 -胥 -胬 -胭 -胰 -胱 -胳 -胴 -胸 -胺 -胼 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脒 -脖 -脘 -脛 -脣 -脩 -脫 -脬 -脭 -脯 -脲 -脳 -脷 -脹 -脾 -腆 -腈 -腊 -腋 -腌 -腎 -腐 -腑 -腓 -腔 -腕 -腥 -腦 -腧 -腩 -腫 -腮 -腰 -腱 -腳 -腴 -腸 -腹 -腺 -腿 -膀 -膂 -膈 -膊 -膏 -膚 -膛 -膜 -膝 -膠 -膣 -膥 -膦 -膨 -膩 -膮 -膳 -膺 -膽 -膾 -膿 -臀 -臂 -臃 -臆 -臉 -臊 -臍 -臏 -臘 -臚 -臞 -臟 -臠 -臣 -臧 -臨 -自 -臭 -臯 -至 -致 -臺 -臻 -臼 -臾 -舂 -舅 -與 -興 -舉 -舊 -舌 -舍 -舎 -舒 -舔 -舖 -舘 -舛 -舜 -舞 -舟 -舢 -舥 -舨 -舩 -航 -舫 -般 -舲 -舵 -舶 -舷 -舸 -船 -舺 -艅 -艇 -艉 -艋 -艎 -艏 -艔 -艘 -艙 -艚 -艦 -艮 -良 -艱 -色 -艶 -艷 -艸 -艽 -艾 -艿 -芃 -芊 -芋 -芍 -芎 -芑 -芒 -芘 -芙 -芛 -芝 -芡 -芥 -芨 -芩 -芪 -芫 -芬 -芭 -芮 -芯 -花 -芳 -芴 -芷 -芸 -芹 -芻 -芽 -芾 -苄 -苅 -苑 -苒 -苓 -苔 -苕 -苗 -苛 -苜 -苝 -苞 -苟 -苡 -苣 -苤 -若 -苦 -苧 -苪 -苫 -苯 -英 -苳 -苴 -苷 -苺 -苻 -苼 -苾 -茀 -茁 -茂 -范 -茄 -茅 -茆 -茇 -茈 -茉 -茌 -茗 -茘 -茚 -茛 -茜 -茝 -茨 -茫 -茬 -茭 -茮 -茯 -茱 -茲 -茴 -茵 -茶 -茷 -茸 -茹 -茺 -茼 -荀 -荃 -荅 -荇 -草 -荊 -荎 -荏 -荒 -荔 -荖 -荘 -荳 -荷 -荸 -荻 -荼 -荽 -莆 -莉 -莊 -莎 -莒 -莓 -莕 -莖 -莘 -莙 -莛 -莜 -莞 -莠 -莢 -莧 -莨 -莩 -莪 -莫 -莽 -莿 -菀 -菁 -菅 -菇 -菈 -菉 -菊 -菌 -菍 -菏 -菑 -菓 -菔 -菖 -菘 -菜 -菝 -菟 -菠 -菡 -菥 -菩 -菪 -菫 -華 -菰 -菱 -菲 -菴 -菶 -菸 -菹 -菺 -菼 -菽 -菾 -萁 -萃 -萄 
-萇 -萊 -萌 -萍 -萎 -萐 -萘 -萜 -萠 -萡 -萣 -萩 -萬 -萭 -萱 -萵 -萸 -萹 -萼 -落 -葃 -葆 -葉 -葊 -葎 -葑 -葒 -著 -葙 -葚 -葛 -葜 -葝 -葡 -董 -葦 -葩 -葫 -葬 -葭 -葯 -葰 -葳 -葵 -葶 -葷 -葺 -蒂 -蒄 -蒍 -蒎 -蒐 -蒓 -蒔 -蒗 -蒙 -蒜 -蒞 -蒟 -蒡 -蒢 -蒤 -蒧 -蒨 -蒭 -蒯 -蒲 -蒴 -蒸 -蒹 -蒺 -蒻 -蒼 -蒽 -蒾 -蒿 -蓀 -蓁 -蓂 -蓄 -蓆 -蓉 -蓋 -蓍 -蓑 -蓓 -蓖 -蓘 -蓚 -蓧 -蓨 -蓪 -蓬 -蓭 -蓮 -蓯 -蓳 -蓼 -蓽 -蓿 -蔆 -蔎 -蔑 -蔓 -蔔 -蔕 -蔗 -蔘 -蔚 -蔝 -蔞 -蔡 -蔣 -蔥 -蔦 -蔬 -蔭 -蔴 -蔵 -蔻 -蔽 -蕁 -蕃 -蕅 -蕈 -蕉 -蕊 -蕎 -蕑 -蕒 -蕖 -蕘 -蕙 -蕚 -蕟 -蕡 -蕢 -蕤 -蕨 -蕩 -蕪 -蕭 -蕷 -蕹 -蕺 -蕻 -蕾 -薀 -薄 -薆 -薇 -薈 -薊 -薌 -薏 -薐 -薑 -薔 -薗 -薘 -薙 -薛 -薜 -薞 -薟 -薡 -薦 -薨 -薩 -薪 -薫 -薬 -薯 -薰 -薲 -薷 -薸 -薹 -薺 -薾 -薿 -藁 -藉 -藍 -藎 -藏 -藐 -藔 -藕 -藜 -藝 -藟 -藤 -藥 -藦 -藨 -藩 -藪 -藶 -藸 -藹 -藺 -藻 -藿 -蘂 -蘄 -蘅 -蘆 -蘇 -蘊 -蘋 -蘐 -蘑 -蘓 -蘗 -蘘 -蘚 -蘞 -蘢 -蘧 -蘩 -蘭 -蘵 -蘶 -蘸 -蘼 -蘿 -虉 -虎 -虐 -虓 -虔 -處 -虖 -虛 -虜 -虞 -號 -虢 -虧 -虨 -虯 -虱 -虵 -虹 -虺 -虻 -蚆 -蚊 -蚋 -蚌 -蚍 -蚓 -蚖 -蚜 -蚝 -蚡 -蚢 -蚣 -蚤 -蚧 -蚨 -蚩 -蚪 -蚯 -蚱 -蚴 -蚵 -蚶 -蚺 -蚼 -蛀 -蛄 -蛇 -蛉 -蛋 -蛍 -蛐 -蛑 -蛔 -蛙 -蛛 -蛞 -蛟 -蛤 -蛭 -蛯 -蛸 -蛹 -蛺 -蛻 -蛾 -蜀 -蜂 -蜃 -蜆 -蜇 -蜈 -蜉 -蜊 -蜍 -蜑 -蜒 -蜓 -蜘 -蜚 -蜛 -蜜 -蜞 -蜢 -蜣 -蜥 -蜨 -蜮 -蜯 -蜱 -蜴 -蜷 -蜻 -蜾 -蜿 -蝀 -蝌 -蝍 -蝎 -蝓 -蝕 -蝗 -蝘 -蝙 -蝚 -蝟 -蝠 -蝣 -蝤 -蝦 -蝨 -蝮 -蝯 -蝰 -蝲 -蝴 -蝶 -蝸 -蝽 -螂 -螃 -螄 -螅 -螈 -螋 -融 -螐 -螔 -螞 -螟 -螠 -螢 -螣 -螥 -螫 -螭 -螯 -螳 -螶 -螺 -螻 -螽 -螾 -蟀 -蟄 -蟅 -蟆 -蟊 -蟋 -蟌 -蟎 -蟑 -蟒 -蟜 -蟠 -蟥 -蟪 -蟫 -蟬 -蟯 -蟲 -蟳 -蟴 -蟶 -蟹 -蟻 -蟾 -蠂 -蠃 -蠄 -蠅 -蠆 -蠊 -蠋 -蠍 -蠐 -蠑 -蠓 -蠔 -蠕 -蠖 -蠘 -蠙 -蠟 -蠡 -蠢 -蠣 -蠱 -蠲 -蠵 -蠶 -蠷 -蠹 -蠻 -血 -衂 -衆 -行 -衍 -衎 -術 -衕 -衖 -街 -衙 -衚 -衛 -衜 -衝 -衞 -衡 -衢 -衣 -表 -衩 -衫 -衰 -衲 -衷 -衽 -衾 -衿 -袁 -袂 -袈 -袋 -袍 -袓 -袖 -袛 -袞 -袤 -袪 -被 -袱 -袴 -袾 -裁 -裂 -裊 -裎 -裒 -裔 -裕 -裖 -裘 -裙 -補 -裝 -裟 -裡 -裨 -裬 -裱 -裳 -裴 -裵 -裸 -裹 -製 -裾 -裿 -褀 -褂 -複 -褌 -褍 -褎 -褐 -褒 -褓 -褔 -褘 -褙 -褚 -褞 -褥 -褧 -褪 -褫 -褭 -褲 -褶 -褸 -褻 -襄 -襌 -襖 -襞 -襟 -襠 -襤 -襦 -襪 -襯 -襲 -襴 -襶 -襻 -襾 -西 -要 -覃 -覆 -覇 -覈 -見 -覌 -規 -覓 -視 -覚 -覡 -覦 -覧 -親 -覬 -覲 -観 -覺 -覽 -覿 -觀 -角 -觔 -觙 -觚 -觜 -解 -觭 -觱 -觴 -觶 -觸 -觿 -言 -訁 -訂 -訃 -訇 -計 -訊 -訌 -討 -訏 -訐 -訒 -訓 -訔 -訕 -訖 -託 -記 -訛 -訝 -訟 -訣 -訥 -訪 -設 -許 -訴 -訶 -診 -註 -証 -訾 -詁 -詆 -詈 -詐 -詒 -詔 -評 -詛 -詞 -詠 -詡 -詢 -詣 -詥 -試 -詧 -詩 -詫 -詭 -詮 -詰 -話 -該 -詳 -詵 -詹 -詼 -誄 -誅 -誇 -誌 -認 -誒 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -誨 -說 -説 -読 -誰 -課 -誴 -誹 -誼 -誾 -調 -談 -請 -諍 -諏 -諒 -論 -諗 -諜 -諟 -諠 -諡 -諤 -諦 -諧 -諪 -諫 -諭 -諮 -諱 -諲 -諳 -諴 -諶 -諷 -諸 -諺 -諼 -諾 -謀 -謁 -謂 -謄 -謇 -謊 -謌 -謎 -謏 -謐 -謔 -謖 -謗 -謙 -謚 -講 -謜 -謝 -謠 -謢 -謤 -謨 -謩 -謫 -謬 -謳 -謹 -謾 -證 -譏 -譓 -譔 -識 -譙 -譚 -譜 -譞 -警 -譫 -譬 -譭 -譯 -議 -譲 -譳 -譴 -護 -譽 -譿 -讀 -讃 -變 -讌 -讎 -讓 -讖 -讙 -讚 -讜 -讞 -谷 -谿 -豁 -豆 -豇 -豈 -豉 -豊 -豌 -豎 -豐 -豔 -豕 -豚 -象 -豢 -豨 -豪 -豫 -豬 -豳 -豸 -豹 -豺 -豿 -貂 -貅 -貉 -貊 -貌 -貐 -貒 -貓 -貔 -貘 -貝 -貞 -負 -財 -貢 -貤 -貧 -貨 -販 -貪 -貫 -責 -貭 -貮 -貯 -貲 -貳 -貴 -貶 -買 -貸 -貺 -費 -貼 -貽 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賑 -賒 -賓 -賔 -賕 -賚 -賜 -賞 -賠 -賡 -賢 -賣 -賤 -賦 -賨 -質 -賬 -賭 -賴 -賹 -賺 -賻 -購 -賽 -賾 -贄 -贅 -贇 -贈 -贊 -贌 -贍 -贏 -贓 -贔 -贖 -贛 -赤 -赦 -赧 -赫 -赬 -赭 -走 -赳 -赴 -起 -趁 -超 -越 -趐 -趕 -趖 -趙 -趟 -趣 -趨 -足 -趴 -趵 -趺 -趼 -趾 -跅 -跆 -跋 -跌 -跏 -跑 -跖 -跗 -跛 -距 -跟 -跡 -跣 -跤 -跨 -跩 -跪 -路 -跳 -踎 -踏 -踐 -踝 -踞 -踢 -踩 -踰 -踴 -踹 -踺 -蹂 -蹄 -蹇 -蹈 -蹉 -蹊 -蹋 -蹕 -蹙 -蹟 -蹠 -蹤 -蹦 -蹬 -蹭 -蹯 -蹲 -蹴 -蹶 -蹺 -蹻 -蹼 -躁 -躂 -躄 -躉 -躋 -躍 -躑 -躒 -躔 -躝 -躪 -身 -躬 -躰 -躲 -躺 -軀 -車 -軋 -軌 -軍 -軎 -軒 -軔 -軛 -軟 -転 -軫 -軲 -軸 -軹 -軺 -軻 -軼 -軽 -軾 -較 -輄 -輅 -載 -輋 -輒 -輓 -輔 -輕 -輛 -輝 -輞 -輟 -輥 -輦 -輩 -輪 -輬 -輭 -輯 -輶 -輸 -輻 -輾 -輿 -轀 -轂 -轄 -轅 -轆 -轉 -轍 -轎 -轘 -轝 -轟 -轤 -辛 -辜 -辟 -辣 -辦 -辧 -辨 -辭 -辮 -辯 -辰 -辱 -農 -辵 -辺 -辻 -込 -迂 -迄 -迅 -迎 -近 -返 -迢 -迤 -迥 -迦 -迪 -迫 -迭 -迮 -述 -迴 -迵 -迷 -迸 -迺 -追 -退 -送 -逃 -逄 -逅 -逆 -逈 -逋 -逌 -逍 -逎 -透 -逐 -逑 -途 -逕 -逖 -逗 -這 -通 -逛 -逝 -逞 -速 -造 -逢 -連 -逤 -逨 -逮 -逯 -進 -逴 -逵 -逸 -逹 -逺 -逼 -逾 -遁 -遂 -遄 -遇 -遊 -運 -遍 -過 -遏 -遐 -遒 -道 -達 -違 -遘 -遙 -遛 -遜 -遞 -遠 -遢 -遣 -遨 -適 -遭 -遮 -遯 -遲 -遴 -遵 -遶 -遷 -選 -遹 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邈 -邉 -邊 -邋 -邏 -邑 -邕 -邗 -邙 -邛 -邠 -邡 -邢 -那 -邦 -邨 -邪 -邯 -邰 -邱 -邲 -邳 -邴 -邵 -邸 -邽 -邾 -郁 -郃 -郄 -郅 -郇 -郊 -郋 -郎 -郗 -郛 -郜 -郝 -郞 -郟 -郡 -郢 -郤 -部 -郪 -郫 -郭 -郯 -郳 -郴 -郵 -郷 -都 -郾 -郿 -鄂 -鄃 -鄄 -鄆 -鄉 -鄋 -鄑 -鄒 -鄔 -鄖 -鄗 -鄘 -鄙 -鄚 -鄜 -鄞 -鄠 
-鄢 -鄣 -鄤 -鄧 -鄩 -鄫 -鄭 -鄯 -鄰 -鄱 -鄲 -鄳 -鄴 -鄺 -酃 -酆 -酈 -酉 -酊 -酋 -酌 -配 -酎 -酏 -酐 -酒 -酔 -酗 -酚 -酞 -酡 -酢 -酣 -酥 -酩 -酪 -酬 -酮 -酯 -酰 -酴 -酵 -酶 -酷 -酸 -酺 -酼 -醁 -醂 -醃 -醅 -醇 -醉 -醋 -醌 -醍 -醐 -醒 -醚 -醛 -醜 -醞 -醢 -醣 -醪 -醫 -醬 -醮 -醯 -醴 -醺 -醾 -醿 -釀 -釁 -釆 -采 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釒 -釓 -釔 -釕 -釗 -釘 -釙 -釚 -釜 -針 -釣 -釤 -釦 -釧 -釩 -釪 -釭 -釴 -釵 -釷 -釹 -釺 -鈀 -鈁 -鈄 -鈇 -鈈 -鈉 -鈊 -鈍 -鈏 -鈐 -鈑 -鈔 -鈕 -鈖 -鈞 -鈢 -鈣 -鈥 -鈦 -鈫 -鈮 -鈰 -鈳 -鈴 -鈷 -鈸 -鈹 -鈺 -鈾 -鈿 -鉀 -鉄 -鉅 -鉆 -鉈 -鉉 -鉋 -鉌 -鉍 -鉏 -鉑 -鉓 -鉗 -鉚 -鉛 -鉞 -鉟 -鉤 -鉦 -鉬 -鉭 -鉲 -鉶 -鉷 -鉸 -鉻 -鉾 -鉿 -銀 -銂 -銃 -銅 -銋 -銍 -銑 -銓 -銕 -銖 -銘 -銚 -銜 -銠 -銣 -銥 -銦 -銨 -銩 -銪 -銫 -銬 -銭 -銱 -銲 -銳 -銶 -銷 -銹 -銻 -銼 -銾 -鋁 -鋅 -鋆 -鋇 -鋌 -鋏 -鋐 -鋒 -鋕 -鋗 -鋙 -鋡 -鋤 -鋥 -鋦 -鋨 -鋪 -鋮 -鋯 -鋰 -鋱 -鋳 -鋶 -鋸 -鋹 -鋼 -錀 -錄 -錏 -錐 -錒 -錕 -錘 -錚 -錞 -錟 -錠 -錡 -錢 -錦 -錨 -錫 -錬 -錮 -錯 -錳 -錶 -錸 -錻 -鍀 -鍇 -鍈 -鍉 -鍊 -鍋 -鍍 -鍏 -鍔 -鍘 -鍛 -鍝 -鍟 -鍠 -鍥 -鍩 -鍬 -鍱 -鍳 -鍵 -鍶 -鍷 -鍺 -鍼 -鍾 -鎂 -鎅 -鎊 -鎌 -鎏 -鎓 -鎔 -鎖 -鎗 -鎘 -鎚 -鎛 -鎢 -鎣 -鎦 -鎧 -鎪 -鎬 -鎭 -鎮 -鎰 -鎳 -鎵 -鎻 -鏃 -鏇 -鏈 -鏊 -鏌 -鏐 -鏑 -鏓 -鏖 -鏗 -鏘 -鏜 -鏝 -鏞 -鏟 -鏡 -鏢 -鏤 -鏦 -鏳 -鏴 -鏵 -鏷 -鏻 -鏽 -鐃 -鐇 -鐈 -鐓 -鐔 -鐘 -鐙 -鐠 -鐡 -鐤 -鐦 -鐧 -鐫 -鐬 -鐭 -鐮 -鐲 -鐳 -鐵 -鐸 -鐺 -鐽 -鐿 -鑀 -鑁 -鑂 -鑄 -鑅 -鑊 -鑌 -鑑 -鑒 -鑛 -鑠 -鑣 -鑨 -鑪 -鑫 -鑭 -鑰 -鑲 -鑴 -鑷 -鑼 -鑽 -鑾 -鑿 -長 -門 -閂 -閃 -閆 -閉 -開 -閎 -閏 -閑 -閒 -間 -閔 -閘 -閜 -閞 -閟 -関 -閣 -閥 -閦 -閨 -閩 -閬 -閭 -閰 -閱 -閶 -閹 -閻 -閼 -閾 -閿 -闆 -闇 -闈 -闊 -闋 -闌 -闍 -闐 -闓 -闔 -闕 -闖 -闘 -關 -闞 -闡 -闢 -闥 -阜 -阝 -阡 -阪 -阭 -阮 -阯 -阱 -防 -阻 -阿 -陀 -陁 -陂 -附 -陋 -陌 -降 -限 -陔 -陘 -陛 -陜 -陝 -陞 -陟 -陡 -院 -陣 -除 -陪 -陬 -陰 -陲 -陳 -陵 -陶 -陷 -陸 -険 -陽 -隄 -隅 -隆 -隈 -隊 -隋 -隍 -階 -隔 -隕 -隗 -隘 -隙 -際 -障 -隣 -隧 -隨 -險 -隰 -隱 -隲 -隳 -隴 -隷 -隸 -隹 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雋 -雌 -雍 -雎 -雑 -雒 -雕 -雖 -雙 -雛 -雜 -雝 -雞 -離 -難 -雨 -雩 -雪 -雫 -雯 -雱 -雲 -零 -雷 -雹 -電 -需 -霄 -霅 -霆 -震 -霈 -霉 -霊 -霍 -霎 -霏 -霑 -霓 -霖 -霙 -霜 -霞 -霤 -霧 -霨 -霰 -露 -霶 -霸 -霹 -霽 -霾 -靁 -靂 -靄 -靈 -靉 -靑 -青 -靖 -靚 -靛 -靜 -非 -靠 -靡 -面 -革 -靫 -靬 -靭 -靳 -靴 -靶 -靺 -靼 -鞅 -鞆 -鞋 -鞍 -鞏 -鞘 -鞞 -鞠 -鞣 -鞥 -鞦 -鞨 -鞭 -鞮 -鞴 -韁 -韃 -韆 -韋 -韌 -韑 -韓 -韙 -韜 -韞 -韠 -韡 -韭 -韮 -音 -韶 -韺 -韻 -韾 -響 -頁 -頂 -頃 -項 -順 -須 -頊 -頌 -頍 -頎 -頏 -預 -頑 -頒 -頓 -頔 -頗 -領 -頜 -頠 -頡 -頤 -頦 -頫 -頭 -頰 -頴 -頵 -頷 -頸 -頹 -頻 -頼 -顆 -題 -額 -顎 -顏 -顒 -顓 -顔 -顕 -顗 -願 -顙 -顛 -類 -顥 -顧 -顫 -顯 -顰 -顱 -顳 -顴 -風 -颮 -颯 -颱 -颶 -颺 -颼 -飄 -飆 -飈 -飛 -食 -飠 -飡 -飢 -飥 -飩 -飪 -飫 -飬 -飭 -飮 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餄 -餅 -餉 -養 -餌 -餎 -餐 -餒 -餓 -餗 -餘 -餚 -餛 -餞 -餠 -餡 -館 -餮 -餵 -餺 -餾 -餿 -饃 -饅 -饋 -饌 -饑 -饒 -饕 -饗 -饞 -饟 -饢 -首 -馗 -馘 -香 -馛 -馥 -馦 -馨 -馬 -馭 -馮 -馯 -馱 -馳 -馴 -馼 -駁 -駄 -駅 -駆 -駐 -駑 -駒 -駔 -駕 -駘 -駙 -駛 -駝 -駟 -駢 -駭 -駰 -駱 -駿 -騁 -騂 -騄 -騅 -騋 -騎 -騏 -験 -騖 -騙 -騤 -騨 -騫 -騭 -騮 -騰 -騶 -騷 -騾 -驁 -驃 -驄 -驅 -驊 -驌 -驍 -驎 -驒 -驕 -驗 -驚 -驛 -驟 -驢 -驤 -驥 -驩 -驪 -骨 -骯 -骰 -骶 -骷 -骸 -骼 -髀 -髂 -髎 -髏 -髑 -髒 -髓 -體 -高 -髙 -髡 -髦 -髪 -髭 -髮 -髯 -髲 -髷 -髹 -髻 -鬃 -鬄 -鬅 -鬆 -鬍 -鬚 -鬟 -鬢 -鬣 -鬥 -鬧 -鬨 -鬩 -鬪 -鬬 -鬮 -鬯 -鬱 -鬲 -鬹 -鬻 -鬼 -魁 -魂 -魃 -魄 -魅 -魈 -魋 -魍 -魎 -魏 -魔 -魕 -魘 -魚 -魛 -魞 -魟 -魣 -魨 -魩 -魮 -魯 -魴 -魷 -鮀 -鮁 -鮃 -鮄 -鮊 -鮋 -鮍 -鮐 -鮑 -鮒 -鮓 -鮗 -鮜 -鮟 -鮠 -鮡 -鮣 -鮨 -鮪 -鮫 -鮭 -鮮 -鮰 -鮸 -鮹 -鮻 -鯀 -鯁 -鯃 -鯇 -鯉 -鯊 -鯏 -鯒 -鯓 -鯔 -鯕 -鯖 -鯗 -鯙 -鯛 -鯡 -鯢 -鯤 -鯧 -鯨 -鯪 -鯭 -鯮 -鯰 -鯶 -鯷 -鯻 -鯽 -鯿 -鰂 -鰃 -鰆 -鰈 -鰉 -鰍 -鰏 -鰒 -鰓 -鰕 -鰗 -鰛 -鰜 -鰟 -鰣 -鰤 -鰧 -鰨 -鰩 -鰭 -鰮 -鰱 -鰲 -鰳 -鰶 -鰷 -鰹 -鰺 -鰻 -鰼 -鰾 -鱀 -鱂 -鱅 -鱇 -鱈 -鱉 -鱊 -鱒 -鱓 -鱔 -鱖 -鱗 -鱘 -鱚 -鱝 -鱟 -鱠 -鱣 -鱥 -鱧 -鱨 -鱬 -鱮 -鱰 -鱲 -鱵 -鱷 -鱸 -鱺 -鱻 -鳥 -鳧 -鳩 -鳯 -鳰 -鳳 -鳴 -鳶 -鳽 -鴆 -鴇 -鴉 -鴒 -鴓 -鴕 -鴗 -鴛 -鴝 -鴞 -鴟 -鴡 -鴣 -鴦 -鴨 -鴫 -鴯 -鴰 -鴴 -鴻 -鴿 -鵂 -鵄 -鵎 -鵐 -鵑 -鵒 -鵓 -鵙 -鵜 -鵝 -鵞 -鵟 -鵠 -鵡 -鵪 -鵬 -鵯 -鵰 -鵲 -鵵 -鵼 -鵾 -鶆 -鶇 -鶉 -鶏 -鶒 -鶓 -鶘 -鶚 -鶡 -鶥 -鶩 -鶬 -鶯 -鶲 -鶴 -鶹 -鶺 -鶻 -鶼 -鶿 -鷂 -鷄 -鷉 -鷎 -鷓 -鷗 -鷙 -鷚 -鷟 -鷥 -鷦 -鷫 -鷯 -鷲 -鷳 -鷸 -鷹 -鷺 -鸊 -鸌 -鸐 -鸑 -鸕 -鸘 -鸚 -鸛 -鸜 -鸝 -鸞 -鹮 -鹵 -鹹 -鹼 -鹽 -鹿 -麂 -麅 -麇 -麈 -麊 -麋 -麐 -麒 -麓 -麗 -麝 -麞 -麟 -麥 -麩 -麪 -麯 -麴 -麵 -麹 -麺 -麻 -麼 -麽 -麾 -麿 -黁 -黃 -黇 -黌 -黍 -黎 -黏 -黐 -黑 -黒 -黔 -默 -黙 -黛 -黜 -黝 -點 -黟 -黥 -黧 -黨 -黯 -黴 -黶 -黻 -黼 -黽 -黿 -鼂 -鼇 -鼈 -鼉 -鼎 -鼐 -鼒 -鼓 -鼕 -鼙 -鼠 -鼢 -鼩 -鼬 -鼯 -鼱 -鼴 -鼷 -鼻 -鼽 -鼾 -齊 -齋 -齒 -齕 -齡 -齣 -齦 -齧 -齲 -齶 -龍 -龎 -龐 -龑 -龔 -龕 -龜 
-龝 -龠 -龢 -郎 -凉 -﹑ -﹗ -﹝ -﹞ -﹢ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -K -L -M -N -O -P -R -S -T -U -V -W -Y -Z -[ -] -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -r -s -t -u -z -{ -| -} -~ -¥ -𣇉 - diff --git a/backend/ppocr/utils/dict/cyrillic_dict.txt b/backend/ppocr/utils/dict/cyrillic_dict.txt deleted file mode 100644 index 2b6f6649..00000000 --- a/backend/ppocr/utils/dict/cyrillic_dict.txt +++ /dev/null @@ -1,163 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -Ё -Є -І -Ј -Љ -Ў -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ы -Ь -Э -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ё -ђ -є -і -ј -љ -њ -ћ -ў -џ -Ґ -ґ diff --git a/backend/ppocr/utils/dict/devanagari_dict.txt b/backend/ppocr/utils/dict/devanagari_dict.txt deleted file mode 100644 index f5592306..00000000 --- a/backend/ppocr/utils/dict/devanagari_dict.txt +++ /dev/null @@ -1,167 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -ऩ -प -फ -ब -भ -म -य -र -ऱ -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -॒ -क़ -ख़ -ग़ -ज़ -ड़ -ढ़ -फ़ -ॠ -। -० -१ -२ -३ -४ -५ -६ -७ -८ -९ -॰ diff --git a/backend/ppocr/utils/dict/en_dict.txt b/backend/ppocr/utils/dict/en_dict.txt deleted file mode 100644 index 7677d31b..00000000 --- a/backend/ppocr/utils/dict/en_dict.txt +++ /dev/null @@ -1,95 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ - diff --git a/backend/ppocr/utils/dict/es_dict.txt b/backend/ppocr/utils/dict/es_dict.txt deleted file mode 100644 index f195f1ea..00000000 --- a/backend/ppocr/utils/dict/es_dict.txt +++ /dev/null @@ -1,110 +0,0 @@ -x -i -_ -m -g -/ -1 -0 -I -L -S -V -R -C -2 -v -a -l -3 -6 -4 -5 -. -j -p - -Q -u -e -r -o -8 -7 -n -c -9 -t -b -é -q -d -ó -y -F -s -, -O -í -T -f -" -U -M -h -: -P -H -A -E -D -z -N -á -ñ -ú -% -; -è -+ -Y -- -B -G -( -) -¿ -? -w -¡ -! -X -É -K -k -Á -ü -Ú -« -» -J -' -ö -W -Z -º -Ö -­ -[ -] -Ç -ç -à -ä -û -ò -Í -ê -ô -ø -ª diff --git a/backend/ppocr/utils/dict/fa_dict.txt b/backend/ppocr/utils/dict/fa_dict.txt deleted file mode 100644 index 2328fbd8..00000000 --- a/backend/ppocr/utils/dict/fa_dict.txt +++ /dev/null @@ -1,136 +0,0 @@ -f -a -_ -i -m -g -/ -1 -3 -I -L -S -V -R -C -2 -0 -v -l -6 -8 -5 -. -j -p -و -د -ر -ك -ن -ش -ه -ا -4 -9 -ی -ج -ِ -7 -غ -ل -س -ز -ّ -ت -ک -گ -ي -م -ب -ف -چ -خ -ق -ژ -آ -ص -پ -َ -ع -ئ -ح -ٔ -ض -ُ -ذ -أ -ى -ط -ظ -ث -ة -ً -ء -ؤ -ْ -ۀ -إ -ٍ -ٌ -ٰ -ٓ -ٱ -s -c -e -n -w -N -E -W -Y -D -O -H -A -d -z -r -T -G -o -t -x -h -b -B -M -Z -u -P -F -y -q -U -K -k -J -Q -' -X -# -? -% -$ -, -: -& -! 
-- -( -É -@ -é -+ - diff --git a/backend/ppocr/utils/dict/french_dict.txt b/backend/ppocr/utils/dict/french_dict.txt deleted file mode 100644 index e8f657db..00000000 --- a/backend/ppocr/utils/dict/french_dict.txt +++ /dev/null @@ -1,136 +0,0 @@ -f -e -n -c -h -_ -i -m -g -/ -r -v -a -l -t -w -o -d -6 -1 -. -p -B -u -2 -à -3 -R -y -4 -U -E -A -5 -P -O -S -T -D -7 -Z -8 -I -N -L -G -M -H -0 -J -K -- -9 -F -C -V -é -X -' -s -Q -: -è -x -b -Y -Œ -É -z -W -Ç -È -k -Ô -ô -€ -À -Ê -q -ù -° -ê -î -* - -j -" -, -â -% -û -ç -ü -? -! -; -ö -( -) -ï -º -ó -ø -å -+ -™ -á -Ë -< -² -Á -Î -& -@ -œ -ε -Ü -ë -[ -] -í -ò -Ö -ä -ß -« -» -ú -ñ -æ -µ -³ -Å -$ -# - diff --git a/backend/ppocr/utils/dict/german_dict.txt b/backend/ppocr/utils/dict/german_dict.txt deleted file mode 100644 index 5e121af2..00000000 --- a/backend/ppocr/utils/dict/german_dict.txt +++ /dev/null @@ -1,143 +0,0 @@ - -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -£ -§ -­ -° -´ -µ -· -º -¿ -Á -Ä -Å -É -Ï -Ô -Ö -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ï -ñ -ò -ó -ô -ö -ø -ù -ú -û -ü -ō -Š -Ÿ -ʒ -β -δ -з -Ṡ -‘ -€ -© -ª -« -¬ diff --git a/backend/ppocr/utils/dict/hi_dict.txt b/backend/ppocr/utils/dict/hi_dict.txt deleted file mode 100644 index 8dfedb5a..00000000 --- a/backend/ppocr/utils/dict/hi_dict.txt +++ /dev/null @@ -1,162 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -प -फ -ब -भ -म -य -र -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -क़ -ख़ -ग़ -ज़ -ड़ -ढ़ -फ़ -० -१ -२ -३ -४ -५ -६ -७ -८ -९ -॰ diff --git a/backend/ppocr/utils/dict/it_dict.txt b/backend/ppocr/utils/dict/it_dict.txt deleted file mode 100644 index e692c6d4..00000000 --- a/backend/ppocr/utils/dict/it_dict.txt +++ /dev/null @@ -1,118 +0,0 @@ -i -t -_ -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -7 -8 -9 -6 -. -j -p - -e -r -o -d -s -n -3 -4 -P -u -c -A -- -, -" -z -h -f -b -q -ì -' -à -O -è -G -ù -é -ò -; -F -E -B -N -H -k -: -U -T -X -D -K -? -[ -M -­ -x -y -( -) -W -ö -º -w -] -Q -J -+ -ü -! -È -á -% -= -» -ñ -Ö -Y -ä -í -Z -« -@ -ó -ø -ï -ú -ê -ç -Á -É -Å -ß -{ -} -& -` -û -î -# -$ diff --git a/backend/ppocr/utils/dict/japan_dict.txt b/backend/ppocr/utils/dict/japan_dict.txt deleted file mode 100644 index 339d4b89..00000000 --- a/backend/ppocr/utils/dict/japan_dict.txt +++ /dev/null @@ -1,4399 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -© -° -² -´ -½ -Á -Ä -Å -Ç -È -É -Í -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ğ -ī -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ż -Ž -ž -Ș -ș -ț -Δ -α -λ -μ -φ -Г -О -а -в -л -о -р -с -т -я -ồ -​ -— -― -’ -“ -” -… -℃ -→ -∇ -− -■ -☆ -  -、 -。 -々 -〆 -〈 -〉 -「 -」 -『 -』 -〔 -〕 -〜 -ぁ -あ -ぃ -い -う -ぇ -え -ぉ -お -か -が -き -ぎ -く -ぐ -け -げ -こ -ご -さ -ざ -し -じ -す -ず -せ -ぜ -そ -ぞ -た -だ -ち -ぢ -っ -つ -づ -て -で -と -ど -な -に -ぬ -ね -の -は -ば -ぱ -ひ -び -ぴ -ふ -ぶ -ぷ -へ -べ -ぺ -ほ -ぼ -ぽ -ま -み -む -め -も -ゃ -や -ゅ -ゆ -ょ -よ -ら -り -る -れ -ろ -わ -ゑ -を -ん -ゝ -ゞ -ァ -ア -ィ -イ -ゥ -ウ -ェ -エ -ォ -オ -カ -ガ -キ -ギ -ク -グ -ケ -ゲ -コ -ゴ -サ -ザ -シ -ジ -ス -ズ -セ -ゼ -ソ -ゾ -タ -ダ -チ -ヂ -ッ -ツ -ヅ -テ -デ -ト -ド -ナ -ニ -ヌ -ネ -ノ -ハ -バ -パ -ヒ -ビ -ピ -フ -ブ -プ -ヘ -ベ -ペ -ホ -ボ -ポ -マ -ミ -ム -メ -モ -ャ -ヤ -ュ -ユ -ョ -ヨ -ラ -リ -ル -レ -ロ -ワ -ヰ -ン -ヴ -ヵ -ヶ -・ -ー -㈱ -一 -丁 -七 -万 -丈 -三 -上 -下 -不 -与 -丑 -且 -世 -丘 -丙 -丞 -両 -並 -中 -串 -丸 -丹 -主 -丼 -丿 -乃 -久 -之 -乎 -乏 -乗 -乘 -乙 -九 -乞 -也 -乱 -乳 -乾 -亀 -了 -予 -争 -事 -二 -于 -互 -五 -井 -亘 -亙 -些 -亜 -亟 -亡 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -人 -什 -仁 -仇 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -代 -令 -以 -仮 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -会 -伝 -伯 -估 -伴 -伶 -伸 -伺 -似 -伽 -佃 -但 -位 -低 -住 -佐 -佑 -体 -何 -余 -佚 -佛 -作 -佩 -佳 -併 -佶 -使 -侈 -例 -侍 -侏 -侑 -侘 -供 -依 -侠 -価 -侮 -侯 -侵 -侶 -便 -係 -促 -俄 -俊 -俔 -俗 -俘 -保 -信 -俣 -俤 -修 -俯 -俳 -俵 -俸 -俺 -倉 -個 -倍 -倒 -候 -借 -倣 -値 -倫 -倭 -倶 -倹 -偃 -假 -偈 -偉 -偏 -偐 -偕 -停 -健 -側 -偵 -偶 -偽 -傀 -傅 -傍 -傑 -傘 -備 -催 -傭 -傲 -傳 -債 -傷 -傾 -僊 -働 -像 -僑 -僕 -僚 -僧 -僭 -僮 -儀 -億 -儇 -儒 -儛 -償 -儡 -優 -儲 -儺 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -兎 -児 -党 -兜 -入 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -内 -円 -冊 -再 -冑 -冒 -冗 -写 -冠 -冤 -冥 -冨 -冬 -冲 -决 -冶 -冷 -准 -凉 -凋 -凌 -凍 -凛 -凝 -凞 -几 -凡 -処 -凪 -凰 -凱 -凶 -凸 -凹 -出 -函 -刀 -刃 -分 -切 -刈 -刊 -刎 -刑 -列 -初 -判 -別 -利 -刪 -到 -制 -刷 -券 -刹 -刺 -刻 -剃 -則 -削 -剋 -前 -剖 -剛 -剣 -剤 -剥 -剪 -副 -剰 -割 -創 -剽 -劇 -劉 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劭 -励 -労 -効 -劾 -勃 -勅 -勇 -勉 -勒 -動 -勘 -務 -勝 -募 -勢 -勤 -勧 -勲 -勺 -勾 -勿 -匁 -匂 -包 -匏 -化 -北 -匙 -匝 -匠 -匡 -匣 -匯 -匲 -匹 -区 -医 -匿 -十 -千 -升 -午 -卉 -半 -卍 -卑 -卒 -卓 -協 -南 -単 -博 -卜 -占 -卦 -卯 -印 -危 -即 -却 -卵 -卸 -卿 -厄 -厚 -原 -厠 -厨 -厩 -厭 -厳 -去 -参 -又 -叉 -及 -友 -双 -反 -収 -叔 -取 -受 -叙 -叛 -叟 -叡 -叢 -口 -古 -句 -叩 -只 -叫 -召 -可 -台 -叱 -史 -右 -叶 -号 -司 -吃 -各 -合 -吉 -吊 -同 -名 -后 -吏 -吐 -向 -君 -吝 -吟 -吠 -否 -含 -吸 -吹 -吻 -吽 -吾 -呂 -呆 -呈 -呉 -告 -呑 -周 -呪 -呰 -味 -呼 -命 -咀 -咄 -咋 -和 -咒 -咫 -咲 -咳 -咸 -哀 -品 -哇 -哉 -員 -哨 -哩 -哭 -哲 -哺 -唄 -唆 -唇 -唐 -唖 -唯 -唱 -唳 -唸 -唾 -啄 -商 -問 -啓 -啼 -善 -喋 -喚 -喜 -喝 -喧 -喩 -喪 -喫 -喬 -單 -喰 -営 -嗅 -嗇 -嗔 -嗚 -嗜 -嗣 -嘆 -嘉 -嘗 -嘘 -嘩 -嘯 -嘱 -嘲 -嘴 -噂 -噌 -噛 -器 -噴 -噺 -嚆 -嚢 -囀 -囃 -囉 -囚 -四 -回 -因 -団 -困 -囲 -図 -固 -国 -圀 -圃 -國 -圏 -園 -圓 -團 -圜 -土 -圧 -在 -圭 -地 -址 -坂 -均 -坊 -坐 -坑 -坡 -坤 -坦 -坪 -垂 -型 -垢 -垣 -埃 -埋 -城 -埒 -埔 -域 -埠 -埴 -埵 -執 -培 -基 -埼 -堀 -堂 -堅 -堆 -堕 -堤 -堪 -堯 -堰 -報 -場 -堵 -堺 -塀 -塁 -塊 -塑 -塔 -塗 -塘 -塙 -塚 -塞 -塩 -填 -塵 -塾 -境 -墉 -墓 -増 -墜 -墟 -墨 -墳 -墺 -墻 -墾 -壁 -壇 -壊 -壌 -壕 -士 -壬 -壮 -声 -壱 -売 -壷 -壹 -壺 -壽 -変 -夏 -夕 -外 -夙 -多 -夜 -夢 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -失 -夷 -夾 -奄 -奇 -奈 -奉 -奎 -奏 -契 -奔 -奕 -套 -奘 -奠 -奢 -奥 -奨 -奪 -奮 -女 -奴 -奸 -好 -如 -妃 -妄 -妊 -妍 -妓 -妖 -妙 -妥 -妨 -妬 -妲 -妹 -妻 -妾 -姉 -始 -姐 -姓 -委 -姚 -姜 -姞 -姥 -姦 -姨 -姪 -姫 -姶 -姻 -姿 -威 -娑 -娘 -娟 -娠 -娩 -娯 -娼 -婆 -婉 -婚 -婢 -婦 -婬 -婿 -媄 -媒 -媓 -媚 -媛 -媞 -媽 -嫁 -嫄 -嫉 -嫌 -嫐 -嫗 -嫡 -嬉 -嬌 -嬢 -嬪 -嬬 -嬾 -孁 -子 -孔 -字 -存 -孚 -孝 -孟 -季 -孤 -学 -孫 -孵 -學 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宕 -宗 -官 -宙 -定 -宛 -宜 -宝 -実 -客 -宣 -室 -宥 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寂 -寄 -寅 -密 -寇 -富 -寒 -寓 -寔 -寛 -寝 -察 -寡 -實 -寧 -審 -寮 -寵 -寶 -寸 -寺 -対 -寿 -封 -専 -射 -将 -尉 -尊 -尋 -對 -導 -小 -少 -尖 -尚 -尤 -尪 -尭 -就 -尹 -尺 -尻 -尼 -尽 -尾 -尿 -局 -居 -屈 -届 -屋 -屍 -屎 -屏 -屑 -屓 -展 -属 -屠 -層 -履 
-屯 -山 -岐 -岑 -岡 -岩 -岫 -岬 -岳 -岷 -岸 -峠 -峡 -峨 -峯 -峰 -島 -峻 -崇 -崋 -崎 -崑 -崖 -崗 -崛 -崩 -嵌 -嵐 -嵩 -嵯 -嶂 -嶋 -嶠 -嶺 -嶼 -嶽 -巀 -巌 -巒 -巖 -川 -州 -巡 -巣 -工 -左 -巧 -巨 -巫 -差 -己 -巳 -巴 -巷 -巻 -巽 -巾 -市 -布 -帆 -希 -帖 -帚 -帛 -帝 -帥 -師 -席 -帯 -帰 -帳 -帷 -常 -帽 -幄 -幅 -幇 -幌 -幔 -幕 -幟 -幡 -幢 -幣 -干 -平 -年 -并 -幸 -幹 -幻 -幼 -幽 -幾 -庁 -広 -庄 -庇 -床 -序 -底 -庖 -店 -庚 -府 -度 -座 -庫 -庭 -庵 -庶 -康 -庸 -廂 -廃 -廉 -廊 -廓 -廟 -廠 -廣 -廬 -延 -廷 -建 -廻 -廼 -廿 -弁 -弄 -弉 -弊 -弌 -式 -弐 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弥 -弦 -弧 -弱 -張 -強 -弼 -弾 -彈 -彊 -彌 -彎 -当 -彗 -彙 -彝 -形 -彦 -彩 -彫 -彬 -彭 -彰 -影 -彷 -役 -彼 -往 -征 -徂 -径 -待 -律 -後 -徐 -徑 -徒 -従 -得 -徠 -御 -徧 -徨 -復 -循 -徭 -微 -徳 -徴 -德 -徹 -徽 -心 -必 -忉 -忌 -忍 -志 -忘 -忙 -応 -忠 -快 -忯 -念 -忻 -忽 -忿 -怒 -怖 -思 -怠 -怡 -急 -性 -怨 -怪 -怯 -恂 -恋 -恐 -恒 -恕 -恣 -恤 -恥 -恨 -恩 -恬 -恭 -息 -恵 -悉 -悌 -悍 -悔 -悟 -悠 -患 -悦 -悩 -悪 -悲 -悼 -情 -惇 -惑 -惚 -惜 -惟 -惠 -惣 -惧 -惨 -惰 -想 -惹 -惺 -愈 -愉 -愍 -意 -愔 -愚 -愛 -感 -愷 -愿 -慈 -態 -慌 -慎 -慕 -慢 -慣 -慧 -慨 -慮 -慰 -慶 -憂 -憎 -憐 -憑 -憙 -憤 -憧 -憩 -憬 -憲 -憶 -憾 -懇 -應 -懌 -懐 -懲 -懸 -懺 -懽 -懿 -戈 -戊 -戌 -戎 -成 -我 -戒 -戔 -或 -戚 -戟 -戦 -截 -戮 -戯 -戴 -戸 -戻 -房 -所 -扁 -扇 -扈 -扉 -手 -才 -打 -払 -托 -扮 -扱 -扶 -批 -承 -技 -抄 -把 -抑 -抓 -投 -抗 -折 -抜 -択 -披 -抱 -抵 -抹 -押 -抽 -担 -拇 -拈 -拉 -拍 -拏 -拐 -拒 -拓 -拘 -拙 -招 -拝 -拠 -拡 -括 -拭 -拳 -拵 -拶 -拾 -拿 -持 -挂 -指 -按 -挑 -挙 -挟 -挨 -振 -挺 -挽 -挿 -捉 -捕 -捗 -捜 -捧 -捨 -据 -捺 -捻 -掃 -掄 -授 -掌 -排 -掖 -掘 -掛 -掟 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掲 -掴 -掻 -掾 -揃 -揄 -揆 -揉 -描 -提 -揖 -揚 -換 -握 -揮 -援 -揶 -揺 -損 -搦 -搬 -搭 -携 -搾 -摂 -摘 -摩 -摸 -摺 -撃 -撒 -撞 -撤 -撥 -撫 -播 -撮 -撰 -撲 -撹 -擁 -操 -擔 -擦 -擬 -擾 -攘 -攝 -攣 -支 -收 -改 -攻 -放 -政 -故 -敏 -救 -敗 -教 -敢 -散 -敦 -敬 -数 -整 -敵 -敷 -斂 -文 -斉 -斎 -斐 -斑 -斗 -料 -斜 -斟 -斤 -斥 -斧 -斬 -断 -斯 -新 -方 -於 -施 -旁 -旅 -旋 -旌 -族 -旗 -旛 -无 -旡 -既 -日 -旦 -旧 -旨 -早 -旬 -旭 -旺 -旻 -昂 -昆 -昇 -昉 -昌 -明 -昏 -易 -昔 -星 -映 -春 -昧 -昨 -昪 -昭 -是 -昵 -昼 -晁 -時 -晃 -晋 -晏 -晒 -晟 -晦 -晧 -晩 -普 -景 -晴 -晶 -智 -暁 -暇 -暈 -暉 -暑 -暖 -暗 -暘 -暢 -暦 -暫 -暮 -暲 -暴 -暹 -暾 -曄 -曇 -曉 -曖 -曙 -曜 -曝 -曠 -曰 -曲 -曳 -更 -書 -曹 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朋 -服 -朏 -朔 -朕 -朗 -望 -朝 -期 -朧 -木 -未 -末 -本 -札 -朱 -朴 -机 -朽 -杁 -杉 -李 -杏 -材 -村 -杓 -杖 -杜 -杞 -束 -条 -杢 -杣 -来 -杭 -杮 -杯 -東 -杲 -杵 -杷 -杼 -松 -板 -枅 -枇 -析 -枓 -枕 -林 -枚 -果 -枝 -枠 -枡 -枢 -枯 -枳 -架 -柄 -柊 -柏 -某 -柑 -染 -柔 -柘 -柚 -柯 -柱 -柳 -柴 -柵 -査 -柾 -柿 -栂 -栃 -栄 -栖 -栗 -校 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桁 -桂 -桃 -框 -案 -桐 -桑 -桓 -桔 -桜 -桝 -桟 -桧 -桴 -桶 -桾 -梁 -梅 -梆 -梓 -梔 -梗 -梛 -條 -梟 -梢 -梧 -梨 -械 -梱 -梲 -梵 -梶 -棄 -棋 -棒 -棗 -棘 -棚 -棟 -棠 -森 -棲 -棹 -棺 -椀 -椅 -椋 -植 -椎 -椏 -椒 -椙 -検 -椥 -椹 -椿 -楊 -楓 -楕 -楚 -楞 -楠 -楡 -楢 -楨 -楪 -楫 -業 -楮 -楯 -楳 -極 -楷 -楼 -楽 -概 -榊 -榎 -榕 -榛 -榜 -榮 -榱 -榴 -槃 -槇 -槊 -構 -槌 -槍 -槐 -様 -槙 -槻 -槽 -槿 -樂 -樋 -樓 -樗 -標 -樟 -模 -権 -横 -樫 -樵 -樹 -樺 -樽 -橇 -橋 -橘 -機 -橿 -檀 -檄 -檎 -檐 -檗 -檜 -檣 -檥 -檬 -檮 -檸 -檻 -櫃 -櫓 -櫛 -櫟 -櫨 -櫻 -欄 -欅 -欠 -次 -欣 -欧 -欲 -欺 -欽 -款 -歌 -歎 -歓 -止 -正 -此 -武 -歩 -歪 -歯 -歳 -歴 -死 -殆 -殉 -殊 -残 -殖 -殯 -殴 -段 -殷 -殺 -殻 -殿 -毀 -毅 -母 -毎 -毒 -比 -毘 -毛 -毫 -毬 -氈 -氏 -民 -気 -水 -氷 -永 -氾 -汀 -汁 -求 -汎 -汐 -汗 -汚 -汝 -江 -池 -汪 -汰 -汲 -決 -汽 -沂 -沃 -沅 -沆 -沈 -沌 -沐 -沓 -沖 -沙 -没 -沢 -沱 -河 -沸 -油 -治 -沼 -沽 -沿 -況 -泉 -泊 -泌 -法 -泗 -泡 -波 -泣 -泥 -注 -泯 -泰 -泳 -洋 -洒 -洗 -洛 -洞 -津 -洩 -洪 -洲 -洸 -洹 -活 -洽 -派 -流 -浄 -浅 -浙 -浚 -浜 -浣 -浦 -浩 -浪 -浮 -浴 -海 -浸 -涅 -消 -涌 -涙 -涛 -涯 -液 -涵 -涼 -淀 -淄 -淆 -淇 -淋 -淑 -淘 -淡 -淤 -淨 -淫 -深 -淳 -淵 -混 -淹 -添 -清 -済 -渉 -渋 -渓 -渕 -渚 -減 -渟 -渠 -渡 -渤 -渥 -渦 -温 -渫 -測 -港 -游 -渾 -湊 -湖 -湘 -湛 -湧 -湫 -湯 -湾 -湿 -満 -源 -準 -溜 -溝 -溢 -溥 -溪 -溶 -溺 -滄 -滅 -滋 -滌 -滑 -滕 -滝 -滞 -滴 -滸 -滹 -滿 -漁 -漂 -漆 -漉 -漏 -漑 -演 -漕 -漠 -漢 -漣 -漫 -漬 -漱 -漸 -漿 -潅 -潔 -潙 -潜 -潟 -潤 -潭 -潮 -潰 -潴 -澁 -澂 -澄 -澎 -澗 -澤 -澪 -澱 -澳 -激 -濁 -濃 -濟 -濠 -濡 -濤 -濫 -濯 -濱 -濾 -瀉 -瀋 -瀑 -瀕 -瀞 -瀟 -瀧 -瀬 -瀾 -灌 -灑 -灘 -火 -灯 -灰 -灸 -災 -炉 -炊 -炎 -炒 -炭 -炮 -炷 -点 -為 -烈 -烏 -烙 -烝 -烹 -焔 -焙 -焚 -無 -焦 -然 -焼 -煇 -煉 -煌 -煎 -煕 -煙 -煤 -煥 -照 -煩 -煬 -煮 -煽 -熈 -熊 -熙 -熟 -熨 -熱 -熹 -熾 -燃 -燈 -燎 -燔 -燕 -燗 -燥 -燭 -燻 -爆 -爐 -爪 -爬 -爲 -爵 -父 -爺 -爼 -爽 -爾 -片 -版 -牌 -牒 -牘 -牙 -牛 -牝 -牟 -牡 -牢 -牧 -物 -牲 -特 -牽 -犂 -犠 -犬 -犯 -状 -狂 -狄 -狐 -狗 -狙 -狛 -狡 -狩 -独 -狭 -狷 -狸 -狼 -猊 
-猛 -猟 -猥 -猨 -猩 -猪 -猫 -献 -猴 -猶 -猷 -猾 -猿 -獄 -獅 -獏 -獣 -獲 -玄 -玅 -率 -玉 -王 -玖 -玩 -玲 -珀 -珂 -珈 -珉 -珊 -珍 -珎 -珞 -珠 -珣 -珥 -珪 -班 -現 -球 -理 -琉 -琢 -琥 -琦 -琮 -琲 -琳 -琴 -琵 -琶 -瑁 -瑋 -瑙 -瑚 -瑛 -瑜 -瑞 -瑠 -瑤 -瑩 -瑪 -瑳 -瑾 -璃 -璋 -璜 -璞 -璧 -璨 -環 -璵 -璽 -璿 -瓊 -瓔 -瓜 -瓢 -瓦 -瓶 -甍 -甑 -甕 -甘 -甚 -甞 -生 -産 -甥 -用 -甫 -田 -由 -甲 -申 -男 -町 -画 -界 -畏 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -番 -異 -畳 -當 -畷 -畸 -畺 -畿 -疆 -疇 -疋 -疎 -疏 -疑 -疫 -疱 -疲 -疹 -疼 -疾 -病 -症 -痒 -痔 -痕 -痘 -痙 -痛 -痢 -痩 -痴 -痺 -瘍 -瘡 -瘧 -療 -癇 -癌 -癒 -癖 -癡 -癪 -発 -登 -白 -百 -的 -皆 -皇 -皋 -皐 -皓 -皮 -皺 -皿 -盂 -盃 -盆 -盈 -益 -盒 -盗 -盛 -盞 -盟 -盡 -監 -盤 -盥 -盧 -目 -盲 -直 -相 -盾 -省 -眉 -看 -県 -眞 -真 -眠 -眷 -眺 -眼 -着 -睡 -督 -睦 -睨 -睿 -瞋 -瞑 -瞞 -瞬 -瞭 -瞰 -瞳 -瞻 -瞼 -瞿 -矍 -矛 -矜 -矢 -知 -矧 -矩 -短 -矮 -矯 -石 -砂 -砌 -研 -砕 -砥 -砦 -砧 -砲 -破 -砺 -硝 -硫 -硬 -硯 -碁 -碇 -碌 -碑 -碓 -碕 -碗 -碣 -碧 -碩 -確 -碾 -磁 -磐 -磔 -磧 -磨 -磬 -磯 -礁 -礎 -礒 -礙 -礫 -礬 -示 -礼 -社 -祀 -祁 -祇 -祈 -祉 -祐 -祓 -祕 -祖 -祗 -祚 -祝 -神 -祟 -祠 -祢 -祥 -票 -祭 -祷 -祺 -禁 -禄 -禅 -禊 -禍 -禎 -福 -禔 -禖 -禛 -禦 -禧 -禮 -禰 -禹 -禽 -禿 -秀 -私 -秋 -科 -秒 -秘 -租 -秤 -秦 -秩 -称 -移 -稀 -程 -税 -稔 -稗 -稙 -稚 -稜 -稠 -種 -稱 -稲 -稷 -稻 -稼 -稽 -稿 -穀 -穂 -穆 -積 -穎 -穏 -穗 -穜 -穢 -穣 -穫 -穴 -究 -空 -突 -窃 -窄 -窒 -窓 -窟 -窠 -窩 -窪 -窮 -窯 -竃 -竄 -竈 -立 -站 -竜 -竝 -竟 -章 -童 -竪 -竭 -端 -竴 -競 -竹 -竺 -竽 -竿 -笄 -笈 -笏 -笑 -笙 -笛 -笞 -笠 -笥 -符 -第 -笹 -筅 -筆 -筇 -筈 -等 -筋 -筌 -筍 -筏 -筐 -筑 -筒 -答 -策 -筝 -筥 -筧 -筬 -筮 -筯 -筰 -筵 -箆 -箇 -箋 -箏 -箒 -箔 -箕 -算 -箙 -箜 -管 -箪 -箭 -箱 -箸 -節 -篁 -範 -篆 -篇 -築 -篋 -篌 -篝 -篠 -篤 -篥 -篦 -篩 -篭 -篳 -篷 -簀 -簒 -簡 -簧 -簪 -簫 -簺 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籟 -籠 -籤 -籬 -米 -籾 -粂 -粉 -粋 -粒 -粕 -粗 -粘 -粛 -粟 -粥 -粧 -粮 -粳 -精 -糊 -糖 -糜 -糞 -糟 -糠 -糧 -糯 -糸 -糺 -系 -糾 -紀 -約 -紅 -紋 -納 -紐 -純 -紗 -紘 -紙 -級 -紛 -素 -紡 -索 -紫 -紬 -累 -細 -紳 -紵 -紹 -紺 -絁 -終 -絃 -組 -絅 -経 -結 -絖 -絞 -絡 -絣 -給 -統 -絲 -絵 -絶 -絹 -絽 -綏 -經 -継 -続 -綜 -綟 -綬 -維 -綱 -網 -綴 -綸 -綺 -綽 -綾 -綿 -緊 -緋 -総 -緑 -緒 -線 -締 -緥 -編 -緩 -緬 -緯 -練 -緻 -縁 -縄 -縅 -縒 -縛 -縞 -縢 -縣 -縦 -縫 -縮 -縹 -總 -績 -繁 -繊 -繋 -繍 -織 -繕 -繝 -繦 -繧 -繰 -繹 -繼 -纂 -纈 -纏 -纐 -纒 -纛 -缶 -罔 -罠 -罧 -罪 -置 -罰 -署 -罵 -罷 -罹 -羂 -羅 -羆 -羇 -羈 -羊 -羌 -美 -群 -羨 -義 -羯 -羲 -羹 -羽 -翁 -翅 -翌 -習 -翔 -翛 -翠 -翡 -翫 -翰 -翺 -翻 -翼 -耀 -老 -考 -者 -耆 -而 -耐 -耕 -耗 -耨 -耳 -耶 -耽 -聊 -聖 -聘 -聚 -聞 -聟 -聡 -聨 -聯 -聰 -聲 -聴 -職 -聾 -肄 -肆 -肇 -肉 -肋 -肌 -肖 -肘 -肛 -肝 -股 -肢 -肥 -肩 -肪 -肯 -肱 -育 -肴 -肺 -胃 -胆 -背 -胎 -胖 -胚 -胝 -胞 -胡 -胤 -胱 -胴 -胸 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脚 -脛 -脩 -脱 -脳 -腋 -腎 -腐 -腑 -腔 -腕 -腫 -腰 -腱 -腸 -腹 -腺 -腿 -膀 -膏 -膚 -膜 -膝 -膠 -膣 -膨 -膩 -膳 -膵 -膾 -膿 -臂 -臆 -臈 -臍 -臓 -臘 -臚 -臣 -臥 -臨 -自 -臭 -至 -致 -臺 -臼 -舂 -舅 -與 -興 -舌 -舍 -舎 -舒 -舖 -舗 -舘 -舜 -舞 -舟 -舩 -航 -般 -舳 -舶 -船 -艇 -艘 -艦 -艮 -良 -色 -艶 -芋 -芒 -芙 -芝 -芥 -芦 -芬 -芭 -芯 -花 -芳 -芸 -芹 -芻 -芽 -芿 -苅 -苑 -苔 -苗 -苛 -苞 -苡 -若 -苦 -苧 -苫 -英 -苴 -苻 -茂 -范 -茄 -茅 -茎 -茗 -茘 -茜 -茨 -茲 -茵 -茶 -茸 -茹 -草 -荊 -荏 -荒 -荘 -荷 -荻 -荼 -莞 -莪 -莫 -莬 -莱 -莵 -莽 -菅 -菊 -菌 -菓 -菖 -菘 -菜 -菟 -菩 -菫 -華 -菱 -菴 -萄 -萊 -萌 -萍 -萎 -萠 -萩 -萬 -萱 -落 -葉 -著 -葛 -葡 -董 -葦 -葩 -葬 -葭 -葱 -葵 -葺 -蒋 -蒐 -蒔 -蒙 -蒟 -蒡 -蒲 -蒸 -蒻 -蒼 -蒿 -蓄 -蓆 -蓉 -蓋 -蓑 -蓬 -蓮 -蓼 -蔀 -蔑 -蔓 -蔚 -蔡 -蔦 -蔬 -蔭 -蔵 -蔽 -蕃 -蕉 -蕊 -蕎 -蕨 -蕩 -蕪 -蕭 -蕾 -薄 -薇 -薊 -薔 -薗 -薙 -薛 -薦 -薨 -薩 -薪 -薫 -薬 -薭 -薮 -藁 -藉 -藍 -藏 -藐 -藝 -藤 -藩 -藪 -藷 -藹 -藺 -藻 -蘂 -蘆 -蘇 -蘊 -蘭 -虎 -虐 -虔 -虚 -虜 -虞 -號 -虫 -虹 -虻 -蚊 -蚕 -蛇 -蛉 -蛍 -蛎 -蛙 -蛛 -蛟 -蛤 -蛭 -蛮 -蛸 -蛹 -蛾 -蜀 -蜂 -蜃 -蜆 -蜊 -蜘 -蜜 -蜷 -蜻 -蝉 -蝋 -蝕 -蝙 -蝠 -蝦 -蝶 -蝿 -螂 -融 -螣 -螺 -蟄 -蟇 -蟠 -蟷 -蟹 -蟻 -蠢 -蠣 -血 -衆 -行 -衍 -衒 -術 -街 -衙 -衛 -衝 -衞 -衡 -衢 -衣 -表 -衫 -衰 -衵 -衷 -衽 -衾 -衿 -袁 -袈 -袋 -袍 -袒 -袖 -袙 -袞 -袢 -被 -袰 -袱 -袴 -袷 -袿 -裁 -裂 -裃 -装 -裏 -裔 -裕 -裘 -裙 -補 -裟 -裡 -裲 -裳 -裴 -裸 -裹 -製 -裾 -褂 -褄 -複 -褌 -褐 -褒 -褥 -褪 -褶 -褻 -襄 -襖 -襞 -襟 -襠 -襦 -襪 -襲 -襴 -襷 -西 -要 -覆 -覇 -覈 -見 -規 -視 -覗 -覚 -覧 -親 -覲 -観 -覺 -觀 -角 -解 -触 -言 -訂 -計 -討 -訓 -託 -記 -訛 -訟 -訢 -訥 -訪 -設 -許 -訳 -訴 -訶 -診 -註 -証 -詐 -詔 -評 -詛 -詞 -詠 -詢 -詣 -試 -詩 -詫 -詮 -詰 -話 -該 -詳 -誄 -誅 -誇 -誉 -誌 -認 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -説 -読 -誰 -課 -誼 -誾 -調 -談 -請 -諌 -諍 -諏 -諒 -論 -諚 -諜 -諟 -諡 -諦 -諧 -諫 -諭 -諮 -諱 -諶 -諷 -諸 -諺 -諾 -謀 -謄 -謌 -謎 -謗 -謙 -謚 -講 -謝 -謡 -謫 -謬 -謹 -證 -識 -譚 -譛 -譜 -警 -譬 -譯 
-議 -譲 -譴 -護 -讀 -讃 -讐 -讒 -谷 -谿 -豅 -豆 -豊 -豎 -豐 -豚 -象 -豪 -豫 -豹 -貌 -貝 -貞 -負 -財 -貢 -貧 -貨 -販 -貪 -貫 -責 -貯 -貰 -貴 -買 -貸 -費 -貼 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賎 -賑 -賓 -賛 -賜 -賞 -賠 -賢 -賣 -賤 -賦 -質 -賭 -購 -賽 -贄 -贅 -贈 -贋 -贔 -贖 -赤 -赦 -走 -赴 -起 -超 -越 -趙 -趣 -足 -趺 -趾 -跋 -跏 -距 -跡 -跨 -跪 -路 -跳 -践 -踊 -踏 -踐 -踞 -踪 -踵 -蹄 -蹉 -蹊 -蹟 -蹲 -蹴 -躅 -躇 -躊 -躍 -躑 -躙 -躪 -身 -躬 -躯 -躰 -車 -軋 -軌 -軍 -軒 -軟 -転 -軸 -軻 -軽 -軾 -較 -載 -輌 -輔 -輜 -輝 -輦 -輩 -輪 -輯 -輸 -輿 -轄 -轍 -轟 -轢 -辛 -辞 -辟 -辥 -辦 -辨 -辰 -辱 -農 -辺 -辻 -込 -迂 -迅 -迎 -近 -返 -迢 -迦 -迪 -迫 -迭 -述 -迷 -迹 -追 -退 -送 -逃 -逅 -逆 -逍 -透 -逐 -逓 -途 -逕 -逗 -這 -通 -逝 -逞 -速 -造 -逢 -連 -逮 -週 -進 -逸 -逼 -遁 -遂 -遅 -遇 -遊 -運 -遍 -過 -遐 -道 -達 -違 -遙 -遜 -遠 -遡 -遣 -遥 -適 -遭 -遮 -遯 -遵 -遷 -選 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邉 -邊 -邑 -那 -邦 -邨 -邪 -邯 -邵 -邸 -郁 -郊 -郎 -郡 -郢 -部 -郭 -郴 -郵 -郷 -都 -鄂 -鄙 -鄭 -鄰 -鄲 -酉 -酋 -酌 -配 -酎 -酒 -酔 -酢 -酥 -酪 -酬 -酵 -酷 -酸 -醍 -醐 -醒 -醗 -醜 -醤 -醪 -醵 -醸 -采 -釈 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釘 -釜 -針 -釣 -釧 -釿 -鈍 -鈎 -鈐 -鈔 -鈞 -鈦 -鈴 -鈷 -鈸 -鈿 -鉄 -鉇 -鉉 -鉋 -鉛 -鉢 -鉤 -鉦 -鉱 -鉾 -銀 -銃 -銅 -銈 -銑 -銕 -銘 -銚 -銜 -銭 -鋏 -鋒 -鋤 -鋭 -鋲 -鋳 -鋸 -鋺 -鋼 -錆 -錍 -錐 -錘 -錠 -錣 -錦 -錫 -錬 -錯 -録 -錵 -鍋 -鍍 -鍑 -鍔 -鍛 -鍬 -鍮 -鍵 -鍼 -鍾 -鎌 -鎖 -鎗 -鎚 -鎧 -鎬 -鎮 -鎰 -鎹 -鏃 -鏑 -鏡 -鐃 -鐇 -鐐 -鐔 -鐘 -鐙 -鐚 -鐡 -鐵 -鐸 -鑁 -鑊 -鑑 -鑒 -鑚 -鑠 -鑢 -鑰 -鑵 -鑷 -鑼 -鑽 -鑿 -長 -門 -閃 -閇 -閉 -開 -閏 -閑 -間 -閔 -閘 -関 -閣 -閤 -閥 -閦 -閨 -閬 -閲 -閻 -閼 -閾 -闇 -闍 -闔 -闕 -闘 -關 -闡 -闢 -闥 -阜 -阪 -阮 -阯 -防 -阻 -阿 -陀 -陂 -附 -陌 -降 -限 -陛 -陞 -院 -陣 -除 -陥 -陪 -陬 -陰 -陳 -陵 -陶 -陸 -険 -陽 -隅 -隆 -隈 -隊 -隋 -階 -随 -隔 -際 -障 -隠 -隣 -隧 -隷 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雊 -雋 -雌 -雍 -雑 -雖 -雙 -雛 -離 -難 -雨 -雪 -雫 -雰 -雲 -零 -雷 -雹 -電 -需 -震 -霊 -霍 -霖 -霜 -霞 -霧 -霰 -露 -靈 -青 -靖 -静 -靜 -非 -面 -革 -靫 -靭 -靱 -靴 -靺 -鞁 -鞄 -鞆 -鞋 -鞍 -鞏 -鞘 -鞠 -鞨 -鞭 -韋 -韓 -韜 -韮 -音 -韶 -韻 -響 -頁 -頂 -頃 -項 -順 -須 -頌 -預 -頑 -頒 -頓 -領 -頚 -頬 -頭 -頴 -頸 -頻 -頼 -顆 -題 -額 -顎 -顔 -顕 -顗 -願 -顛 -類 -顧 -顯 -風 -飛 -食 -飢 -飩 -飫 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餅 -餉 -養 -餌 -餐 -餓 -餘 -餝 -餡 -館 -饂 -饅 -饉 -饋 -饌 -饒 -饗 -首 -馗 -香 -馨 -馬 -馳 -馴 -駄 -駅 -駆 -駈 -駐 -駒 -駕 -駝 -駿 -騁 -騎 -騏 -騒 -験 -騙 -騨 -騰 -驕 -驚 -驛 -驢 -骨 -骸 -髄 -體 -高 -髙 -髢 -髪 -髭 -髮 -髷 -髻 -鬘 -鬚 -鬢 -鬨 -鬯 -鬱 -鬼 -魁 -魂 -魄 -魅 -魏 -魔 -魚 -魯 -鮎 -鮑 -鮒 -鮪 -鮫 -鮭 -鮮 -鯉 -鯔 -鯖 -鯛 -鯨 -鯰 -鯱 -鰐 -鰒 -鰭 -鰯 -鰰 -鰹 -鰻 -鱈 -鱒 -鱗 -鱧 -鳥 -鳩 -鳰 -鳳 -鳴 -鳶 -鴈 -鴉 -鴎 -鴛 -鴟 -鴦 -鴨 -鴫 -鴻 -鵄 -鵜 -鵞 -鵡 -鵬 -鵲 -鵺 -鶉 -鶏 -鶯 -鶴 -鷄 -鷙 -鷲 -鷹 -鷺 -鸚 -鸞 -鹸 -鹽 -鹿 -麁 -麒 -麓 -麗 -麝 -麞 -麟 -麦 -麩 -麹 -麺 -麻 -麾 -麿 -黄 -黌 -黍 -黒 -黙 -黛 -黠 -鼈 -鼉 -鼎 -鼓 -鼠 -鼻 -齊 -齋 -齟 -齢 -齬 -龍 -龕 -龗 -! -# -% -& -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -R -S -T -U -V -W -X -Z -a -c -d -e -f -h -i -j -k -l -m -n -o -p -r -s -t -u -y -z -~ -・ - diff --git a/backend/ppocr/utils/dict/ka_dict.txt b/backend/ppocr/utils/dict/ka_dict.txt deleted file mode 100644 index d506b691..00000000 --- a/backend/ppocr/utils/dict/ka_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ -k -a -_ -i -m -g -/ -1 -2 -I -L -S -V -R -C -0 -v -l -6 -4 -8 -. -j -p -ಗ -ು -ಣ -ಪ -ಡ -ಿ -ಸ -ಲ -ಾ -ದ -್ -7 -5 -3 -ವ -ಷ -ಬ -ಹ -ೆ -9 -ಅ -ಳ -ನ -ರ -ಉ -ಕ -ಎ -ೇ -ಂ -ೈ -ೊ -ೀ -ಯ -ೋ -ತ -ಶ -ಭ -ಧ -ಚ -ಜ -ೂ -ಮ -ಒ -ೃ -ಥ -ಇ -ಟ -ಖ -ಆ -ಞ -ಫ -- -ಢ -ಊ -ಓ -ಐ -ಃ -ಘ -ಝ -ೌ -ಠ -ಛ -ಔ -ಏ -ಈ -ಋ -೨ -೦ -೧ -೮ -೯ -೪ -, -೫ -೭ -೩ -೬ -ಙ -s -c -e -n -w -o -u -t -d -E -A -T -B -Z -N -G -O -q -z -r -x -P -K -M -J -U -D -f -F -h -b -W -Y -y -H -X -Q -' -# -& -! -@ -$ -: -% -é -É -( -? 
-+ - diff --git a/backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt b/backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt deleted file mode 100644 index faded9f9..00000000 --- a/backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt +++ /dev/null @@ -1,4 +0,0 @@ -OTHER -QUESTION -ANSWER -HEADER diff --git a/backend/ppocr/utils/dict/kn_dict.txt b/backend/ppocr/utils/dict/kn_dict.txt deleted file mode 100644 index 33d605c4..00000000 --- a/backend/ppocr/utils/dict/kn_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ -k -a -_ -i -m -g -/ -1 -2 -I -L -S -V -R -C -0 -v -l -6 -4 -8 -. -j -p -ಗ -ು -ಣ -ಪ -ಡ -ಿ -ಸ -ಲ -ಾ -ದ -್ -7 -5 -3 -ವ -ಷ -ಬ -ಹ -ೆ -9 -ಅ -ಳ -ನ -ರ -ಉ -ಕ -ಎ -ೇ -ಂ -ೈ -ೊ -ೀ -ಯ -ೋ -ತ -ಶ -ಭ -ಧ -ಚ -ಜ -ೂ -ಮ -ಒ -ೃ -ಥ -ಇ -ಟ -ಖ -ಆ -ಞ -ಫ -- -ಢ -ಊ -ಓ -ಐ -ಃ -ಘ -ಝ -ೌ -ಠ -ಛ -ಔ -ಏ -ಈ -ಋ -೨ -೦ -೧ -೮ -೯ -೪ -, -೫ -೭ -೩ -೬ -ಙ -s -c -e -n -w -o -u -t -d -E -A -T -B -Z -N -G -O -q -z -r -x -P -K -M -J -U -D -f -F -h -b -W -Y -y -H -X -Q -' -# -& -! -@ -$ -: -% -é -É -( -? -+ - diff --git a/backend/ppocr/utils/dict/korean_dict.txt b/backend/ppocr/utils/dict/korean_dict.txt deleted file mode 100644 index a13899f1..00000000 --- a/backend/ppocr/utils/dict/korean_dict.txt +++ /dev/null @@ -1,3688 +0,0 @@ -! -" -# -$ -% -& -' -* -+ -- -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -© -° -² -½ -Á -Ä -Å -Ç -É -Í -Î -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -ì -í -î -ï -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ě -ğ -ī -İ -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ź -ż -Ž -ž -Ș -ș -Α -Δ -α -λ -φ -Г -О -а -в -л -о -р -с -т -я -​ -’ -“ -” -→ -∇ -∼ -「 -」 -ア -カ -グ -ニ -ラ -ン -ㄱ -ㄴ -ㄷ -ㄸ -ㄹ -ㅂ -ㅅ -ㅆ -ㅇ -ㅈ -ㅊ -ㅋ -ㅌ -ㅎ -ㅓ -ㅜ -ㅣ -一 -丁 -七 -三 -上 -下 -不 -丑 -世 -丘 -丞 -中 -丸 -丹 -主 -乃 -久 -之 -乎 -乘 -九 -也 -乳 -乾 -事 -二 -云 -互 -五 -井 -亞 -亡 -交 -亥 -亨 -享 -京 -亭 -人 -仁 -今 -他 -仙 -代 -令 -以 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -伯 -伴 -伸 -佃 -佈 -位 -低 -住 -佐 -何 -佛 -作 -使 -來 -供 -依 -侯 -侵 -侶 -便 -俗 -保 -俠 -信 -修 -俱 -俳 -倉 -個 -倍 -倒 -候 -借 -値 -倫 -倭 -假 -偈 -偉 -偏 -停 -偶 -傅 -傑 -傳 -傷 -傾 -像 -僞 -僥 -僧 -價 -儀 -儉 -儒 -優 -儼 -兀 -允 -元 -兆 -先 -光 -克 -兒 -入 -內 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -再 -冠 -冥 -冶 -准 -凞 -凡 -凱 -出 -函 -刀 -分 -刊 -刑 -列 -初 -判 -別 -利 -到 -制 -券 -刺 -刻 -則 -前 -剛 -副 -創 -劃 -劑 -力 -功 -加 -劣 -助 -劫 -勇 -動 -務 -勝 -勢 -勳 -勸 -匈 -化 -北 -匠 -區 -十 -千 -午 -半 -卍 -卑 -卒 -卓 -南 -博 -卜 -占 -卦 -印 -危 -卵 -卷 -卽 -卿 -厄 -原 -厦 -去 -參 -又 -叉 -友 -反 -叔 -受 -口 -古 -句 -可 -台 -史 -右 -司 -各 -合 -吉 -同 -名 -后 -吏 -吐 -君 -吠 -吳 -呂 -告 -周 -味 -呵 -命 -和 -咳 -咸 -咽 -哀 -品 -哨 -哮 -哲 -唐 -唯 -唱 -商 -問 -啼 -善 -喆 -喉 -喜 -喩 -喪 -嘗 -器 -嚴 -囊 -四 -回 -因 -困 -固 -圈 -國 -圍 -園 -圓 -圖 -團 -土 -在 -地 -均 -坊 -坐 -坑 -坵 -型 -垢 -城 -域 -埴 -執 -培 -基 -堂 -堅 -堆 -堤 -堯 -報 -場 -塔 -塚 -塞 -塵 -境 -墜 -墟 -墨 -墳 -墾 -壁 -壇 -壓 -壤 -士 -壬 -壯 -壺 -壽 -夏 -夕 -外 -多 -夜 -夢 -大 -天 -太 -夫 -央 -失 -夷 -奄 -奇 -奉 -奎 -奏 -契 -奔 -奮 -女 -奴 -好 -如 -妄 -妊 -妖 -妙 -始 -姑 -姓 -姚 -姜 -威 -婆 -婚 -婦 -媒 -媚 -子 -孔 -字 -存 -孝 -孟 -季 -孤 -孫 -學 -孺 -宇 -守 -安 -宋 -宗 -官 -宙 -定 -客 -宣 -室 -宮 -害 -家 -容 -寂 -寃 -寄 -寅 -密 -寇 -富 -寒 -寓 -實 -審 -寫 -寬 -寶 -寸 -寺 -封 -將 -專 -尊 -對 -小 -少 -尙 -尹 -尼 -尿 -局 -居 -屈 -屋 -屍 -屎 -屛 -層 -屬 -山 -岐 -岡 -岩 -岳 -岸 -峙 -峰 -島 -峻 -峽 -崇 -崔 -崖 -崩 -嶋 -巖 -川 -州 -巢 -工 -左 -巧 -巨 -巫 -差 -己 -巷 -市 -布 -帝 -師 -帶 -常 -帽 -幕 -干 -平 -年 -幹 -幻 -幼 -幽 -庇 -序 -店 -府 -度 -座 -庫 -庭 -康 -廟 -廣 -廳 -延 -廷 -建 -廻 -弁 -式 -弑 -弓 -引 -弘 -弟 -弱 -張 -强 -弼 -彌 -彛 -形 -彬 -影 -役 -彼 -彿 -往 -征 -待 -律 -後 -徐 -徑 -得 -從 -循 -微 -德 -徹 -心 -必 -忌 -忍 -志 -忠 -思 -怡 -急 -性 -恐 -恒 -恨 -恩 -悅 -悖 -患 -悲 -情 -惑 -惟 -惠 -惡 -想 -惺 -愁 -意 -愚 -愛 -感 -愼 -慈 -態 -慕 -慣 -慧 -慾 -憂 -憤 -憺 -應 -懸 -戎 -成 -我 -戟 -戮 -戰 -戴 -戶 -房 -所 -手 -才 -打 -批 -承 -技 -抄 -把 -抗 
-抱 -抽 -拇 -拓 -拘 -拙 -拜 -拾 -持 -指 -捌 -捨 -捿 -授 -掌 -排 -接 -推 -提 -揚 -揭 -援 -損 -搗 -摩 -播 -操 -擒 -擔 -擘 -據 -擧 -攘 -攝 -攬 -支 -改 -攻 -放 -政 -故 -敍 -敎 -救 -敗 -散 -敬 -整 -數 -文 -斗 -料 -斛 -斜 -斧 -斯 -新 -斷 -方 -於 -施 -旋 -族 -旗 -日 -旨 -早 -旱 -昌 -明 -易 -昔 -星 -春 -昧 -昭 -是 -時 -晉 -晋 -晩 -普 -景 -晴 -晶 -智 -暈 -暑 -暗 -暘 -曉 -曜 -曠 -曦 -曰 -曲 -書 -曹 -曼 -曾 -最 -會 -月 -有 -朋 -服 -望 -朝 -期 -木 -未 -末 -本 -朱 -朴 -李 -材 -村 -杖 -杜 -杞 -杭 -杯 -東 -松 -板 -林 -果 -枝 -枯 -枰 -枾 -柏 -柑 -柱 -栗 -校 -栢 -核 -根 -格 -桀 -桂 -案 -桎 -桑 -桓 -桔 -梁 -梏 -梓 -梗 -條 -梨 -梵 -棗 -棟 -森 -植 -椒 -楊 -楓 -楚 -業 -楮 -極 -榮 -槃 -槍 -樂 -樓 -樗 -樣 -樸 -樹 -樺 -樽 -橄 -橋 -橘 -機 -橡 -檀 -檎 -權 -欌 -欖 -次 -欲 -歌 -歐 -止 -正 -此 -步 -武 -歲 -歸 -死 -殖 -段 -殷 -殺 -殿 -毅 -母 -毒 -比 -毛 -氏 -民 -氣 -水 -永 -求 -汎 -汗 -江 -池 -沅 -沒 -沖 -沙 -沛 -河 -油 -治 -沼 -沿 -泉 -泊 -法 -泗 -泡 -波 -注 -泰 -洋 -洙 -洛 -洞 -津 -洲 -活 -派 -流 -浅 -浦 -浮 -浴 -海 -涅 -涇 -消 -涌 -液 -淑 -淡 -淨 -淫 -深 -淳 -淵 -淸 -渠 -渡 -游 -渾 -湖 -湯 -源 -溪 -溫 -溶 -滄 -滅 -滋 -滯 -滿 -漁 -漆 -漢 -漫 -漸 -潑 -潤 -潭 -澄 -澎 -澤 -澳 -澹 -濁 -濕 -濟 -濤 -濯 -瀋 -瀝 -灣 -火 -灰 -灸 -災 -炎 -炭 -点 -烈 -烏 -烙 -焚 -無 -焦 -然 -煌 -煎 -照 -煬 -煮 -熟 -熱 -燁 -燈 -燔 -燕 -燥 -燧 -燮 -爲 -爵 -父 -片 -版 -牌 -牛 -牝 -牟 -牡 -物 -特 -犧 -犬 -狀 -狗 -猥 -猩 -猪 -獨 -獵 -獸 -獻 -玄 -玉 -王 -玲 -珍 -珠 -珪 -班 -現 -球 -理 -琴 -瑞 -瑟 -瑪 -璃 -璋 -璽 -瓜 -瓦 -甑 -甘 -生 -産 -用 -甫 -田 -由 -甲 -申 -男 -界 -畏 -留 -畜 -畢 -略 -番 -異 -畵 -當 -畸 -疏 -疑 -疫 -疹 -疼 -病 -症 -痔 -痛 -痺 -瘀 -瘍 -瘡 -療 -癌 -癖 -登 -發 -白 -百 -的 -皆 -皇 -皮 -盂 -盆 -益 -盛 -盜 -盟 -盡 -盤 -盧 -目 -直 -相 -省 -看 -眞 -眼 -睡 -督 -瞋 -矢 -矣 -知 -短 -石 -破 -碍 -碑 -磁 -磨 -磬 -示 -社 -祇 -祖 -祝 -神 -祥 -祭 -祺 -禁 -禅 -禍 -福 -禦 -禪 -禮 -禹 -禽 -禾 -秀 -私 -秉 -秋 -科 -秘 -秤 -秦 -秩 -移 -稀 -稗 -種 -稱 -稷 -稼 -稽 -穀 -穆 -積 -空 -窮 -竅 -立 -章 -童 -竭 -端 -竹 -笑 -符 -第 -筆 -等 -筍 -答 -策 -箋 -箕 -管 -箱 -節 -篇 -簡 -米 -粉 -粘 -粥 -精 -糖 -糞 -系 -紀 -紂 -約 -紅 -紋 -純 -紙 -級 -素 -索 -紫 -紬 -累 -細 -紳 -終 -組 -結 -絡 -統 -絲 -絶 -絹 -經 -綠 -維 -綱 -網 -綸 -綽 -緖 -線 -緣 -緯 -縣 -縱 -總 -織 -繡 -繩 -繪 -繭 -纂 -續 -罕 -置 -罰 -羅 -羊 -美 -群 -義 -羽 -翁 -習 -翟 -老 -考 -者 -而 -耐 -耕 -耳 -聃 -聖 -聞 -聰 -聲 -職 -肇 -肉 -肖 -肝 -股 -肥 -育 -肺 -胃 -胎 -胚 -胞 -胡 -胥 -能 -脂 -脈 -脚 -脛 -脣 -脩 -脫 -脯 -脾 -腋 -腎 -腫 -腸 -腹 -膜 -膠 -膨 -膽 -臆 -臟 -臣 -臥 -臨 -自 -至 -致 -臺 -臼 -臾 -與 -興 -舊 -舌 -舍 -舒 -舜 -舟 -般 -船 -艦 -良 -色 -芋 -花 -芳 -芽 -苑 -苔 -苕 -苛 -苞 -若 -苦 -英 -茂 -茵 -茶 -茹 -荀 -荇 -草 -荒 -荷 -莊 -莫 -菊 -菌 -菜 -菩 -菫 -華 -菴 -菽 -萊 -萍 -萬 -落 -葉 -著 -葛 -董 -葬 -蒙 -蒜 -蒲 -蒸 -蒿 -蓮 -蔓 -蔘 -蔡 -蔬 -蕃 -蕉 -蕓 -薄 -薑 -薛 -薩 -薪 -薺 -藏 -藝 -藤 -藥 -藩 -藻 -蘆 -蘇 -蘊 -蘚 -蘭 -虎 -處 -虛 -虞 -虹 -蜀 -蜂 -蜜 -蝕 -蝶 -融 -蟬 -蟲 -蠶 -蠻 -血 -衆 -行 -術 -衛 -衡 -衣 -表 -袁 -裔 -裕 -裙 -補 -製 -複 -襄 -西 -要 -見 -視 -親 -覺 -觀 -角 -解 -言 -訂 -訊 -訓 -託 -記 -訣 -設 -診 -註 -評 -詩 -話 -詵 -誅 -誌 -認 -誕 -語 -誠 -誤 -誥 -誦 -說 -調 -談 -諍 -論 -諡 -諫 -諭 -諸 -謙 -講 -謝 -謠 -證 -識 -譚 -譜 -譯 -議 -護 -讀 -變 -谷 -豆 -豊 -豚 -象 -豪 -豫 -貝 -貞 -財 -貧 -貨 -貪 -貫 -貴 -貸 -費 -資 -賊 -賓 -賞 -賢 -賣 -賦 -質 -贍 -赤 -赫 -走 -起 -超 -越 -趙 -趣 -趨 -足 -趾 -跋 -跡 -路 -踏 -蹟 -身 -躬 -車 -軍 -軒 -軟 -載 -輓 -輕 -輪 -輯 -輸 -輻 -輿 -轅 -轉 -辨 -辭 -辯 -辰 -農 -近 -迦 -述 -追 -逆 -透 -逐 -通 -逝 -造 -逢 -連 -進 -逵 -遂 -遊 -運 -遍 -過 -道 -達 -遠 -遡 -適 -遷 -選 -遺 -遽 -還 -邊 -邑 -那 -邪 -郞 -郡 -部 -都 -鄒 -鄕 -鄭 -鄲 -配 -酒 -酸 -醉 -醫 -醯 -釋 -里 -重 -野 -量 -釐 -金 -針 -鈍 -鈴 -鉞 -銀 -銅 -銘 -鋼 -錄 -錢 -錦 -鎭 -鏡 -鐘 -鐵 -鑑 -鑛 -長 -門 -閃 -開 -間 -閔 -閣 -閥 -閭 -閻 -闕 -關 -阪 -防 -阿 -陀 -降 -限 -陝 -院 -陰 -陳 -陵 -陶 -陸 -陽 -隆 -隊 -隋 -階 -際 -障 -隣 -隨 -隱 -隷 -雀 -雄 -雅 -集 -雇 -雌 -雖 -雙 -雜 -離 -難 -雨 -雪 -雲 -電 -霜 -露 -靈 -靑 -靖 -靜 -非 -面 -革 -靴 -鞏 -韓 -音 -韶 -韻 -順 -須 -頊 -頌 -領 -頭 -顔 -願 -顚 -類 -顯 -風 -飛 -食 -飢 -飮 -飯 -飾 -養 -餓 -餘 -首 -香 -馨 -馬 -駒 -騫 -騷 -驕 -骨 -骸 -髓 -體 -高 -髥 -髮 -鬪 -鬱 -鬼 -魏 -魔 -魚 -魯 -鮮 -鰍 -鰐 -鳥 -鳧 -鳳 -鴨 -鵲 -鶴 -鷄 -鷹 -鹽 -鹿 -麗 -麥 -麻 -黃 -黑 -默 -點 -黨 -鼎 -齊 -齋 -齒 -龍 -龜 -가 -각 -간 -갇 -갈 -갉 -감 -갑 -값 -갓 -갔 -강 -갖 -갗 -같 -갚 -갛 -개 -객 -갠 -갤 -갬 -갭 -갯 -갰 -갱 -갸 -걀 -걔 -걘 -거 -걱 -건 -걷 -걸 -검 -겁 -것 -겄 -겅 -겆 -겉 -겊 -겋 -게 -겐 -겔 -겟 -겠 -겡 -겨 -격 -겪 -견 -결 -겸 -겹 -겻 -겼 -경 -곁 -계 -곕 -곗 -고 -곡 -곤 -곧 -골 -곪 -곬 -곯 -곰 -곱 -곳 -공 -곶 -과 -곽 -관 -괄 -괌 -광 -괘 -괜 -괭 -괴 -괸 -굉 -교 -구 -국 -군 -굳 -굴 -굵 -굶 -굼 -굽 -굿 -궁 -궂 -궈 -권 -궐 -궜 -궝 -궤 -귀 -귄 -귈 -귓 -규 -균 -귤 -그 
-극 -근 -글 -긁 -금 -급 -긋 -긍 -기 -긴 -길 -김 -깁 -깃 -깅 -깊 -까 -깍 -깎 -깐 -깔 -깜 -깝 -깟 -깡 -깥 -깨 -깬 -깰 -깻 -깼 -깽 -꺄 -꺼 -꺽 -꺾 -껀 -껄 -껌 -껍 -껏 -껐 -껑 -께 -껴 -꼈 -꼍 -꼐 -꼬 -꼭 -꼴 -꼼 -꼽 -꼿 -꽁 -꽂 -꽃 -꽉 -꽝 -꽤 -꽥 -꾀 -꾜 -꾸 -꾹 -꾼 -꿀 -꿇 -꿈 -꿉 -꿋 -꿍 -꿎 -꿔 -꿨 -꿩 -꿰 -꿴 -뀄 -뀌 -뀐 -뀔 -뀜 -뀝 -끄 -끈 -끊 -끌 -끓 -끔 -끕 -끗 -끙 -끝 -끼 -끽 -낀 -낄 -낌 -낍 -낏 -낑 -나 -낙 -낚 -난 -낟 -날 -낡 -남 -납 -낫 -났 -낭 -낮 -낯 -낱 -낳 -내 -낵 -낸 -낼 -냄 -냅 -냇 -냈 -냉 -냐 -냔 -냘 -냥 -너 -넉 -넋 -넌 -널 -넓 -넘 -넙 -넛 -넜 -넝 -넣 -네 -넥 -넨 -넬 -넴 -넵 -넷 -넸 -넹 -녀 -녁 -년 -념 -녔 -녕 -녘 -녜 -노 -녹 -논 -놀 -놈 -놋 -농 -높 -놓 -놔 -놨 -뇌 -뇨 -뇩 -뇽 -누 -눅 -눈 -눌 -눔 -눕 -눗 -눠 -눴 -뉘 -뉜 -뉩 -뉴 -늄 -늅 -늉 -느 -늑 -는 -늘 -늙 -늠 -늡 -능 -늦 -늪 -늬 -니 -닉 -닌 -닐 -님 -닙 -닛 -닝 -닢 -다 -닥 -닦 -단 -닫 -달 -닭 -닮 -닯 -닳 -담 -답 -닷 -당 -닻 -닿 -대 -댁 -댄 -댈 -댐 -댑 -댓 -댔 -댕 -댜 -더 -덕 -덖 -던 -덜 -덟 -덤 -덥 -덧 -덩 -덫 -덮 -데 -덱 -덴 -델 -뎀 -뎃 -뎅 -뎌 -뎠 -뎨 -도 -독 -돈 -돋 -돌 -돔 -돕 -돗 -동 -돛 -돝 -돼 -됐 -되 -된 -될 -됨 -됩 -됴 -두 -둑 -둔 -둘 -둠 -둡 -둣 -둥 -둬 -뒀 -뒤 -뒬 -뒷 -뒹 -듀 -듈 -듐 -드 -득 -든 -듣 -들 -듦 -듬 -듭 -듯 -등 -듸 -디 -딕 -딘 -딛 -딜 -딤 -딥 -딧 -딨 -딩 -딪 -따 -딱 -딴 -딸 -땀 -땄 -땅 -때 -땐 -땔 -땜 -땝 -땠 -땡 -떠 -떡 -떤 -떨 -떫 -떰 -떱 -떳 -떴 -떵 -떻 -떼 -떽 -뗀 -뗄 -뗍 -뗏 -뗐 -뗑 -또 -똑 -똘 -똥 -뙤 -뚜 -뚝 -뚤 -뚫 -뚱 -뛰 -뛴 -뛸 -뜀 -뜁 -뜨 -뜩 -뜬 -뜯 -뜰 -뜸 -뜻 -띄 -띈 -띌 -띔 -띕 -띠 -띤 -띨 -띱 -띵 -라 -락 -란 -랄 -람 -랍 -랏 -랐 -랑 -랒 -랗 -래 -랙 -랜 -랠 -램 -랩 -랫 -랬 -랭 -랴 -략 -량 -러 -럭 -런 -럴 -럼 -럽 -럿 -렀 -렁 -렇 -레 -렉 -렌 -렐 -렘 -렙 -렛 -렝 -려 -력 -련 -렬 -렴 -렵 -렷 -렸 -령 -례 -로 -록 -론 -롤 -롬 -롭 -롯 -롱 -롸 -롹 -뢰 -뢴 -뢸 -룃 -료 -룐 -룡 -루 -룩 -룬 -룰 -룸 -룹 -룻 -룽 -뤄 -뤘 -뤼 -류 -륙 -륜 -률 -륨 -륭 -르 -륵 -른 -를 -름 -릅 -릇 -릉 -릎 -리 -릭 -린 -릴 -림 -립 -릿 -링 -마 -막 -만 -많 -맏 -말 -맑 -맘 -맙 -맛 -망 -맞 -맡 -맣 -매 -맥 -맨 -맬 -맴 -맵 -맷 -맸 -맹 -맺 -먀 -먁 -머 -먹 -먼 -멀 -멈 -멋 -멍 -멎 -메 -멕 -멘 -멜 -멤 -멥 -멧 -멩 -며 -멱 -면 -멸 -몄 -명 -몇 -모 -목 -몫 -몬 -몰 -몸 -몹 -못 -몽 -뫼 -묘 -무 -묵 -묶 -문 -묻 -물 -묽 -뭄 -뭅 -뭇 -뭉 -뭍 -뭏 -뭐 -뭔 -뭘 -뭡 -뭣 -뮈 -뮌 -뮐 -뮤 -뮬 -므 -믈 -믐 -미 -믹 -민 -믿 -밀 -밈 -밉 -밋 -밌 -밍 -및 -밑 -바 -박 -밖 -반 -받 -발 -밝 -밟 -밤 -밥 -밧 -방 -밭 -배 -백 -밴 -밸 -뱀 -뱁 -뱃 -뱄 -뱅 -뱉 -뱍 -뱐 -버 -벅 -번 -벌 -범 -법 -벗 -벙 -벚 -베 -벡 -벤 -벨 -벰 -벱 -벳 -벵 -벼 -벽 -변 -별 -볍 -볏 -볐 -병 -볕 -보 -복 -볶 -본 -볼 -봄 -봅 -봇 -봉 -봐 -봤 -뵈 -뵐 -뵙 -부 -북 -분 -붇 -불 -붉 -붐 -붓 -붕 -붙 -뷔 -뷰 -뷴 -뷸 -브 -븐 -블 -비 -빅 -빈 -빌 -빔 -빕 -빗 -빙 -빚 -빛 -빠 -빡 -빤 -빨 -빳 -빴 -빵 -빻 -빼 -빽 -뺀 -뺄 -뺌 -뺏 -뺐 -뺑 -뺨 -뻐 -뻑 -뻔 -뻗 -뻘 -뻣 -뻤 -뻥 -뻬 -뼈 -뼉 -뼘 -뽀 -뽈 -뽐 -뽑 -뽕 -뾰 -뿌 -뿍 -뿐 -뿔 -뿜 -쁘 -쁜 -쁠 -쁨 -삐 -삔 -삘 -사 -삭 -삯 -산 -살 -삵 -삶 -삼 -삽 -삿 -샀 -상 -샅 -새 -색 -샌 -샐 -샘 -샙 -샛 -샜 -생 -샤 -샨 -샬 -샴 -샵 -샷 -샹 -서 -석 -섞 -선 -섣 -설 -섬 -섭 -섯 -섰 -성 -섶 -세 -섹 -센 -셀 -셈 -셉 -셋 -셌 -셍 -셔 -션 -셜 -셨 -셰 -셴 -셸 -소 -속 -손 -솔 -솜 -솝 -솟 -송 -솥 -쇄 -쇠 -쇤 -쇳 -쇼 -숀 -숄 -숍 -수 -숙 -순 -숟 -술 -숨 -숩 -숫 -숭 -숯 -숱 -숲 -숴 -쉐 -쉘 -쉬 -쉭 -쉰 -쉴 -쉼 -쉽 -슈 -슐 -슘 -슛 -슝 -스 -슥 -슨 -슬 -슭 -슴 -습 -슷 -승 -시 -식 -신 -싣 -실 -싫 -심 -십 -싯 -싱 -싶 -싸 -싹 -싼 -쌀 -쌈 -쌉 -쌌 -쌍 -쌓 -쌔 -쌘 -쌩 -써 -썩 -썬 -썰 -썸 -썹 -썼 -썽 -쎄 -쎈 -쏘 -쏙 -쏜 -쏟 -쏠 -쏭 -쏴 -쐈 -쐐 -쐬 -쑤 -쑥 -쑨 -쒀 -쒔 -쓰 -쓱 -쓴 -쓸 -씀 -씁 -씌 -씨 -씩 -씬 -씰 -씸 -씹 -씻 -씽 -아 -악 -안 -앉 -않 -알 -앎 -앓 -암 -압 -앗 -았 -앙 -앞 -애 -액 -앤 -앨 -앰 -앱 -앳 -앴 -앵 -야 -약 -얀 -얄 -얇 -얌 -얍 -얏 -양 -얕 -얗 -얘 -얜 -어 -억 -언 -얹 -얻 -얼 -얽 -엄 -업 -없 -엇 -었 -엉 -엊 -엌 -엎 -에 -엑 -엔 -엘 -엠 -엡 -엣 -엥 -여 -역 -엮 -연 -열 -엷 -염 -엽 -엾 -엿 -였 -영 -옅 -옆 -옇 -예 -옌 -옐 -옙 -옛 -오 -옥 -온 -올 -옭 -옮 -옳 -옴 -옵 -옷 -옹 -옻 -와 -왁 -완 -왈 -왑 -왓 -왔 -왕 -왜 -왠 -왱 -외 -왼 -요 -욕 -욘 -욜 -욤 -용 -우 -욱 -운 -울 -움 -웁 -웃 -웅 -워 -웍 -원 -월 -웜 -웠 -웡 -웨 -웬 -웰 -웸 -웹 -위 -윅 -윈 -윌 -윔 -윗 -윙 -유 -육 -윤 -율 -윱 -윳 -융 -으 -윽 -은 -을 -읊 -음 -읍 -응 -의 -읜 -읠 -이 -익 -인 -일 -읽 -잃 -임 -입 -잇 -있 -잉 -잊 -잎 -자 -작 -잔 -잖 -잘 -잠 -잡 -잣 -잤 -장 -잦 -재 -잭 -잰 -잴 -잽 -잿 -쟀 -쟁 -쟈 -쟉 -쟤 -저 -적 -전 -절 -젊 -점 -접 -젓 -정 -젖 -제 -젝 -젠 -젤 -젬 -젭 -젯 -져 -젼 -졀 -졌 -졍 -조 -족 -존 -졸 -좀 -좁 -종 -좇 -좋 -좌 -좍 -좽 -죄 -죠 -죤 -주 -죽 -준 -줄 -줌 -줍 -줏 -중 -줘 -줬 -쥐 -쥔 -쥘 -쥬 -쥴 -즈 -즉 -즌 -즐 -즘 -즙 -증 -지 -직 -진 -짇 -질 -짊 -짐 -집 -짓 -징 -짖 -짙 -짚 -짜 -짝 -짠 -짢 -짤 -짧 -짬 -짭 -짰 -짱 -째 -짹 -짼 -쨀 -쨉 -쨋 -쨌 -쨍 -쩄 -쩌 -쩍 -쩐 -쩔 -쩜 -쩝 -쩡 -쩨 -쪄 -쪘 
-쪼 -쪽 -쪾 -쫀 -쫄 -쫑 -쫓 -쫙 -쬐 -쭈 -쭉 -쭐 -쭙 -쯔 -쯤 -쯧 -찌 -찍 -찐 -찔 -찜 -찝 -찡 -찢 -찧 -차 -착 -찬 -찮 -찰 -참 -찹 -찻 -찼 -창 -찾 -채 -책 -챈 -챌 -챔 -챕 -챗 -챘 -챙 -챠 -챤 -처 -척 -천 -철 -첨 -첩 -첫 -청 -체 -첵 -첸 -첼 -쳄 -쳇 -쳉 -쳐 -쳔 -쳤 -초 -촉 -촌 -촘 -촛 -총 -촨 -촬 -최 -쵸 -추 -축 -춘 -출 -춤 -춥 -춧 -충 -춰 -췄 -췌 -취 -췬 -츄 -츠 -측 -츨 -츰 -층 -치 -칙 -친 -칠 -칡 -침 -칩 -칫 -칭 -카 -칵 -칸 -칼 -캄 -캅 -캇 -캉 -캐 -캔 -캘 -캠 -캡 -캣 -캤 -캥 -캬 -커 -컥 -컨 -컫 -컬 -컴 -컵 -컷 -컸 -컹 -케 -켄 -켈 -켐 -켓 -켕 -켜 -켠 -켤 -켭 -켯 -켰 -코 -콕 -콘 -콜 -콤 -콥 -콧 -콩 -콰 -콱 -콴 -콸 -쾅 -쾌 -쾡 -쾨 -쾰 -쿄 -쿠 -쿡 -쿤 -쿨 -쿰 -쿵 -쿼 -퀀 -퀄 -퀘 -퀭 -퀴 -퀵 -퀸 -퀼 -큐 -큘 -크 -큰 -클 -큼 -큽 -키 -킥 -킨 -킬 -킴 -킵 -킷 -킹 -타 -탁 -탄 -탈 -탉 -탐 -탑 -탓 -탔 -탕 -태 -택 -탠 -탤 -탬 -탭 -탯 -탰 -탱 -터 -턱 -턴 -털 -텀 -텁 -텃 -텄 -텅 -테 -텍 -텐 -텔 -템 -텝 -텡 -텨 -톈 -토 -톡 -톤 -톨 -톰 -톱 -톳 -통 -퇴 -툇 -투 -툭 -툰 -툴 -툼 -퉁 -퉈 -퉜 -튀 -튄 -튈 -튕 -튜 -튠 -튤 -튬 -트 -특 -튼 -튿 -틀 -틈 -틉 -틋 -틔 -티 -틱 -틴 -틸 -팀 -팁 -팅 -파 -팍 -팎 -판 -팔 -팜 -팝 -팟 -팠 -팡 -팥 -패 -팩 -팬 -팰 -팸 -팻 -팼 -팽 -퍼 -퍽 -펀 -펄 -펌 -펍 -펐 -펑 -페 -펙 -펜 -펠 -펨 -펩 -펫 -펭 -펴 -편 -펼 -폄 -폈 -평 -폐 -포 -폭 -폰 -폴 -폼 -폿 -퐁 -표 -푭 -푸 -푹 -푼 -풀 -품 -풋 -풍 -퓨 -퓬 -퓰 -퓸 -프 -픈 -플 -픔 -픕 -피 -픽 -핀 -필 -핌 -핍 -핏 -핑 -하 -학 -한 -할 -핥 -함 -합 -핫 -항 -해 -핵 -핸 -핼 -햄 -햅 -햇 -했 -행 -햐 -향 -헀 -허 -헉 -헌 -헐 -험 -헙 -헛 -헝 -헤 -헥 -헨 -헬 -헴 -헵 -헷 -헹 -혀 -혁 -현 -혈 -혐 -협 -혓 -혔 -형 -혜 -호 -혹 -혼 -홀 -홈 -홉 -홋 -홍 -홑 -화 -확 -환 -활 -홧 -황 -홰 -홱 -횃 -회 -획 -횝 -횟 -횡 -효 -후 -훅 -훈 -훌 -훑 -훔 -훗 -훤 -훨 -훼 -휄 -휑 -휘 -휙 -휜 -휠 -휩 -휭 -휴 -휼 -흄 -흉 -흐 -흑 -흔 -흘 -흙 -흠 -흡 -흣 -흥 -흩 -희 -흰 -흽 -히 -힉 -힌 -힐 -힘 -힙 -힝 -車 -滑 -金 -奈 -羅 -洛 -卵 -欄 -蘭 -郎 -來 -盧 -老 -魯 -綠 -鹿 -論 -雷 -樓 -縷 -凌 -樂 -不 -參 -葉 -沈 -若 -兩 -凉 -梁 -呂 -女 -廬 -麗 -黎 -曆 -歷 -戀 -蓮 -連 -列 -烈 -裂 -念 -獵 -靈 -領 -例 -禮 -醴 -惡 -尿 -料 -遼 -龍 -暈 -柳 -流 -類 -六 -陸 -倫 -律 -栗 -利 -李 -梨 -理 -離 -燐 -林 -臨 -立 -茶 -切 -宅 - diff --git a/backend/ppocr/utils/dict/latin_dict.txt b/backend/ppocr/utils/dict/latin_dict.txt deleted file mode 100644 index e166bf33..00000000 --- a/backend/ppocr/utils/dict/latin_dict.txt +++ /dev/null @@ -1,185 +0,0 @@ - -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -} -¡ -£ -§ -ª -« -­ -° -² -³ -´ -µ -· -º -» -¿ -À -Á - -Ä -Å -Ç -È -É -Ê -Ë -Ì -Í -Î -Ï -Ò -Ó -Ô -Õ -Ö -Ú -Ü -Ý -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -ì -í -î -ï -ñ -ò -ó -ô -õ -ö -ø -ù -ú -û -ü -ý -ą -Ć -ć -Č -č -Đ -đ -ę -ı -Ł -ł -ō -Œ -œ -Š -š -Ÿ -Ž -ž -ʒ -β -δ -ε -з -Ṡ -‘ -€ -™ diff --git a/backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt b/backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt deleted file mode 100644 index 8be0f486..00000000 --- a/backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt +++ /dev/null @@ -1,10 +0,0 @@ -text -title -figure -figure_caption -table -table_caption -header -footer -reference -equation \ No newline at end of file diff --git a/backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt b/backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt deleted file mode 100644 index ca6acf4e..00000000 --- a/backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt +++ /dev/null @@ -1,5 +0,0 @@ -text -title -list -table -figure \ No newline at end of file diff --git a/backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt b/backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt deleted file mode 100644 index faea15ea..00000000 --- a/backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt +++ /dev/null @@ -1 +0,0 @@ -table \ No newline at end of file diff --git a/backend/ppocr/utils/dict/mr_dict.txt b/backend/ppocr/utils/dict/mr_dict.txt deleted file mode 100644 index 283b1504..00000000 --- a/backend/ppocr/utils/dict/mr_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -प -फ -ब -भ -म -य -र -ऱ -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -० -१ -२ -३ -४ -५ -६ -७ -८ -९ diff --git a/backend/ppocr/utils/dict/ne_dict.txt b/backend/ppocr/utils/dict/ne_dict.txt deleted file mode 100644 index 5a7df953..00000000 --- a/backend/ppocr/utils/dict/ne_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -ऩ -प -फ -ब -भ -म -य -र -ऱ -ल -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -े -ै -ो -ौ -् -॒ -ॠ -। -० -१ -२ -३ -४ -५ -६ -७ -८ -९ diff --git a/backend/ppocr/utils/dict/oc_dict.txt b/backend/ppocr/utils/dict/oc_dict.txt deleted file mode 100644 index e88af8bd..00000000 --- a/backend/ppocr/utils/dict/oc_dict.txt +++ /dev/null @@ -1,96 +0,0 @@ -o -c -_ -i -m -g -/ -2 -0 -I -L -S -V -R -C -1 -v -a -l -4 -3 -. -j -p -r -e -è -t -9 -7 -5 -8 -n -' -b -s -6 -q -u -á -d -ò -à -h -z -f -ï -í -A -ç -x -ó -é -P -O -Ò -ü -k -À -F -- -ú -­ -æ -Á -D -E -w -K -T -N -y -U -Z -G -B -J -H -M -W -Y -X -Q -% -$ -, -@ -& -! -: -( -# -? 
-+ -É - diff --git a/backend/ppocr/utils/dict/pt_dict.txt b/backend/ppocr/utils/dict/pt_dict.txt deleted file mode 100644 index 9500fae6..00000000 --- a/backend/ppocr/utils/dict/pt_dict.txt +++ /dev/null @@ -1,130 +0,0 @@ -p -u -_ -i -m -g -/ -8 -I -L -S -V -R -C -2 -0 -1 -v -a -l -6 -7 -4 -5 -. -j - -q -e -s -t -ã -o -x -9 -c -n -r -z -ç -õ -3 -A -U -d -º -ô -­ -, -E -; -ó -á -b -D -? -ú -ê -- -h -P -f -à -N -í -O -M -G -É -é -â -F -: -T -Á -" -Q -) -W -J -B -H -( -ö -% -Ö -« -w -K -y -! -k -] -' -Z -+ -Ç -Õ -Y -À -X -µ -» -ª -Í -ü -ä -´ -è -ñ -ß -ï -Ú -ë -Ô -Ï -Ó -[ -Ì -< - -ò -§ -³ -ø -å -# -$ -& -@ diff --git a/backend/ppocr/utils/dict/pu_dict.txt b/backend/ppocr/utils/dict/pu_dict.txt deleted file mode 100644 index 9500fae6..00000000 --- a/backend/ppocr/utils/dict/pu_dict.txt +++ /dev/null @@ -1,130 +0,0 @@ -p -u -_ -i -m -g -/ -8 -I -L -S -V -R -C -2 -0 -1 -v -a -l -6 -7 -4 -5 -. -j - -q -e -s -t -ã -o -x -9 -c -n -r -z -ç -õ -3 -A -U -d -º -ô -­ -, -E -; -ó -á -b -D -? -ú -ê -- -h -P -f -à -N -í -O -M -G -É -é -â -F -: -T -Á -" -Q -) -W -J -B -H -( -ö -% -Ö -« -w -K -y -! -k -] -' -Z -+ -Ç -Õ -Y -À -X -µ -» -ª -Í -ü -ä -´ -è -ñ -ß -ï -Ú -ë -Ô -Ï -Ó -[ -Ì -< - -ò -§ -³ -ø -å -# -$ -& -@ diff --git a/backend/ppocr/utils/dict/rs_cyrillic_dict.txt b/backend/ppocr/utils/dict/rs_cyrillic_dict.txt deleted file mode 100644 index 95dd4636..00000000 --- a/backend/ppocr/utils/dict/rs_cyrillic_dict.txt +++ /dev/null @@ -1,134 +0,0 @@ -r -s -c -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -9 -7 -8 -. -j -p -м -а -с -и -р -ћ -е -ш -3 -4 -о -г -н -з -в -л -6 -т -ж -у -к -п -њ -д -ч -С -ј -ф -ц -љ -х -О -И -А -б -Ш -К -ђ -џ -М -В -З -Д -Р -У -Н -Т -Б -? -П -Х -Ј -Ц -Г -Љ -Л -Ф -e -n -w -E -F -A -N -f -o -b -M -G -t -y -W -k -P -u -H -B -T -z -h -O -Y -d -U -K -D -x -X -J -Z -Q -q -' -- -@ -é -# -! -, -% -$ -: -& -+ -( -É - diff --git a/backend/ppocr/utils/dict/rs_dict.txt b/backend/ppocr/utils/dict/rs_dict.txt deleted file mode 100644 index d1ce46d2..00000000 --- a/backend/ppocr/utils/dict/rs_dict.txt +++ /dev/null @@ -1,91 +0,0 @@ -r -s -_ -i -m -g -/ -1 -I -L -S -V -R -C -2 -0 -v -a -l -7 -5 -8 -6 -. -j -p - -t -d -9 -3 -e -š -4 -k -u -ć -c -n -đ -o -z -č -b -ž -f -Z -T -h -M -F -O -Š -B -H -A -E -Đ -Ž -D -P -G -Č -K -U -N -J -Ć -w -y -W -x -Y -X -q -Q -# -& -$ -, -- -% -' -@ -! -: -? -( -É -é -+ diff --git a/backend/ppocr/utils/dict/rs_latin_dict.txt b/backend/ppocr/utils/dict/rs_latin_dict.txt deleted file mode 100644 index d1ce46d2..00000000 --- a/backend/ppocr/utils/dict/rs_latin_dict.txt +++ /dev/null @@ -1,91 +0,0 @@ -r -s -_ -i -m -g -/ -1 -I -L -S -V -R -C -2 -0 -v -a -l -7 -5 -8 -6 -. -j -p - -t -d -9 -3 -e -š -4 -k -u -ć -c -n -đ -o -z -č -b -ž -f -Z -T -h -M -F -O -Š -B -H -A -E -Đ -Ž -D -P -G -Č -K -U -N -J -Ć -w -y -W -x -Y -X -q -Q -# -& -$ -, -- -% -' -@ -! -: -? -( -É -é -+ diff --git a/backend/ppocr/utils/dict/rsc_dict.txt b/backend/ppocr/utils/dict/rsc_dict.txt deleted file mode 100644 index 95dd4636..00000000 --- a/backend/ppocr/utils/dict/rsc_dict.txt +++ /dev/null @@ -1,134 +0,0 @@ -r -s -c -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -9 -7 -8 -. -j -p -м -а -с -и -р -ћ -е -ш -3 -4 -о -г -н -з -в -л -6 -т -ж -у -к -п -њ -д -ч -С -ј -ф -ц -љ -х -О -И -А -б -Ш -К -ђ -џ -М -В -З -Д -Р -У -Н -Т -Б -? -П -Х -Ј -Ц -Г -Љ -Л -Ф -e -n -w -E -F -A -N -f -o -b -M -G -t -y -W -k -P -u -H -B -T -z -h -O -Y -d -U -K -D -x -X -J -Z -Q -q -' -- -@ -é -# -! 
-, -% -$ -: -& -+ -( -É - diff --git a/backend/ppocr/utils/dict/ru_dict.txt b/backend/ppocr/utils/dict/ru_dict.txt deleted file mode 100644 index aff9c16e..00000000 --- a/backend/ppocr/utils/dict/ru_dict.txt +++ /dev/null @@ -1,163 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -Ё -Є -І -Ј -Љ -Ў -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ы -Ь -Э -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ё -ђ -є -і -ј -љ -њ -ћ -ў -џ -Ґ -ґ diff --git a/backend/ppocr/utils/dict/spin_dict.txt b/backend/ppocr/utils/dict/spin_dict.txt deleted file mode 100644 index 8ee8347f..00000000 --- a/backend/ppocr/utils/dict/spin_dict.txt +++ /dev/null @@ -1,68 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -: -( -' -- -, -% -> -. -[ -? -) -" -= -_ -* -] -; -& -+ -$ -@ -/ -| -! -< -# -` -{ -~ -\ -} -^ \ No newline at end of file diff --git a/backend/ppocr/utils/dict/ta_dict.txt b/backend/ppocr/utils/dict/ta_dict.txt deleted file mode 100644 index 19d81892..00000000 --- a/backend/ppocr/utils/dict/ta_dict.txt +++ /dev/null @@ -1,128 +0,0 @@ -t -a -_ -i -m -g -/ -3 -I -L -S -V -R -C -2 -0 -1 -v -l -9 -7 -8 -. -j -p -ப -ூ -த -ம -ி -வ -ர -் -ந -ோ -ன -6 -ஆ -ற -ல -5 -ள -ா -ொ -ழ -ு -4 -ெ -ண -க -ட -ை -ே -ச -ய -ஒ -இ -அ -ங -உ -ீ -ஞ -எ -ஓ -ஃ -ஜ -ஷ -ஸ -ஏ -ஊ -ஹ -ஈ -ஐ -ௌ -ஔ -s -c -e -n -w -F -T -O -P -K -A -N -G -Y -E -M -H -U -B -o -b -D -d -r -W -u -y -f -X -k -q -h -J -z -Z -Q -x -- -' -$ -, -% -@ -é -! -# -+ -É -& -: -( -? - diff --git a/backend/ppocr/utils/dict/table_dict.txt b/backend/ppocr/utils/dict/table_dict.txt deleted file mode 100644 index 2ef028c7..00000000 --- a/backend/ppocr/utils/dict/table_dict.txt +++ /dev/null @@ -1,277 +0,0 @@ -← - -☆ -─ -α - - -⋅ -$ -ω -ψ -χ -( -υ -≥ -σ -, -ρ -ε -0 -■ -4 -8 -✗ -b -< -✓ -Ψ -Ω -€ -D -3 -Π -H -║ - -L -Φ -Χ -θ -P -κ -λ -μ -T -ξ -X -β -γ -δ -\ -ζ -η -` -d - -h -f -l -Θ -p -√ -t - -x -Β -Γ -Δ -| -ǂ -ɛ -j -̧ -➢ -⁡ -̌ -′ -« -△ -▲ -# - -' -Ι -+ -¶ -/ -▼ -⇑ -□ -· -7 -▪ -; -? -➔ -∩ -C -÷ -G -⇒ -K - -O -S -С -W -Α -[ -○ -_ -● -‡ -c -z -g - -o - -〈 -〉 -s -⩽ -w -φ -ʹ -{ -» -∣ -̆ -e -ˆ -∈ -τ -◆ -ι -∅ -∆ -∙ -∘ -Ø -ß -✔ -∞ -∑ -− -× -◊ -∗ -∖ -˃ -˂ -∫ -" -i -& -π -↔ -* -∥ -æ -∧ -. -⁄ -ø -Q -∼ -6 -⁎ -: -★ -> -a -B -≈ -F -J -̄ -N -♯ -R -V - -― -Z -♣ -^ -¤ -¥ -§ - -¢ -£ -≦ -­ -≤ -‖ -Λ -© -n -↓ -→ -↑ -r -° -± -v - -♂ -k -♀ -~ -ᅟ -̇ -@ -” -♦ -ł -® -⊕ -„ -! - -% -⇓ -) -- -1 -5 -9 -= -А -A -‰ -⋆ -Σ -E -◦ -I -※ -M -m -̨ -⩾ -† - -• -U -Y -
 -] -̸ -2 -‐ -– -‒ -̂ -— -̀ -́ -’ -‘ -⋮ -⋯ -̊ -“ -̈ -≧ -q -u -ı -y - -​ -̃ -} -ν diff --git a/backend/ppocr/utils/dict/table_master_structure_dict.txt b/backend/ppocr/utils/dict/table_master_structure_dict.txt deleted file mode 100644 index 95ab2539..00000000 --- a/backend/ppocr/utils/dict/table_master_structure_dict.txt +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - - - - colspan="2" - colspan="3" - - - rowspan="2" - colspan="4" - colspan="6" - rowspan="3" - colspan="9" - colspan="10" - colspan="7" - rowspan="4" - rowspan="5" - rowspan="9" - colspan="8" - rowspan="8" - rowspan="6" - rowspan="7" - rowspan="10" - - - - - - - - diff --git a/backend/ppocr/utils/dict/table_structure_dict.txt b/backend/ppocr/utils/dict/table_structure_dict.txt deleted file mode 100644 index 8edb10b8..00000000 --- a/backend/ppocr/utils/dict/table_structure_dict.txt +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - colspan="2" - colspan="3" - rowspan="2" - colspan="4" - colspan="6" - rowspan="3" - colspan="9" - colspan="10" - colspan="7" - rowspan="4" - rowspan="5" - rowspan="9" - colspan="8" - rowspan="8" - rowspan="6" - rowspan="7" - rowspan="10" \ No newline at end of file diff --git a/backend/ppocr/utils/dict/table_structure_dict_ch.txt b/backend/ppocr/utils/dict/table_structure_dict_ch.txt deleted file mode 100644 index 0c59c0e9..00000000 --- a/backend/ppocr/utils/dict/table_structure_dict_ch.txt +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - - - colspan="2" - colspan="3" - colspan="4" - colspan="5" - colspan="6" - colspan="7" - colspan="8" - colspan="9" - colspan="10" - colspan="11" - colspan="12" - colspan="13" - colspan="14" - colspan="15" - colspan="16" - colspan="17" - colspan="18" - colspan="19" - colspan="20" - rowspan="2" - rowspan="3" - rowspan="4" - rowspan="5" - rowspan="6" - rowspan="7" - rowspan="8" - rowspan="9" - rowspan="10" - rowspan="11" - rowspan="12" - rowspan="13" - rowspan="14" - rowspan="15" - rowspan="16" - rowspan="17" - rowspan="18" - rowspan="19" - rowspan="20" diff --git a/backend/ppocr/utils/dict/te_dict.txt b/backend/ppocr/utils/dict/te_dict.txt deleted file mode 100644 index 83d74cc7..00000000 --- a/backend/ppocr/utils/dict/te_dict.txt +++ /dev/null @@ -1,151 +0,0 @@ -t -e -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -3 -4 -8 -9 -. -j -p -త -ె -ర -క -్ -ి -ం -చ -ే -ద -ు -7 -6 -ఉ -ా -మ -ట -ో -వ -ప -ల -శ -ఆ -య -ై -భ -' -ీ -గ -ూ -డ -ధ -హ -న -జ -స -[ -‌ -ష -అ -ణ -ఫ -బ -ఎ -; -ళ -థ -ొ -ఠ -ృ -ఒ -ఇ -ః -ఊ -ఖ -- -ఐ -ఘ -ౌ -ఏ -ఈ -ఛ -, -ఓ -ఞ -| -? -: -ఢ -" -( -” -! -+ -) -* -= -& -“ -€ -] -£ -$ -s -c -n -w -k -J -G -u -d -r -E -o -h -y -b -f -B -M -O -T -N -D -P -A -F -x -W -Y -U -H -K -X -z -Z -Q -q -É -% -# -@ -é diff --git a/backend/ppocr/utils/dict/ug_dict.txt b/backend/ppocr/utils/dict/ug_dict.txt deleted file mode 100644 index 77602f2c..00000000 --- a/backend/ppocr/utils/dict/ug_dict.txt +++ /dev/null @@ -1,114 +0,0 @@ -u -g -_ -i -m -/ -1 -I -L -S -V -R -C -2 -0 -v -a -l -8 -5 -3 -6 -9 -. -j -p - -ق -ا -پ -ل -4 -7 -ئ -ى -ش -ت -ي -ك -د -ف -ر -و -ن -ب -ە -خ -ې -چ -ۇ -ز -س -م -ۋ -گ -ڭ -ۆ -ۈ -ج -غ -ھ -ژ -s -c -e -n -w -P -E -D -U -d -r -b -y -B -o -O -Y -N -T -k -t -h -A -H -F -z -W -K -G -M -f -Z -X -Q -J -x -q -- -! -% -# -? -: -$ -, -& -' -É -@ -é -( -+ diff --git a/backend/ppocr/utils/dict/uk_dict.txt b/backend/ppocr/utils/dict/uk_dict.txt deleted file mode 100644 index c5ffc0a5..00000000 --- a/backend/ppocr/utils/dict/uk_dict.txt +++ /dev/null @@ -1,142 +0,0 @@ -u -k -_ -i -m -g -/ -1 -6 -I -L -S -V -R -C -2 -0 -v -a -l -7 -9 -. 
-j -p -в -і -д -п -о -н -с -т -ю -4 -5 -3 -а -и -м -е -р -ч -у -Б -з -л -к -8 -А -В -г -є -б -ь -х -ґ -ш -ц -ф -я -щ -ж -Г -Х -У -Т -Е -І -Н -П -З -Л -Ю -С -Д -М -К -Р -Ф -О -Ц -И -Я -Ч -Ш -Ж -Є -Ґ -Ь -s -c -e -n -w -A -P -r -E -t -o -h -d -y -M -G -N -F -B -T -D -U -O -W -Z -f -H -Y -b -K -z -x -Q -X -q -J -$ -- -' -# -& -% -? -: -! -, -+ -@ -( -é -É - diff --git a/backend/ppocr/utils/dict/ur_dict.txt b/backend/ppocr/utils/dict/ur_dict.txt deleted file mode 100644 index c06786a8..00000000 --- a/backend/ppocr/utils/dict/ur_dict.txt +++ /dev/null @@ -1,137 +0,0 @@ -u -r -_ -i -m -g -/ -3 -I -L -S -V -R -C -2 -0 -1 -v -a -l -9 -7 -8 -. -j -p - -چ -ٹ -پ -ا -ئ -ی -ے -4 -6 -و -ل -ن -ڈ -ھ -ک -ت -ش -ف -ق -ر -د -5 -ب -ج -خ -ہ -س -ز -غ -ڑ -ں -آ -م -ؤ -ط -ص -ح -ع -گ -ث -ض -ذ -ۓ -ِ -ء -ظ -ً -ي -ُ -ۃ -أ -ٰ -ە -ژ -ۂ -ة -ّ -ك -ه -s -c -e -n -w -o -d -t -D -M -T -U -E -b -P -h -y -W -H -A -x -B -O -N -G -Y -Q -F -k -K -q -J -Z -f -z -X -' -@ -& -! -, -: -$ -- -# -? -% -é -+ -( -É diff --git a/backend/ppocr/utils/dict/xi_dict.txt b/backend/ppocr/utils/dict/xi_dict.txt deleted file mode 100644 index f195f1ea..00000000 --- a/backend/ppocr/utils/dict/xi_dict.txt +++ /dev/null @@ -1,110 +0,0 @@ -x -i -_ -m -g -/ -1 -0 -I -L -S -V -R -C -2 -v -a -l -3 -6 -4 -5 -. -j -p - -Q -u -e -r -o -8 -7 -n -c -9 -t -b -é -q -d -ó -y -F -s -, -O -í -T -f -" -U -M -h -: -P -H -A -E -D -z -N -á -ñ -ú -% -; -è -+ -Y -- -B -G -( -) -¿ -? -w -¡ -! -X -É -K -k -Á -ü -Ú -« -» -J -' -ö -W -Z -º -Ö -­ -[ -] -Ç -ç -à -ä -û -ò -Í -ê -ô -ø -ª diff --git a/backend/ppocr/utils/e2e_metric/Deteval.py b/backend/ppocr/utils/e2e_metric/Deteval.py deleted file mode 100755 index 45567a7d..00000000 --- a/backend/ppocr/utils/e2e_metric/Deteval.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import scipy.io as io -from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area - - -def get_socre_A(gt_dir, pred_dict): - allInputs = 1 - - def input_reading_mod(pred_dict): - """This helper reads input from txt files""" - det = [] - n = len(pred_dict) - for i in range(n): - points = pred_dict[i]['points'] - text = pred_dict[i]['texts'] - point = ",".join(map(str, points.reshape(-1, ))) - det.append([point, text]) - return det - - def gt_reading_mod(gt_dict): - """This helper reads groundtruths from mat files""" - gt = [] - n = len(gt_dict) - for i in range(n): - points = gt_dict[i]['points'].tolist() - h = len(points) - text = gt_dict[i]['text'] - xx = [ - np.array( - ['x:'], dtype=' 1): - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - det_x = detection[0::2] - det_y = detection[1::2] - det_gt_iou = iod(det_x, det_y, gt_x, gt_y) - if det_gt_iou > threshold: - detections[det_id] = [] - - detections[:] = [item for item in detections if item != []] - return detections - - def sigma_calculation(det_x, det_y, gt_x, gt_y): - """ - sigma = inter_area / gt_area - """ - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(gt_x, gt_y)), 2) - - def tau_calculation(det_x, det_y, gt_x, gt_y): - if area(det_x, det_y) == 0.0: - return 0 - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(det_x, det_y)), 2) - - ##############################Initialization################################### - # global_sigma = [] - # global_tau = [] - # global_pred_str = [] - # global_gt_str = [] - ############################################################################### - - for input_id in range(allInputs): - if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and ( - input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and ( - input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \ - and (input_id != 'Deteval_result_non_curved.txt'): - detections = input_reading_mod(pred_dict) - groundtruths = gt_reading_mod(gt_dir) - detections = detection_filtering( - detections, - groundtruths) # filters detections overlapping with DC area - dc_id = [] - for i in range(len(groundtruths)): - if groundtruths[i][5] == '#': - dc_id.append(i) - cnt = 0 - for a in dc_id: - num = a - cnt - del groundtruths[num] - cnt += 1 - - local_sigma_table = np.zeros((len(groundtruths), len(detections))) - local_tau_table = np.zeros((len(groundtruths), len(detections))) - local_pred_str = {} - local_gt_str = {} - - for gt_id, gt in enumerate(groundtruths): - if len(detections) > 0: - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - pred_seq_str = detection_orig[1].strip() - det_x = detection[0::2] - det_y = detection[1::2] - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - gt_seq_str = str(gt[4].tolist()[0]) - - local_sigma_table[gt_id, det_id] = sigma_calculation( - det_x, det_y, gt_x, gt_y) - local_tau_table[gt_id, det_id] = tau_calculation( - det_x, det_y, gt_x, gt_y) - local_pred_str[det_id] = pred_seq_str - local_gt_str[gt_id] = gt_seq_str - - global_sigma = local_sigma_table - global_tau = local_tau_table 
- global_pred_str = local_pred_str - global_gt_str = local_gt_str - - single_data = {} - single_data['sigma'] = global_sigma - single_data['global_tau'] = global_tau - single_data['global_pred_str'] = global_pred_str - single_data['global_gt_str'] = global_gt_str - return single_data - - -def get_socre_B(gt_dir, img_id, pred_dict): - allInputs = 1 - - def input_reading_mod(pred_dict): - """This helper reads input from txt files""" - det = [] - n = len(pred_dict) - for i in range(n): - points = pred_dict[i]['points'] - text = pred_dict[i]['texts'] - point = ",".join(map(str, points.reshape(-1, ))) - det.append([point, text]) - return det - - def gt_reading_mod(gt_dir, gt_id): - gt = io.loadmat('%s/poly_gt_img%s.mat' % (gt_dir, gt_id)) - gt = gt['polygt'] - return gt - - def detection_filtering(detections, groundtruths, threshold=0.5): - for gt_id, gt in enumerate(groundtruths): - if (gt[5] == '#') and (gt[1].shape[1] > 1): - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - det_x = detection[0::2] - det_y = detection[1::2] - det_gt_iou = iod(det_x, det_y, gt_x, gt_y) - if det_gt_iou > threshold: - detections[det_id] = [] - - detections[:] = [item for item in detections if item != []] - return detections - - def sigma_calculation(det_x, det_y, gt_x, gt_y): - """ - sigma = inter_area / gt_area - """ - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(gt_x, gt_y)), 2) - - def tau_calculation(det_x, det_y, gt_x, gt_y): - if area(det_x, det_y) == 0.0: - return 0 - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(det_x, det_y)), 2) - - ##############################Initialization################################### - # global_sigma = [] - # global_tau = [] - # global_pred_str = [] - # global_gt_str = [] - ############################################################################### - - for input_id in range(allInputs): - if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and ( - input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and ( - input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \ - and (input_id != 'Deteval_result_non_curved.txt'): - detections = input_reading_mod(pred_dict) - groundtruths = gt_reading_mod(gt_dir, img_id).tolist() - detections = detection_filtering( - detections, - groundtruths) # filters detections overlapping with DC area - dc_id = [] - for i in range(len(groundtruths)): - if groundtruths[i][5] == '#': - dc_id.append(i) - cnt = 0 - for a in dc_id: - num = a - cnt - del groundtruths[num] - cnt += 1 - - local_sigma_table = np.zeros((len(groundtruths), len(detections))) - local_tau_table = np.zeros((len(groundtruths), len(detections))) - local_pred_str = {} - local_gt_str = {} - - for gt_id, gt in enumerate(groundtruths): - if len(detections) > 0: - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - pred_seq_str = detection_orig[1].strip() - det_x = detection[0::2] - det_y = detection[1::2] - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - gt_seq_str = str(gt[4].tolist()[0]) - - local_sigma_table[gt_id, det_id] = sigma_calculation( - det_x, det_y, gt_x, gt_y) - 
local_tau_table[gt_id, det_id] = tau_calculation( - det_x, det_y, gt_x, gt_y) - local_pred_str[det_id] = pred_seq_str - local_gt_str[gt_id] = gt_seq_str - - global_sigma = local_sigma_table - global_tau = local_tau_table - global_pred_str = local_pred_str - global_gt_str = local_gt_str - - single_data = {} - single_data['sigma'] = global_sigma - single_data['global_tau'] = global_tau - single_data['global_pred_str'] = global_pred_str - single_data['global_gt_str'] = global_gt_str - return single_data - - -def combine_results(all_data): - tr = 0.7 - tp = 0.6 - fsc_k = 0.8 - k = 2 - global_sigma = [] - global_tau = [] - global_pred_str = [] - global_gt_str = [] - for data in all_data: - global_sigma.append(data['sigma']) - global_tau.append(data['global_tau']) - global_pred_str.append(data['global_pred_str']) - global_gt_str.append(data['global_gt_str']) - - global_accumulative_recall = 0 - global_accumulative_precision = 0 - total_num_gt = 0 - total_num_det = 0 - hit_str_count = 0 - hit_count = 0 - - def one_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idy): - hit_str_num = 0 - for gt_id in range(num_gt): - gt_matching_qualified_sigma_candidates = np.where( - local_sigma_table[gt_id, :] > tr) - gt_matching_num_qualified_sigma_candidates = gt_matching_qualified_sigma_candidates[ - 0].shape[0] - gt_matching_qualified_tau_candidates = np.where( - local_tau_table[gt_id, :] > tp) - gt_matching_num_qualified_tau_candidates = gt_matching_qualified_tau_candidates[ - 0].shape[0] - - det_matching_qualified_sigma_candidates = np.where( - local_sigma_table[:, gt_matching_qualified_sigma_candidates[0]] - > tr) - det_matching_num_qualified_sigma_candidates = det_matching_qualified_sigma_candidates[ - 0].shape[0] - det_matching_qualified_tau_candidates = np.where( - local_tau_table[:, gt_matching_qualified_tau_candidates[0]] > - tp) - det_matching_num_qualified_tau_candidates = det_matching_qualified_tau_candidates[ - 0].shape[0] - - if (gt_matching_num_qualified_sigma_candidates == 1) and (gt_matching_num_qualified_tau_candidates == 1) and \ - (det_matching_num_qualified_sigma_candidates == 1) and ( - det_matching_num_qualified_tau_candidates == 1): - global_accumulative_recall = global_accumulative_recall + 1.0 - global_accumulative_precision = global_accumulative_precision + 1.0 - local_accumulative_recall = local_accumulative_recall + 1.0 - local_accumulative_precision = local_accumulative_precision + 1.0 - - gt_flag[0, gt_id] = 1 - matched_det_id = np.where(local_sigma_table[gt_id, :] > tr) - # recg start - gt_str_cur = global_gt_str[idy][gt_id] - pred_str_cur = global_pred_str[idy][matched_det_id[0].tolist()[ - 0]] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - # recg end - det_flag[0, matched_det_id] = 1 - return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num - - def one_to_many(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idy): - hit_str_num = 0 - for gt_id in range(num_gt): - # skip the following if the groundtruth was matched - if gt_flag[0, gt_id] > 0: - continue - - non_zero_in_sigma = np.where(local_sigma_table[gt_id, :] > 0) - 
num_non_zero_in_sigma = non_zero_in_sigma[0].shape[0] - - if num_non_zero_in_sigma >= k: - ####search for all detections that overlaps with this groundtruth - qualified_tau_candidates = np.where((local_tau_table[ - gt_id, :] >= tp) & (det_flag[0, :] == 0)) - num_qualified_tau_candidates = qualified_tau_candidates[ - 0].shape[0] - - if num_qualified_tau_candidates == 1: - if ((local_tau_table[gt_id, qualified_tau_candidates] >= tp) - and - (local_sigma_table[gt_id, qualified_tau_candidates] >= - tr)): - # became an one-to-one case - global_accumulative_recall = global_accumulative_recall + 1.0 - global_accumulative_precision = global_accumulative_precision + 1.0 - local_accumulative_recall = local_accumulative_recall + 1.0 - local_accumulative_precision = local_accumulative_precision + 1.0 - - gt_flag[0, gt_id] = 1 - det_flag[0, qualified_tau_candidates] = 1 - # recg start - gt_str_cur = global_gt_str[idy][gt_id] - pred_str_cur = global_pred_str[idy][ - qualified_tau_candidates[0].tolist()[0]] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - # recg end - elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) - >= tr): - gt_flag[0, gt_id] = 1 - det_flag[0, qualified_tau_candidates] = 1 - # recg start - gt_str_cur = global_gt_str[idy][gt_id] - pred_str_cur = global_pred_str[idy][ - qualified_tau_candidates[0].tolist()[0]] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - # recg end - - global_accumulative_recall = global_accumulative_recall + fsc_k - global_accumulative_precision = global_accumulative_precision + num_qualified_tau_candidates * fsc_k - - local_accumulative_recall = local_accumulative_recall + fsc_k - local_accumulative_precision = local_accumulative_precision + num_qualified_tau_candidates * fsc_k - - return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num - - def many_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idy): - hit_str_num = 0 - for det_id in range(num_det): - # skip the following if the detection was matched - if det_flag[0, det_id] > 0: - continue - - non_zero_in_tau = np.where(local_tau_table[:, det_id] > 0) - num_non_zero_in_tau = non_zero_in_tau[0].shape[0] - - if num_non_zero_in_tau >= k: - ####search for all detections that overlaps with this groundtruth - qualified_sigma_candidates = np.where(( - local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0)) - num_qualified_sigma_candidates = qualified_sigma_candidates[ - 0].shape[0] - - if num_qualified_sigma_candidates == 1: - if ((local_tau_table[qualified_sigma_candidates, det_id] >= - tp) and - (local_sigma_table[qualified_sigma_candidates, det_id] - >= tr)): - # became an one-to-one case - global_accumulative_recall = global_accumulative_recall + 1.0 - global_accumulative_precision = global_accumulative_precision + 1.0 - local_accumulative_recall = local_accumulative_recall + 1.0 - local_accumulative_precision = local_accumulative_precision + 1.0 - - gt_flag[0, qualified_sigma_candidates] = 1 - det_flag[0, det_id] = 1 - # recg start - pred_str_cur = global_pred_str[idy][det_id] - gt_len = len(qualified_sigma_candidates[0]) - for idx in range(gt_len): - ele_gt_id = qualified_sigma_candidates[0].tolist()[ - 
idx] - if ele_gt_id not in global_gt_str[idy]: - continue - gt_str_cur = global_gt_str[idy][ele_gt_id] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - break - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - break - # recg end - elif (np.sum(local_tau_table[qualified_sigma_candidates, - det_id]) >= tp): - det_flag[0, det_id] = 1 - gt_flag[0, qualified_sigma_candidates] = 1 - # recg start - pred_str_cur = global_pred_str[idy][det_id] - gt_len = len(qualified_sigma_candidates[0]) - for idx in range(gt_len): - ele_gt_id = qualified_sigma_candidates[0].tolist()[idx] - if ele_gt_id not in global_gt_str[idy]: - continue - gt_str_cur = global_gt_str[idy][ele_gt_id] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - break - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - break - # recg end - - global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k - global_accumulative_precision = global_accumulative_precision + fsc_k - - local_accumulative_recall = local_accumulative_recall + num_qualified_sigma_candidates * fsc_k - local_accumulative_precision = local_accumulative_precision + fsc_k - return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num - - for idx in range(len(global_sigma)): - local_sigma_table = np.array(global_sigma[idx]) - local_tau_table = global_tau[idx] - - num_gt = local_sigma_table.shape[0] - num_det = local_sigma_table.shape[1] - - total_num_gt = total_num_gt + num_gt - total_num_det = total_num_det + num_det - - local_accumulative_recall = 0 - local_accumulative_precision = 0 - gt_flag = np.zeros((1, num_gt)) - det_flag = np.zeros((1, num_det)) - - #######first check for one-to-one case########## - local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ - gt_flag, det_flag, hit_str_num = one_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idx) - - hit_str_count += hit_str_num - #######then check for one-to-many case########## - local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ - gt_flag, det_flag, hit_str_num = one_to_many(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idx) - hit_str_count += hit_str_num - #######then check for many-to-one case########## - local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ - gt_flag, det_flag, hit_str_num = many_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idx) - hit_str_count += hit_str_num - - try: - recall = global_accumulative_recall / total_num_gt - except ZeroDivisionError: - recall = 0 - - try: - precision = global_accumulative_precision / total_num_det - except ZeroDivisionError: - precision = 0 - - try: - f_score = 2 * precision * recall / (precision + recall) - except ZeroDivisionError: - f_score = 0 - - try: - seqerr = 1 - float(hit_str_count) / global_accumulative_recall - except ZeroDivisionError: - seqerr = 1 - - try: - recall_e2e = 
float(hit_str_count) / total_num_gt - except ZeroDivisionError: - recall_e2e = 0 - - try: - precision_e2e = float(hit_str_count) / total_num_det - except ZeroDivisionError: - precision_e2e = 0 - - try: - f_score_e2e = 2 * precision_e2e * recall_e2e / ( - precision_e2e + recall_e2e) - except ZeroDivisionError: - f_score_e2e = 0 - - final = { - 'total_num_gt': total_num_gt, - 'total_num_det': total_num_det, - 'global_accumulative_recall': global_accumulative_recall, - 'hit_str_count': hit_str_count, - 'recall': recall, - 'precision': precision, - 'f_score': f_score, - 'seqerr': seqerr, - 'recall_e2e': recall_e2e, - 'precision_e2e': precision_e2e, - 'f_score_e2e': f_score_e2e - } - return final diff --git a/backend/ppocr/utils/e2e_metric/polygon_fast.py b/backend/ppocr/utils/e2e_metric/polygon_fast.py deleted file mode 100755 index 81c9ad70..00000000 --- a/backend/ppocr/utils/e2e_metric/polygon_fast.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -from shapely.geometry import Polygon -""" -:param det_x: [1, N] Xs of detection's vertices -:param det_y: [1, N] Ys of detection's vertices -:param gt_x: [1, N] Xs of groundtruth's vertices -:param gt_y: [1, N] Ys of groundtruth's vertices - -############## -All the calculation of 'AREA' in this script is handled by: -1) First generating a binary mask with the polygon area filled up with 1's -2) Summing up all the 1's -""" - - -def area(x, y): - polygon = Polygon(np.stack([x, y], axis=1)) - return float(polygon.area) - - -def approx_area_of_intersection(det_x, det_y, gt_x, gt_y): - """ - This helper determine if both polygons are intersecting with each others with an approximation method. 
- Area of intersection represented by the minimum bounding rectangular [xmin, ymin, xmax, ymax] - """ - det_ymax = np.max(det_y) - det_xmax = np.max(det_x) - det_ymin = np.min(det_y) - det_xmin = np.min(det_x) - - gt_ymax = np.max(gt_y) - gt_xmax = np.max(gt_x) - gt_ymin = np.min(gt_y) - gt_xmin = np.min(gt_x) - - all_min_ymax = np.minimum(det_ymax, gt_ymax) - all_max_ymin = np.maximum(det_ymin, gt_ymin) - - intersect_heights = np.maximum(0.0, (all_min_ymax - all_max_ymin)) - - all_min_xmax = np.minimum(det_xmax, gt_xmax) - all_max_xmin = np.maximum(det_xmin, gt_xmin) - intersect_widths = np.maximum(0.0, (all_min_xmax - all_max_xmin)) - - return intersect_heights * intersect_widths - - -def area_of_intersection(det_x, det_y, gt_x, gt_y): - p1 = Polygon(np.stack([det_x, det_y], axis=1)).buffer(0) - p2 = Polygon(np.stack([gt_x, gt_y], axis=1)).buffer(0) - return float(p1.intersection(p2).area) - - -def area_of_union(det_x, det_y, gt_x, gt_y): - p1 = Polygon(np.stack([det_x, det_y], axis=1)).buffer(0) - p2 = Polygon(np.stack([gt_x, gt_y], axis=1)).buffer(0) - return float(p1.union(p2).area) - - -def iou(det_x, det_y, gt_x, gt_y): - return area_of_intersection(det_x, det_y, gt_x, gt_y) / ( - area_of_union(det_x, det_y, gt_x, gt_y) + 1.0) - - -def iod(det_x, det_y, gt_x, gt_y): - """ - This helper determine the fraction of intersection area over detection area - """ - return area_of_intersection(det_x, det_y, gt_x, gt_y) / ( - area(det_x, det_y) + 1.0) diff --git a/backend/ppocr/utils/e2e_utils/extract_batchsize.py b/backend/ppocr/utils/e2e_utils/extract_batchsize.py deleted file mode 100644 index e99a833e..00000000 --- a/backend/ppocr/utils/e2e_utils/extract_batchsize.py +++ /dev/null @@ -1,87 +0,0 @@ -import paddle -import numpy as np -import copy - - -def org_tcl_rois(batch_size, pos_lists, pos_masks, label_lists, tcl_bs): - """ - """ - pos_lists_, pos_masks_, label_lists_ = [], [], [] - img_bs = batch_size - ngpu = int(batch_size / img_bs) - img_ids = np.array(pos_lists, dtype=np.int32)[:, 0, 0].copy() - pos_lists_split, pos_masks_split, label_lists_split = [], [], [] - for i in range(ngpu): - pos_lists_split.append([]) - pos_masks_split.append([]) - label_lists_split.append([]) - - for i in range(img_ids.shape[0]): - img_id = img_ids[i] - gpu_id = int(img_id / img_bs) - img_id = img_id % img_bs - pos_list = pos_lists[i].copy() - pos_list[:, 0] = img_id - pos_lists_split[gpu_id].append(pos_list) - pos_masks_split[gpu_id].append(pos_masks[i].copy()) - label_lists_split[gpu_id].append(copy.deepcopy(label_lists[i])) - # repeat or delete - for i in range(ngpu): - vp_len = len(pos_lists_split[i]) - if vp_len <= tcl_bs: - for j in range(0, tcl_bs - vp_len): - pos_list = pos_lists_split[i][j].copy() - pos_lists_split[i].append(pos_list) - pos_mask = pos_masks_split[i][j].copy() - pos_masks_split[i].append(pos_mask) - label_list = copy.deepcopy(label_lists_split[i][j]) - label_lists_split[i].append(label_list) - else: - for j in range(0, vp_len - tcl_bs): - c_len = len(pos_lists_split[i]) - pop_id = np.random.permutation(c_len)[0] - pos_lists_split[i].pop(pop_id) - pos_masks_split[i].pop(pop_id) - label_lists_split[i].pop(pop_id) - # merge - for i in range(ngpu): - pos_lists_.extend(pos_lists_split[i]) - pos_masks_.extend(pos_masks_split[i]) - label_lists_.extend(label_lists_split[i]) - return pos_lists_, pos_masks_, label_lists_ - - -def pre_process(label_list, pos_list, pos_mask, max_text_length, max_text_nums, - pad_num, tcl_bs): - label_list = label_list.numpy() - batch, _, _, _ = 
label_list.shape - pos_list = pos_list.numpy() - pos_mask = pos_mask.numpy() - pos_list_t = [] - pos_mask_t = [] - label_list_t = [] - for i in range(batch): - for j in range(max_text_nums): - if pos_mask[i, j].any(): - pos_list_t.append(pos_list[i][j]) - pos_mask_t.append(pos_mask[i][j]) - label_list_t.append(label_list[i][j]) - pos_list, pos_mask, label_list = org_tcl_rois(batch, pos_list_t, pos_mask_t, - label_list_t, tcl_bs) - label = [] - tt = [l.tolist() for l in label_list] - for i in range(tcl_bs): - k = 0 - for j in range(max_text_length): - if tt[i][j][0] != pad_num: - k += 1 - else: - break - label.append(k) - label = paddle.to_tensor(label) - label = paddle.cast(label, dtype='int64') - pos_list = paddle.to_tensor(pos_list) - pos_mask = paddle.to_tensor(pos_mask) - label_list = paddle.squeeze(paddle.to_tensor(label_list), axis=2) - label_list = paddle.cast(label_list, dtype='int32') - return pos_list, pos_mask, label_list, label diff --git a/backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py b/backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py deleted file mode 100644 index 787cd301..00000000 --- a/backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py +++ /dev/null @@ -1,457 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Contains various CTC decoders.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -import math - -import numpy as np -from itertools import groupby -from skimage.morphology._skeletonize import thin - - -def get_dict(character_dict_path): - character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - character_str += line - dict_character = list(character_str) - return dict_character - - -def softmax(logits): - """ - logits: N x d - """ - max_value = np.max(logits, axis=1, keepdims=True) - exp = np.exp(logits - max_value) - exp_sum = np.sum(exp, axis=1, keepdims=True) - dist = exp / exp_sum - return dist - - -def get_keep_pos_idxs(labels, remove_blank=None): - """ - Remove duplicate and get pos idxs of keep items. - The value of keep_blank should be [None, 95]. - """ - duplicate_len_list = [] - keep_pos_idx_list = [] - keep_char_idx_list = [] - for k, v_ in groupby(labels): - current_len = len(list(v_)) - if k != remove_blank: - current_idx = int(sum(duplicate_len_list) + current_len // 2) - keep_pos_idx_list.append(current_idx) - keep_char_idx_list.append(k) - duplicate_len_list.append(current_len) - return keep_char_idx_list, keep_pos_idx_list - - -def remove_blank(labels, blank=0): - new_labels = [x for x in labels if x != blank] - return new_labels - - -def insert_blank(labels, blank=0): - new_labels = [blank] - for l in labels: - new_labels += [l, blank] - return new_labels - - -def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True): - """ - CTC greedy (best path) decoder. 
- """ - raw_str = np.argmax(np.array(probs_seq), axis=1) - remove_blank_in_pos = None if keep_blank_in_idxs else blank - dedup_str, keep_idx_list = get_keep_pos_idxs( - raw_str, remove_blank=remove_blank_in_pos) - dst_str = remove_blank(dedup_str, blank=blank) - return dst_str, keep_idx_list - - -def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4): - _, _, C = logits_map.shape - ys, xs = zip(*gather_info) - logits_seq = logits_map[list(ys), list(xs)] - probs_seq = logits_seq - labels = np.argmax(probs_seq, axis=1) - dst_str = [k for k, v_ in groupby(labels) if k != C - 1] - detal = len(gather_info) // (pts_num - 1) - keep_idx_list = [0] + [detal * (i + 1) for i in range(pts_num - 2)] + [-1] - keep_gather_list = [gather_info[idx] for idx in keep_idx_list] - return dst_str, keep_gather_list - - -def ctc_decoder_for_image(gather_info_list, - logits_map, - Lexicon_Table, - pts_num=6): - """ - CTC decoder using multiple processes. - """ - decoder_str = [] - decoder_xys = [] - for gather_info in gather_info_list: - if len(gather_info) < pts_num: - continue - dst_str, xys_list = instance_ctc_greedy_decoder( - gather_info, logits_map, pts_num=pts_num) - dst_str_readable = ''.join([Lexicon_Table[idx] for idx in dst_str]) - if len(dst_str_readable) < 2: - continue - decoder_str.append(dst_str_readable) - decoder_xys.append(xys_list) - return decoder_str, decoder_xys - - -def sort_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list, point_direction): - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point, np.array(sorted_direction) - - -def add_id(pos_list, image_id=0): - """ - Add id for gather feature, for inference. - """ - new_list = [] - for item in pos_list: - new_list.append((image_id, item[0], item[1])) - return new_list - - -def sort_and_expand_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] 
- """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - left_list = [] - right_list = [] - for i in range(append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - left_list.append((ly, lx)) - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - right_list.append((ry, rx)) - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def sort_and_expand_with_direction_v2(pos_list, f_direction, binary_tcl_map): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - binary_tcl_map: h x w - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - max_append_num = 2 * append_num - - left_list = [] - right_list = [] - for i in range(max_append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - if binary_tcl_map[ly, lx] > 0.5: - left_list.append((ly, lx)) - else: - break - - for i in range(max_append_num): - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - if binary_tcl_map[ry, rx] > 0.5: - right_list.append((ry, rx)) - else: - break - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def point_pair2poly(point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. 
- """ - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2) - - -def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.): - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - -def expand_poly_along_width(poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - -def restore_poly(instance_yxs_list, seq_strs, p_border, ratio_w, ratio_h, src_w, - src_h, valid_set): - poly_list = [] - keep_str_list = [] - for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs): - if len(keep_str) < 2: - print('--> too short, {}'.format(keep_str)) - continue - - offset_expand = 1.0 - if valid_set == 'totaltext': - offset_expand = 1.2 - - point_pair_list = [] - for y, x in yx_center_line: - offset = p_border[:, y, x].reshape(2, 2) * offset_expand - ori_yx = np.array([y, x], dtype=np.float32) - point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / np.array( - [ratio_w, ratio_h]).reshape(-1, 2) - point_pair_list.append(point_pair) - - detected_poly = point_pair2poly(point_pair_list) - detected_poly = expand_poly_along_width( - detected_poly, shrink_ratio_of_width=0.2) - detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w) - detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h) - - keep_str_list.append(keep_str) - if valid_set == 'partvgg': - middle_point = len(detected_poly) // 2 - detected_poly = detected_poly[ - [0, middle_point - 1, middle_point, -1], :] - poly_list.append(detected_poly) - elif valid_set == 'totaltext': - poly_list.append(detected_poly) - else: - print('--> Not supported format.') - exit(-1) - return poly_list, keep_str_list - - -def generate_pivot_list_fast(p_score, - p_char_maps, - f_direction, - Lexicon_Table, - score_thresh=0.5): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map = (p_score > score_thresh) * 1.0 - skeleton_map = thin(p_tcl_map.astype(np.uint8)) - instance_count, instance_label_map = cv2.connectedComponents( - skeleton_map.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = 
np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - - if len(pos_list) < 3: - continue - - pos_list_sorted = sort_and_expand_with_direction_v2( - pos_list, f_direction, p_tcl_map) - all_pos_yxs.append(pos_list_sorted) - - p_char_maps = p_char_maps.transpose([1, 2, 0]) - decoded_str, keep_yxs_list = ctc_decoder_for_image( - all_pos_yxs, logits_map=p_char_maps, Lexicon_Table=Lexicon_Table) - return keep_yxs_list, decoded_str - - -def extract_main_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - pos_list = np.array(pos_list) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - average_direction = average_direction / ( - np.linalg.norm(average_direction) + 1e-6) - return average_direction - - -def sort_by_direction_with_image_id_deprecated(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[id, y, x], [id, y, x], [id, y, x] ...] - """ - pos_list_full = np.array(pos_list).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - return sorted_list - - -def sort_by_direction_with_image_id(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list_full, point_direction): - pos_list_full = np.array(pos_list_full).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 3) - point_direction = f_direction[pos_list[:, 1], pos_list[:, 2]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point diff --git a/backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py b/backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py deleted file mode 100644 index ace46fba..00000000 --- a/backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py +++ /dev/null @@ -1,592 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Contains various CTC decoders.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -import math - -import numpy as np -from itertools import groupby -from skimage.morphology._skeletonize import thin - - -def get_dict(character_dict_path): - character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - character_str += line - dict_character = list(character_str) - return dict_character - - -def point_pair2poly(point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - pair_length_list = [] - for point_pair in point_pair_list: - pair_length = np.linalg.norm(point_pair[0] - point_pair[1]) - pair_length_list.append(pair_length) - pair_length_list = np.array(pair_length_list) - pair_info = (pair_length_list.max(), pair_length_list.min(), - pair_length_list.mean()) - - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2), pair_info - - -def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - -def expand_poly_along_width(poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + \ - shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - -def softmax(logits): - """ - logits: N x d - """ - max_value = np.max(logits, axis=1, keepdims=True) - exp = np.exp(logits - max_value) - exp_sum = np.sum(exp, axis=1, keepdims=True) - dist = exp / exp_sum - return dist - - -def get_keep_pos_idxs(labels, remove_blank=None): - """ - Remove duplicate and get pos idxs of keep items. 
- The value of keep_blank should be [None, 95]. - """ - duplicate_len_list = [] - keep_pos_idx_list = [] - keep_char_idx_list = [] - for k, v_ in groupby(labels): - current_len = len(list(v_)) - if k != remove_blank: - current_idx = int(sum(duplicate_len_list) + current_len // 2) - keep_pos_idx_list.append(current_idx) - keep_char_idx_list.append(k) - duplicate_len_list.append(current_len) - return keep_char_idx_list, keep_pos_idx_list - - -def remove_blank(labels, blank=0): - new_labels = [x for x in labels if x != blank] - return new_labels - - -def insert_blank(labels, blank=0): - new_labels = [blank] - for l in labels: - new_labels += [l, blank] - return new_labels - - -def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True): - """ - CTC greedy (best path) decoder. - """ - raw_str = np.argmax(np.array(probs_seq), axis=1) - remove_blank_in_pos = None if keep_blank_in_idxs else blank - dedup_str, keep_idx_list = get_keep_pos_idxs( - raw_str, remove_blank=remove_blank_in_pos) - dst_str = remove_blank(dedup_str, blank=blank) - return dst_str, keep_idx_list - - -def instance_ctc_greedy_decoder(gather_info, - logits_map, - keep_blank_in_idxs=True): - """ - gather_info: [[x, y], [x, y] ...] - logits_map: H x W X (n_chars + 1) - """ - _, _, C = logits_map.shape - ys, xs = zip(*gather_info) - logits_seq = logits_map[list(ys), list(xs)] # n x 96 - probs_seq = softmax(logits_seq) - dst_str, keep_idx_list = ctc_greedy_decoder( - probs_seq, blank=C - 1, keep_blank_in_idxs=keep_blank_in_idxs) - keep_gather_list = [gather_info[idx] for idx in keep_idx_list] - return dst_str, keep_gather_list - - -def ctc_decoder_for_image(gather_info_list, logits_map, - keep_blank_in_idxs=True): - """ - CTC decoder using multiple processes. - """ - decoder_results = [] - for gather_info in gather_info_list: - res = instance_ctc_greedy_decoder( - gather_info, logits_map, keep_blank_in_idxs=keep_blank_in_idxs) - decoder_results.append(res) - return decoder_results - - -def sort_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] 
- """ - - def sort_part_with_direction(pos_list, point_direction): - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point, np.array(sorted_direction) - - -def add_id(pos_list, image_id=0): - """ - Add id for gather feature, for inference. - """ - new_list = [] - for item in pos_list: - new_list.append((image_id, item[0], item[1])) - return new_list - - -def sort_and_expand_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - # expand along - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - left_list = [] - right_list = [] - for i in range(append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - left_list.append((ly, lx)) - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - right_list.append((ry, rx)) - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def sort_and_expand_with_direction_v2(pos_list, f_direction, binary_tcl_map): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] 
- binary_tcl_map: h x w - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - # expand along - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - max_append_num = 2 * append_num - - left_list = [] - right_list = [] - for i in range(max_append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - if binary_tcl_map[ly, lx] > 0.5: - left_list.append((ly, lx)) - else: - break - - for i in range(max_append_num): - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - if binary_tcl_map[ry, rx] > 0.5: - right_list.append((ry, rx)) - else: - break - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def generate_pivot_list_curved(p_score, - p_char_maps, - f_direction, - score_thresh=0.5, - is_expand=True, - is_backbone=False, - image_id=0): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map = (p_score > score_thresh) * 1.0 - skeleton_map = thin(p_tcl_map) - instance_count, instance_label_map = cv2.connectedComponents( - skeleton_map.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - center_pos_yxs = [] - end_points_yxs = [] - instance_center_pos_yxs = [] - pred_strs = [] - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - - ### FIX-ME, eliminate outlier - if len(pos_list) < 3: - continue - - if is_expand: - pos_list_sorted = sort_and_expand_with_direction_v2( - pos_list, f_direction, p_tcl_map) - else: - pos_list_sorted, _ = sort_with_direction(pos_list, f_direction) - all_pos_yxs.append(pos_list_sorted) - - # use decoder to filter backgroud points. 
-    p_char_maps = p_char_maps.transpose([1, 2, 0])
-    decode_res = ctc_decoder_for_image(
-        all_pos_yxs, logits_map=p_char_maps, keep_blank_in_idxs=True)
-    for decoded_str, keep_yxs_list in decode_res:
-        if is_backbone:
-            keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id)
-            instance_center_pos_yxs.append(keep_yxs_list_with_id)
-            pred_strs.append(decoded_str)
-        else:
-            end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1]))
-            center_pos_yxs.extend(keep_yxs_list)
-
-    if is_backbone:
-        return pred_strs, instance_center_pos_yxs
-    else:
-        return center_pos_yxs, end_points_yxs
-
-
-def generate_pivot_list_horizontal(p_score,
-                                   p_char_maps,
-                                   f_direction,
-                                   score_thresh=0.5,
-                                   is_backbone=False,
-                                   image_id=0):
-    """
-    return center point and end point of TCL instance; filter with the char maps;
-    """
-    p_score = p_score[0]
-    f_direction = f_direction.transpose(1, 2, 0)
-    p_tcl_map_bi = (p_score > score_thresh) * 1.0
-    instance_count, instance_label_map = cv2.connectedComponents(
-        p_tcl_map_bi.astype(np.uint8), connectivity=8)
-
-    # get TCL Instance
-    all_pos_yxs = []
-    center_pos_yxs = []
-    end_points_yxs = []
-    instance_center_pos_yxs = []
-
-    if instance_count > 0:
-        for instance_id in range(1, instance_count):
-            pos_list = []
-            ys, xs = np.where(instance_label_map == instance_id)
-            pos_list = list(zip(ys, xs))
-
-            ### FIX-ME, eliminate outlier
-            if len(pos_list) < 5:
-                continue
-
-            # add rule here
-            main_direction = extract_main_direction(pos_list,
-                                                    f_direction)  # y x
-            reference_direction = np.array([0, 1]).reshape([-1, 2])  # y x
-            is_h_angle = abs(np.sum(
-                main_direction * reference_direction)) < math.cos(math.pi / 180 *
-                                                                  70)
-
-            point_yxs = np.array(pos_list)
-            max_y, max_x = np.max(point_yxs, axis=0)
-            min_y, min_x = np.min(point_yxs, axis=0)
-            is_h_len = (max_y - min_y) < 1.5 * (max_x - min_x)
-
-            pos_list_final = []
-            if is_h_len:
-                xs = np.unique(xs)
-                for x in xs:
-                    ys = instance_label_map[:, x].copy().reshape((-1, ))
-                    y = int(np.where(ys == instance_id)[0].mean())
-                    pos_list_final.append((y, x))
-            else:
-                ys = np.unique(ys)
-                for y in ys:
-                    xs = instance_label_map[y, :].copy().reshape((-1, ))
-                    x = int(np.where(xs == instance_id)[0].mean())
-                    pos_list_final.append((y, x))
-
-            pos_list_sorted, _ = sort_with_direction(pos_list_final,
-                                                     f_direction)
-            all_pos_yxs.append(pos_list_sorted)
-
-    # use decoder to filter background points.
-    p_char_maps = p_char_maps.transpose([1, 2, 0])
-    decode_res = ctc_decoder_for_image(
-        all_pos_yxs, logits_map=p_char_maps, keep_blank_in_idxs=True)
-    for decoded_str, keep_yxs_list in decode_res:
-        if is_backbone:
-            keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id)
-            instance_center_pos_yxs.append(keep_yxs_list_with_id)
-        else:
-            end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1]))
-            center_pos_yxs.extend(keep_yxs_list)
-
-    if is_backbone:
-        return instance_center_pos_yxs
-    else:
-        return center_pos_yxs, end_points_yxs
-
-
-def generate_pivot_list_slow(p_score,
-                             p_char_maps,
-                             f_direction,
-                             score_thresh=0.5,
-                             is_backbone=False,
-                             is_curved=True,
-                             image_id=0):
-    """
-    Wrap all the functions together.
- """ - if is_curved: - return generate_pivot_list_curved( - p_score, - p_char_maps, - f_direction, - score_thresh=score_thresh, - is_expand=True, - is_backbone=is_backbone, - image_id=image_id) - else: - return generate_pivot_list_horizontal( - p_score, - p_char_maps, - f_direction, - score_thresh=score_thresh, - is_backbone=is_backbone, - image_id=image_id) - - -# for refine module -def extract_main_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - pos_list = np.array(pos_list) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - average_direction = average_direction / ( - np.linalg.norm(average_direction) + 1e-6) - return average_direction - - -def sort_by_direction_with_image_id_deprecated(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[id, y, x], [id, y, x], [id, y, x] ...] - """ - pos_list_full = np.array(pos_list).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - return sorted_list - - -def sort_by_direction_with_image_id(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list_full, point_direction): - pos_list_full = np.array(pos_list_full).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 3) - point_direction = f_direction[pos_list[:, 1], pos_list[:, 2]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point - - -def generate_pivot_list_tt_inference(p_score, - p_char_maps, - f_direction, - score_thresh=0.5, - is_backbone=False, - is_curved=True, - image_id=0): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map = (p_score > score_thresh) * 1.0 - skeleton_map = thin(p_tcl_map) - instance_count, instance_label_map = 
cv2.connectedComponents( - skeleton_map.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - ### FIX-ME, eliminate outlier - if len(pos_list) < 3: - continue - pos_list_sorted = sort_and_expand_with_direction_v2( - pos_list, f_direction, p_tcl_map) - pos_list_sorted_with_id = add_id(pos_list_sorted, image_id=image_id) - all_pos_yxs.append(pos_list_sorted_with_id) - return all_pos_yxs diff --git a/backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py b/backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py deleted file mode 100644 index a15503c0..00000000 --- a/backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import os -import sys - -__dir__ = os.path.dirname(__file__) -sys.path.append(__dir__) -sys.path.append(os.path.join(__dir__, '..')) -from extract_textpoint_slow import * -from extract_textpoint_fast import generate_pivot_list_fast, restore_poly - - -class PGNet_PostProcess(object): - # two different post-process - def __init__(self, character_dict_path, valid_set, score_thresh, outs_dict, - shape_list): - self.Lexicon_Table = get_dict(character_dict_path) - self.valid_set = valid_set - self.score_thresh = score_thresh - self.outs_dict = outs_dict - self.shape_list = shape_list - - def pg_postprocess_fast(self): - p_score = self.outs_dict['f_score'] - p_border = self.outs_dict['f_border'] - p_char = self.outs_dict['f_char'] - p_direction = self.outs_dict['f_direction'] - if isinstance(p_score, paddle.Tensor): - p_score = p_score[0].numpy() - p_border = p_border[0].numpy() - p_direction = p_direction[0].numpy() - p_char = p_char[0].numpy() - else: - p_score = p_score[0] - p_border = p_border[0] - p_direction = p_direction[0] - p_char = p_char[0] - - src_h, src_w, ratio_h, ratio_w = self.shape_list[0] - instance_yxs_list, seq_strs = generate_pivot_list_fast( - p_score, - p_char, - p_direction, - self.Lexicon_Table, - score_thresh=self.score_thresh) - poly_list, keep_str_list = restore_poly(instance_yxs_list, seq_strs, - p_border, ratio_w, ratio_h, - src_w, src_h, self.valid_set) - data = { - 'points': poly_list, - 'texts': keep_str_list, - } - return data - - def pg_postprocess_slow(self): - p_score = self.outs_dict['f_score'] - p_border = self.outs_dict['f_border'] - p_char = self.outs_dict['f_char'] - p_direction = self.outs_dict['f_direction'] - if isinstance(p_score, paddle.Tensor): - p_score = p_score[0].numpy() - p_border = p_border[0].numpy() - p_direction = p_direction[0].numpy() - p_char = p_char[0].numpy() - else: - p_score = p_score[0] - p_border = p_border[0] - p_direction = p_direction[0] - p_char = p_char[0] - src_h, src_w, 
ratio_h, ratio_w = self.shape_list[0] - is_curved = self.valid_set == "totaltext" - char_seq_idx_set, instance_yxs_list = generate_pivot_list_slow( - p_score, - p_char, - p_direction, - score_thresh=self.score_thresh, - is_backbone=True, - is_curved=is_curved) - seq_strs = [] - for char_idx_set in char_seq_idx_set: - pr_str = ''.join([self.Lexicon_Table[pos] for pos in char_idx_set]) - seq_strs.append(pr_str) - poly_list = [] - keep_str_list = [] - all_point_list = [] - all_point_pair_list = [] - for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs): - if len(yx_center_line) == 1: - yx_center_line.append(yx_center_line[-1]) - - offset_expand = 1.0 - if self.valid_set == 'totaltext': - offset_expand = 1.2 - - point_pair_list = [] - for batch_id, y, x in yx_center_line: - offset = p_border[:, y, x].reshape(2, 2) - if offset_expand != 1.0: - offset_length = np.linalg.norm( - offset, axis=1, keepdims=True) - expand_length = np.clip( - offset_length * (offset_expand - 1), - a_min=0.5, - a_max=3.0) - offset_detal = offset / offset_length * expand_length - offset = offset + offset_detal - ori_yx = np.array([y, x], dtype=np.float32) - point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / np.array( - [ratio_w, ratio_h]).reshape(-1, 2) - point_pair_list.append(point_pair) - - all_point_list.append([ - int(round(x * 4.0 / ratio_w)), - int(round(y * 4.0 / ratio_h)) - ]) - all_point_pair_list.append(point_pair.round().astype(np.int32) - .tolist()) - - detected_poly, pair_length_info = point_pair2poly(point_pair_list) - detected_poly = expand_poly_along_width( - detected_poly, shrink_ratio_of_width=0.2) - detected_poly[:, 0] = np.clip( - detected_poly[:, 0], a_min=0, a_max=src_w) - detected_poly[:, 1] = np.clip( - detected_poly[:, 1], a_min=0, a_max=src_h) - - if len(keep_str) < 2: - continue - - keep_str_list.append(keep_str) - detected_poly = np.round(detected_poly).astype('int32') - if self.valid_set == 'partvgg': - middle_point = len(detected_poly) // 2 - detected_poly = detected_poly[ - [0, middle_point - 1, middle_point, -1], :] - poly_list.append(detected_poly) - elif self.valid_set == 'totaltext': - poly_list.append(detected_poly) - else: - print('--> Not supported format.') - exit(-1) - data = { - 'points': poly_list, - 'texts': keep_str_list, - } - return data diff --git a/backend/ppocr/utils/e2e_utils/visual.py b/backend/ppocr/utils/e2e_utils/visual.py deleted file mode 100644 index e6e4fd06..00000000 --- a/backend/ppocr/utils/e2e_utils/visual.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
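Note (illustration only, not part of the diff): the pg_postprocess_slow hunk above widens each predicted border offset by offset_expand before mapping points back to source-image coordinates. A minimal NumPy sketch of that step; the 0.5/3.0 clip bounds, the 4x stride, and the (y, x) -> (x, y) swap come from the deleted code, while all input values are hypothetical:

import numpy as np

y, x = 12, 40                                  # one TCL center point (hypothetical)
offset = np.array([[3.0, 4.0], [-3.0, -4.0]])  # 2x2 border offset read from p_border
offset_expand = 1.2                            # ratio the deleted code uses for 'totaltext'
ratio_w, ratio_h = 0.5, 0.5                    # hypothetical resize ratios from shape_list

length = np.linalg.norm(offset, axis=1, keepdims=True)
extra = np.clip(length * (offset_expand - 1), a_min=0.5, a_max=3.0)
offset = offset + offset / length * extra      # push both border points outward

# swap (y, x) -> (x, y), undo the 4x feature-map stride and the resize ratios
point_pair = (np.array([y, x]) + offset)[:, ::-1] * 4.0 / np.array(
    [ratio_w, ratio_h]).reshape(-1, 2)
print(point_pair)                              # the two (x, y) border points in the source image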
-import numpy as np -import cv2 -import time - - -def resize_image(im, max_side_len=512): - """ - resize image to a size multiple of max_stride which is required by the network - :param im: the resized image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - if resize_h > resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return im, (ratio_h, ratio_w) - - -def resize_image_min(im, max_side_len=512): - """ - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - if resize_h < resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - -def resize_image_for_totaltext(im, max_side_len=512): - """ - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - ratio = 1.25 - if h * ratio > max_side_len: - ratio = float(max_side_len) / resize_h - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - -def point_pair2poly(point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - pair_length_list = [] - for point_pair in point_pair_list: - pair_length = np.linalg.norm(point_pair[0] - point_pair[1]) - pair_length_list.append(pair_length) - pair_length_list = np.array(pair_length_list) - pair_info = (pair_length_list.max(), pair_length_list.min(), - pair_length_list.mean()) - - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2), pair_info - - -def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - -def expand_poly_along_width(poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. 
- """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + \ - shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - -def norm2(x, axis=None): - if axis: - return np.sqrt(np.sum(x**2, axis=axis)) - return np.sqrt(np.sum(x**2)) - - -def cos(p1, p2): - return (p1 * p2).sum() / (norm2(p1) * norm2(p2)) diff --git a/backend/ppocr/utils/iou.py b/backend/ppocr/utils/iou.py deleted file mode 100644 index 35459f5f..00000000 --- a/backend/ppocr/utils/iou.py +++ /dev/null @@ -1,54 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/loss/iou.py -""" - -import paddle - -EPS = 1e-6 - - -def iou_single(a, b, mask, n_class): - valid = mask == 1 - a = a.masked_select(valid) - b = b.masked_select(valid) - miou = [] - for i in range(n_class): - if a.shape == [0] and a.shape == b.shape: - inter = paddle.to_tensor(0.0) - union = paddle.to_tensor(0.0) - else: - inter = ((a == i).logical_and(b == i)).astype('float32') - union = ((a == i).logical_or(b == i)).astype('float32') - miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS)) - miou = sum(miou) / len(miou) - return miou - - -def iou(a, b, mask, n_class=2, reduce=True): - batch_size = a.shape[0] - - a = a.reshape([batch_size, -1]) - b = b.reshape([batch_size, -1]) - mask = mask.reshape([batch_size, -1]) - - iou = paddle.zeros((batch_size, ), dtype='float32') - for i in range(batch_size): - iou[i] = iou_single(a[i], b[i], mask[i], n_class) - - if reduce: - iou = paddle.mean(iou) - return iou diff --git a/backend/ppocr/utils/loggers/__init__.py b/backend/ppocr/utils/loggers/__init__.py deleted file mode 100644 index b1e92f73..00000000 --- a/backend/ppocr/utils/loggers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .vdl_logger import VDLLogger -from .wandb_logger import WandbLogger -from .loggers import Loggers diff --git a/backend/ppocr/utils/loggers/base_logger.py b/backend/ppocr/utils/loggers/base_logger.py deleted file mode 100644 index 3a7fc359..00000000 --- a/backend/ppocr/utils/loggers/base_logger.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -from abc import ABC, abstractmethod - -class BaseLogger(ABC): - def __init__(self, save_dir): - self.save_dir = save_dir - os.makedirs(self.save_dir, exist_ok=True) - - @abstractmethod - def log_metrics(self, metrics, prefix=None): - pass - - @abstractmethod - def close(self): - pass \ No newline at end of file diff --git a/backend/ppocr/utils/loggers/loggers.py b/backend/ppocr/utils/loggers/loggers.py deleted file mode 100644 index 26014662..00000000 --- a/backend/ppocr/utils/loggers/loggers.py +++ /dev/null @@ -1,18 +0,0 @@ -from .wandb_logger import WandbLogger - -class Loggers(object): - def __init__(self, loggers): - super().__init__() - self.loggers = loggers - - def log_metrics(self, metrics, prefix=None, step=None): - for logger in self.loggers: - logger.log_metrics(metrics, prefix=prefix, step=step) - - def log_model(self, is_best, prefix, metadata=None): - for logger in self.loggers: - logger.log_model(is_best=is_best, prefix=prefix, metadata=metadata) - - def close(self): - for logger in self.loggers: - logger.close() \ No newline at end of file diff --git a/backend/ppocr/utils/loggers/vdl_logger.py b/backend/ppocr/utils/loggers/vdl_logger.py deleted file mode 100644 index c345f932..00000000 --- a/backend/ppocr/utils/loggers/vdl_logger.py +++ /dev/null @@ -1,21 +0,0 @@ -from .base_logger import BaseLogger -from visualdl import LogWriter - -class VDLLogger(BaseLogger): - def __init__(self, save_dir): - super().__init__(save_dir) - self.vdl_writer = LogWriter(logdir=save_dir) - - def log_metrics(self, metrics, prefix=None, step=None): - if not prefix: - prefix = "" - updated_metrics = {prefix + "/" + k: v for k, v in metrics.items()} - - for k, v in updated_metrics.items(): - self.vdl_writer.add_scalar(k, v, step) - - def log_model(self, is_best, prefix, metadata=None): - pass - - def close(self): - self.vdl_writer.close() \ No newline at end of file diff --git a/backend/ppocr/utils/loggers/wandb_logger.py 
b/backend/ppocr/utils/loggers/wandb_logger.py deleted file mode 100644 index 5c805f4e..00000000 --- a/backend/ppocr/utils/loggers/wandb_logger.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -from .base_logger import BaseLogger - -class WandbLogger(BaseLogger): - def __init__(self, - project=None, - name=None, - id=None, - entity=None, - save_dir=None, - config=None, - **kwargs): - try: - import wandb - self.wandb = wandb - except ModuleNotFoundError: - raise ModuleNotFoundError( - "Please install wandb using `pip install wandb`" - ) - - self.project = project - self.name = name - self.id = id - self.save_dir = save_dir - self.config = config - self.kwargs = kwargs - self.entity = entity - self._run = None - self._wandb_init = dict( - project=self.project, - name=self.name, - id=self.id, - entity=self.entity, - dir=self.save_dir, - resume="allow" - ) - self._wandb_init.update(**kwargs) - - _ = self.run - - if self.config: - self.run.settings_config.update(self.config) - - @property - def run(self): - if self._run is None: - if self.wandb.run is not None: - logger.info( - "There is a wandb run already in progress " - "and newly created instances of `WandbLogger` will reuse" - " this run. If this is not desired, call `wandb.finish()`" - "before instantiating `WandbLogger`." - ) - self._run = self.wandb.run - else: - self._run = self.wandb.init(**self._wandb_init) - return self._run - - def log_metrics(self, metrics, prefix=None, step=None): - if not prefix: - prefix = "" - updated_metrics = {prefix.lower() + "/" + k: v for k, v in metrics.items()} - - self.run.log(updated_metrics, step=step) - - def log_model(self, is_best, prefix, metadata=None): - model_path = os.path.join(self.save_dir, prefix + '.pdparams') - artifact = self.wandb.Artifact('model-{}'.format(self.run.id), type='model', metadata=metadata) - artifact.add_file(model_path, name="model_ckpt.pdparams") - - aliases = [prefix] - if is_best: - aliases.append("best") - - self.run.log_artifact(artifact, aliases=aliases) - - def close(self): - self.run.finish() \ No newline at end of file diff --git a/backend/ppocr/utils/logging.py b/backend/ppocr/utils/logging.py deleted file mode 100644 index 1eac8f35..00000000 --- a/backend/ppocr/utils/logging.py +++ /dev/null @@ -1,71 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/PytorchOCR/blob/master/torchocr/utils/logging.py -""" - -import os -import sys -import logging -import functools -import paddle.distributed as dist - -logger_initialized = {} - - -@functools.lru_cache() -def get_logger(name='ppocr', log_file=None, log_level=logging.DEBUG): - """Initialize and get a logger by name. - If the logger has not been initialized, this method will initialize the - logger by adding one or two handlers, otherwise the initialized logger will - be directly returned. During initialization, a StreamHandler will always be - added. 
If `log_file` is specified a FileHandler will also be added. - Args: - name (str): Logger name. - log_file (str | None): The log filename. If specified, a FileHandler - will be added to the logger. - log_level (int): The logger level. Note that only the process of - rank 0 is affected, and other processes will set the level to - "Error" thus be silent most of the time. - Returns: - logging.Logger: The expected logger. - """ - logger = logging.getLogger(name) - if name in logger_initialized: - return logger - for logger_name in logger_initialized: - if name.startswith(logger_name): - return logger - - formatter = logging.Formatter( - '[%(asctime)s] %(name)s %(levelname)s: %(message)s', - datefmt="%Y/%m/%d %H:%M:%S") - - stream_handler = logging.StreamHandler(stream=sys.stdout) - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) - if log_file is not None and dist.get_rank() == 0: - log_file_folder = os.path.split(log_file)[0] - os.makedirs(log_file_folder, exist_ok=True) - file_handler = logging.FileHandler(log_file, 'a') - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - if dist.get_rank() == 0: - logger.setLevel(log_level) - else: - logger.setLevel(logging.ERROR) - logger_initialized[name] = True - logger.propagate = False - return logger diff --git a/backend/ppocr/utils/network.py b/backend/ppocr/utils/network.py deleted file mode 100644 index 118d1be3..00000000 --- a/backend/ppocr/utils/network.py +++ /dev/null @@ -1,84 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
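Note (illustration only, not part of the diff): the deleted get_logger above memoizes initialization per logger name. A short usage sketch, hypothetical and only runnable against the tree before this patch removes the module:

from ppocr.utils.logging import get_logger  # import path as it existed before this patch

log = get_logger('ppocr', log_file='train.log')  # rank 0 also gets a FileHandler
log.info('training started')                     # rank 0 logs at DEBUG; other ranks only ERROR

# A name that starts with an already-initialized name returns early, so no
# duplicate handlers are attached; records still propagate up to 'ppocr'.
sub = get_logger('ppocr.dataset')
sub.info('loading data')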
- -import os -import sys -import tarfile -import requests -from tqdm import tqdm - -from ppocr.utils.logging import get_logger - - -def download_with_progressbar(url, save_path): - logger = get_logger() - response = requests.get(url, stream=True) - if response.status_code == 200: - total_size_in_bytes = int(response.headers.get('content-length', 1)) - block_size = 1024 # 1 Kibibyte - progress_bar = tqdm( - total=total_size_in_bytes, unit='iB', unit_scale=True) - with open(save_path, 'wb') as file: - for data in response.iter_content(block_size): - progress_bar.update(len(data)) - file.write(data) - progress_bar.close() - else: - logger.error("Something went wrong while downloading models") - sys.exit(0) - - -def maybe_download(model_storage_directory, url): - # using custom model - tar_file_name_list = [ - 'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel' - ] - if not os.path.exists( - os.path.join(model_storage_directory, 'inference.pdiparams') - ) or not os.path.exists( - os.path.join(model_storage_directory, 'inference.pdmodel')): - assert url.endswith('.tar'), 'Only supports tar compressed package' - tmp_path = os.path.join(model_storage_directory, url.split('/')[-1]) - print('download {} to {}'.format(url, tmp_path)) - os.makedirs(model_storage_directory, exist_ok=True) - download_with_progressbar(url, tmp_path) - with tarfile.open(tmp_path, 'r') as tarObj: - for member in tarObj.getmembers(): - filename = None - for tar_file_name in tar_file_name_list: - if tar_file_name in member.name: - filename = tar_file_name - if filename is None: - continue - file = tarObj.extractfile(member) - with open( - os.path.join(model_storage_directory, filename), - 'wb') as f: - f.write(file.read()) - os.remove(tmp_path) - - -def is_link(s): - return s is not None and s.startswith('http') - - -def confirm_model_dir_url(model_dir, default_model_dir, default_url): - url = default_url - if model_dir is None or is_link(model_dir): - if is_link(model_dir): - url = model_dir - file_name = url.split('/')[-1][:-4] - model_dir = default_model_dir - model_dir = os.path.join(model_dir, file_name) - return model_dir, url diff --git a/backend/ppocr/utils/poly_nms.py b/backend/ppocr/utils/poly_nms.py deleted file mode 100644 index 9dcb3d2c..00000000 --- a/backend/ppocr/utils/poly_nms.py +++ /dev/null @@ -1,146 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -from shapely.geometry import Polygon - - -def points2polygon(points): - """Convert k points to 1 polygon. - - Args: - points (ndarray or list): A ndarray or a list of shape (2k) - that indicates k points. - - Returns: - polygon (Polygon): A polygon object. 
- """ - if isinstance(points, list): - points = np.array(points) - - assert isinstance(points, np.ndarray) - assert (points.size % 2 == 0) and (points.size >= 8) - - point_mat = points.reshape([-1, 2]) - return Polygon(point_mat) - - -def poly_intersection(poly_det, poly_gt, buffer=0.0001): - """Calculate the intersection area between two polygon. - - Args: - poly_det (Polygon): A polygon predicted by detector. - poly_gt (Polygon): A gt polygon. - - Returns: - intersection_area (float): The intersection area between two polygons. - """ - assert isinstance(poly_det, Polygon) - assert isinstance(poly_gt, Polygon) - - if buffer == 0: - poly_inter = poly_det & poly_gt - else: - poly_inter = poly_det.buffer(buffer) & poly_gt.buffer(buffer) - return poly_inter.area, poly_inter - - -def poly_union(poly_det, poly_gt): - """Calculate the union area between two polygon. - - Args: - poly_det (Polygon): A polygon predicted by detector. - poly_gt (Polygon): A gt polygon. - - Returns: - union_area (float): The union area between two polygons. - """ - assert isinstance(poly_det, Polygon) - assert isinstance(poly_gt, Polygon) - - area_det = poly_det.area - area_gt = poly_gt.area - area_inters, _ = poly_intersection(poly_det, poly_gt) - return area_det + area_gt - area_inters - - -def valid_boundary(x, with_score=True): - num = len(x) - if num < 8: - return False - if num % 2 == 0 and (not with_score): - return True - if num % 2 == 1 and with_score: - return True - - return False - - -def boundary_iou(src, target): - """Calculate the IOU between two boundaries. - - Args: - src (list): Source boundary. - target (list): Target boundary. - - Returns: - iou (float): The iou between two boundaries. - """ - assert valid_boundary(src, False) - assert valid_boundary(target, False) - src_poly = points2polygon(src) - target_poly = points2polygon(target) - - return poly_iou(src_poly, target_poly) - - -def poly_iou(poly_det, poly_gt): - """Calculate the IOU between two polygons. - - Args: - poly_det (Polygon): A polygon predicted by detector. - poly_gt (Polygon): A gt polygon. - - Returns: - iou (float): The IOU between two polygons. - """ - assert isinstance(poly_det, Polygon) - assert isinstance(poly_gt, Polygon) - area_inters, _ = poly_intersection(poly_det, poly_gt) - area_union = poly_union(poly_det, poly_gt) - if area_union == 0: - return 0.0 - return area_inters / area_union - - -def poly_nms(polygons, threshold): - assert isinstance(polygons, list) - - polygons = np.array(sorted(polygons, key=lambda x: x[-1])) - - keep_poly = [] - index = [i for i in range(polygons.shape[0])] - - while len(index) > 0: - keep_poly.append(polygons[index[-1]].tolist()) - A = polygons[index[-1]][:-1] - index = np.delete(index, -1) - iou_list = np.zeros((len(index), )) - for i in range(len(index)): - B = polygons[index[i]][:-1] - iou_list[i] = boundary_iou(A, B) - remove_index = np.where(iou_list > threshold) - index = np.delete(index, remove_index) - - return keep_poly diff --git a/backend/ppocr/utils/profiler.py b/backend/ppocr/utils/profiler.py deleted file mode 100644 index c4e28bc6..00000000 --- a/backend/ppocr/utils/profiler.py +++ /dev/null @@ -1,110 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import paddle
-
-# A global variable to record the number of calling times for profiler
-# functions. It is used to specify the tracing range of training steps.
-_profiler_step_id = 0
-
-# A global variable to avoid parsing from string every time.
-_profiler_options = None
-
-
-class ProfilerOptions(object):
-    '''
-    Use a string to initialize a ProfilerOptions.
-    The string should be in the format: "key1=value1;key2=value2;key3=value3".
-    For example:
-      "profile_path=model.profile"
-      "batch_range=[50, 60]; profile_path=model.profile"
-      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
-    ProfilerOptions supports the following key-value pairs:
-      batch_range      - an integer list, e.g. [100, 110].
-      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
-      sorted_key       - a string, the optional values are 'calls', 'total',
-                         'max', 'min' or 'ave'.
-      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
-                         'AllOpDetail'.
-      profile_path     - a string, the path to save the serialized profile data,
-                         which can be used to generate a timeline.
-      exit_on_finished - a boolean.
-    '''
-
-    def __init__(self, options_str):
-        assert isinstance(options_str, str)
-
-        self._options = {
-            'batch_range': [10, 20],
-            'state': 'All',
-            'sorted_key': 'total',
-            'tracer_option': 'Default',
-            'profile_path': '/tmp/profile',
-            'exit_on_finished': True
-        }
-        self._parse_from_string(options_str)
-
-    def _parse_from_string(self, options_str):
-        for kv in options_str.replace(' ', '').split(';'):
-            key, value = kv.split('=')
-            if key == 'batch_range':
-                value_list = value.replace('[', '').replace(']', '').split(',')
-                value_list = list(map(int, value_list))
-                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
-                        1] > value_list[0]:
-                    self._options[key] = value_list
-            elif key == 'exit_on_finished':
-                self._options[key] = value.lower() in ("yes", "true", "t", "1")
-            elif key in [
-                    'state', 'sorted_key', 'tracer_option', 'profile_path'
-            ]:
-                self._options[key] = value
-
-    def __getitem__(self, name):
-        if self._options.get(name, None) is None:
-            raise ValueError(
-                "ProfilerOptions does not have an option named %s." % name)
-        return self._options[name]
-
-
-def add_profiler_step(options_str=None):
-    '''
-    Enable the operator-level timing using PaddlePaddle's profiler.
-    The profiler uses an independent variable to count the profiler steps.
-    One call of this function is treated as a profiler step.
-
-    Args:
-      options_str - a string to initialize the ProfilerOptions.
-                    Default is None, and the profiler is disabled.
-    '''
-    if options_str is None:
-        return
-
-    global _profiler_step_id
-    global _profiler_options
-
-    if _profiler_options is None:
-        _profiler_options = ProfilerOptions(options_str)
-
-    if _profiler_step_id == _profiler_options['batch_range'][0]:
-        paddle.utils.profiler.start_profiler(
-            _profiler_options['state'], _profiler_options['tracer_option'])
-    elif _profiler_step_id == _profiler_options['batch_range'][1]:
-        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
-                                            _profiler_options['profile_path'])
-        if _profiler_options['exit_on_finished']:
-            sys.exit(0)
-
-    _profiler_step_id += 1
diff --git a/backend/ppocr/utils/save_load.py b/backend/ppocr/utils/save_load.py
deleted file mode 100644
index b09f1db6..00000000
--- a/backend/ppocr/utils/save_load.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import errno
-import os
-import pickle
-import six
-
-import paddle
-
-from ppocr.utils.logging import get_logger
-
-__all__ = ['load_model']
-
-
-def _mkdir_if_not_exist(path, logger):
-    """
-    mkdir if it does not exist; ignore the exception raised when several
-    processes mkdir the same path at once
-    """
-    if not os.path.exists(path):
-        try:
-            os.makedirs(path)
-        except OSError as e:
-            if e.errno == errno.EEXIST and os.path.isdir(path):
-                logger.warning(
-                    '{} has already been created by another process'.format(
-                        path))
-            else:
-                raise OSError('Failed to mkdir {}'.format(path))
-
-
-def load_model(config, model, optimizer=None, model_type='det'):
-    """
-    load model from checkpoint or pretrained_model
-    """
-    logger = get_logger()
-    global_config = config['Global']
-    checkpoints = global_config.get('checkpoints')
-    pretrained_model = global_config.get('pretrained_model')
-    best_model_dict = {}
-
-    if model_type == 'vqa':
-        checkpoints = config['Architecture']['Backbone']['checkpoints']
-        # load vqa method metric
-        if checkpoints:
-            if os.path.exists(os.path.join(checkpoints, 'metric.states')):
-                with open(os.path.join(checkpoints, 'metric.states'),
-                          'rb') as f:
-                    states_dict = pickle.load(f) if six.PY2 else pickle.load(
-                        f, encoding='latin1')
-                best_model_dict = states_dict.get('best_model_dict', {})
-                if 'epoch' in states_dict:
-                    best_model_dict['start_epoch'] = states_dict['epoch'] + 1
-            logger.info("resume from {}".format(checkpoints))
-
-            if optimizer is not None:
-                if checkpoints[-1] in ['/', '\\']:
-                    checkpoints = checkpoints[:-1]
-                if os.path.exists(checkpoints + '.pdopt'):
-                    optim_dict = paddle.load(checkpoints + '.pdopt')
-                    optimizer.set_state_dict(optim_dict)
-                else:
-                    logger.warning(
-                        "{}.pdopt does not exist; optimizer params were not loaded".
-                        format(checkpoints))
-        return best_model_dict
-
-    if checkpoints:
-        if checkpoints.endswith('.pdparams'):
-            checkpoints = checkpoints.replace('.pdparams', '')
-        assert os.path.exists(checkpoints + ".pdparams"), \
-            "The {}.pdparams does not exist!".format(checkpoints)
-
-        # load params from trained model
-        params = paddle.load(checkpoints + '.pdparams')
-        state_dict = model.state_dict()
-        new_state_dict = {}
-        for key, value in state_dict.items():
-            if key not in params:
-                logger.warning("{} not in loaded params {} !".format(
-                    key, params.keys()))
-                continue
-            pre_value = params[key]
-            if list(value.shape) == list(pre_value.shape):
-                new_state_dict[key] = pre_value
-            else:
-                logger.warning(
-                    "The shape of model params {} {} does not match loaded params shape {} !".
-                    format(key, value.shape, pre_value.shape))
-        model.set_state_dict(new_state_dict)
-
-        if optimizer is not None:
-            if os.path.exists(checkpoints + '.pdopt'):
-                optim_dict = paddle.load(checkpoints + '.pdopt')
-                optimizer.set_state_dict(optim_dict)
-            else:
-                logger.warning(
-                    "{}.pdopt does not exist; optimizer params were not loaded".
-                    format(checkpoints))
-
-        if os.path.exists(checkpoints + '.states'):
-            with open(checkpoints + '.states', 'rb') as f:
-                states_dict = pickle.load(f) if six.PY2 else pickle.load(
-                    f, encoding='latin1')
-            best_model_dict = states_dict.get('best_model_dict', {})
-            if 'epoch' in states_dict:
-                best_model_dict['start_epoch'] = states_dict['epoch'] + 1
-        logger.info("resume from {}".format(checkpoints))
-    elif pretrained_model:
-        load_pretrained_params(model, pretrained_model)
-    else:
-        logger.info('train from scratch')
-    return best_model_dict
-
-
-def load_pretrained_params(model, path):
-    logger = get_logger()
-    if path.endswith('.pdparams'):
-        path = path.replace('.pdparams', '')
-    assert os.path.exists(path + ".pdparams"), \
-        "The {}.pdparams does not exist!".format(path)
-
-    params = paddle.load(path + '.pdparams')
-    state_dict = model.state_dict()
-    new_state_dict = {}
-    for k1 in params.keys():
-        if k1 not in state_dict.keys():
-            logger.warning("The pretrained params {} are not in the model".
-                           format(k1))
-        else:
-            if list(state_dict[k1].shape) == list(params[k1].shape):
-                new_state_dict[k1] = params[k1]
-            else:
-                logger.warning(
-                    "The shape of model params {} {} does not match loaded params {} {} !".
- format(k1, state_dict[k1].shape, k1, params[k1].shape)) - model.set_state_dict(new_state_dict) - logger.info("load pretrain successful from {}".format(path)) - return model - - -def save_model(model, - optimizer, - model_path, - logger, - config, - is_best=False, - prefix='ppocr', - **kwargs): - """ - save model to the target path - """ - _mkdir_if_not_exist(model_path, logger) - model_prefix = os.path.join(model_path, prefix) - paddle.save(optimizer.state_dict(), model_prefix + '.pdopt') - if config['Architecture']["model_type"] != 'vqa': - paddle.save(model.state_dict(), model_prefix + '.pdparams') - metric_prefix = model_prefix - else: - if config['Global']['distributed']: - model._layers.backbone.model.save_pretrained(model_prefix) - else: - model.backbone.model.save_pretrained(model_prefix) - metric_prefix = os.path.join(model_prefix, 'metric') - # save metric and config - if is_best: - with open(metric_prefix + '.states', 'wb') as f: - pickle.dump(kwargs, f, protocol=2) - logger.info('save best model is to {}'.format(model_prefix)) - else: - logger.info("save model in {}".format(model_prefix)) diff --git a/backend/ppocr/utils/stats.py b/backend/ppocr/utils/stats.py deleted file mode 100755 index 179b0082..00000000 --- a/backend/ppocr/utils/stats.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import collections -import numpy as np -import datetime - -__all__ = ['TrainingStats', 'Time'] - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. 
- """ - - def __init__(self, window_size): - self.deque = collections.deque(maxlen=window_size) - - def add_value(self, value): - self.deque.append(value) - - def get_median_value(self): - return np.median(self.deque) - - -def Time(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') - - -class TrainingStats(object): - def __init__(self, window_size, stats_keys): - self.window_size = window_size - self.smoothed_losses_and_metrics = { - key: SmoothedValue(window_size) - for key in stats_keys - } - - def update(self, stats): - for k, v in stats.items(): - if k not in self.smoothed_losses_and_metrics: - self.smoothed_losses_and_metrics[k] = SmoothedValue( - self.window_size) - self.smoothed_losses_and_metrics[k].add_value(v) - - def get(self, extras=None): - stats = collections.OrderedDict() - if extras: - for k, v in extras.items(): - stats[k] = v - for k, v in self.smoothed_losses_and_metrics.items(): - stats[k] = round(v.get_median_value(), 6) - - return stats - - def log(self, extras=None): - d = self.get(extras) - strs = [] - for k, v in d.items(): - strs.append('{}: {:x<6f}'.format(k, v)) - strs = ', '.join(strs) - return strs diff --git a/backend/ppocr/utils/utility.py b/backend/ppocr/utils/utility.py deleted file mode 100755 index 4a25ff8b..00000000 --- a/backend/ppocr/utils/utility.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import imghdr -import cv2 -import random -import numpy as np -import paddle - - -def print_dict(d, logger, delimiter=0): - """ - Recursively visualize a dict and - indenting acrrording by the relationship of keys. 
- """ - for k, v in sorted(d.items()): - if isinstance(v, dict): - logger.info("{}{} : ".format(delimiter * " ", str(k))) - print_dict(v, logger, delimiter + 4) - elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict): - logger.info("{}{} : ".format(delimiter * " ", str(k))) - for value in v: - print_dict(value, logger, delimiter + 4) - else: - logger.info("{}{} : {}".format(delimiter * " ", k, v)) - - -def get_check_global_params(mode): - check_params = ['use_gpu', 'max_text_length', 'image_shape', \ - 'image_shape', 'character_type', 'loss_type'] - if mode == "train_eval": - check_params = check_params + [ \ - 'train_batch_size_per_card', 'test_batch_size_per_card'] - elif mode == "test": - check_params = check_params + ['test_batch_size_per_card'] - return check_params - - -def _check_image_file(path): - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif'} - return any([path.lower().endswith(e) for e in img_end]) - - -def get_image_file_list(img_file): - imgs_lists = [] - if img_file is None or not os.path.exists(img_file): - raise Exception("not found any img file in {}".format(img_file)) - - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif'} - if os.path.isfile(img_file) and _check_image_file(img_file): - imgs_lists.append(img_file) - elif os.path.isdir(img_file): - for single_file in os.listdir(img_file): - file_path = os.path.join(img_file, single_file) - if os.path.isfile(file_path) and _check_image_file(file_path): - imgs_lists.append(file_path) - if len(imgs_lists) == 0: - raise Exception("not found any img file in {}".format(img_file)) - imgs_lists = sorted(imgs_lists) - return imgs_lists - - -def check_and_read_gif(img_path): - if os.path.basename(img_path)[-3:] in ['gif', 'GIF']: - gif = cv2.VideoCapture(img_path) - ret, frame = gif.read() - if not ret: - logger = logging.getLogger('ppocr') - logger.info("Cannot read {}. This gif image maybe corrupted.") - return None, False - if len(frame.shape) == 2 or frame.shape[-1] == 1: - frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) - imgvalue = frame[:, :, ::-1] - return imgvalue, True - return None, False - - -def load_vqa_bio_label_maps(label_map_path): - with open(label_map_path, "r", encoding='utf-8') as fin: - lines = fin.readlines() - lines = [line.strip() for line in lines] - if "O" not in lines: - lines.insert(0, "O") - labels = [] - for line in lines: - if line == "O": - labels.append("O") - else: - labels.append("B-" + line) - labels.append("I-" + line) - label2id_map = {label: idx for idx, label in enumerate(labels)} - id2label_map = {idx: label for idx, label in enumerate(labels)} - return label2id_map, id2label_map - - -def set_seed(seed=1024): - random.seed(seed) - np.random.seed(seed) - paddle.seed(seed) - - -class AverageMeter: - def __init__(self): - self.reset() - - def reset(self): - """reset""" - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - """update""" - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count diff --git a/backend/ppocr/utils/visual.py b/backend/ppocr/utils/visual.py deleted file mode 100644 index 7a8c1674..00000000 --- a/backend/ppocr/utils/visual.py +++ /dev/null @@ -1,98 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import numpy as np -from PIL import Image, ImageDraw, ImageFont - - -def draw_ser_results(image, - ocr_results, - font_path="doc/fonts/simfang.ttf", - font_size=18): - np.random.seed(2021) - color = (np.random.permutation(range(255)), - np.random.permutation(range(255)), - np.random.permutation(range(255))) - color_map = { - idx: (color[0][idx], color[1][idx], color[2][idx]) - for idx in range(1, 255) - } - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - elif isinstance(image, str) and os.path.isfile(image): - image = Image.open(image).convert('RGB') - img_new = image.copy() - draw = ImageDraw.Draw(img_new) - - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - for ocr_info in ocr_results: - if ocr_info["pred_id"] not in color_map: - continue - color = color_map[ocr_info["pred_id"]] - text = "{}: {}".format(ocr_info["pred"], ocr_info["text"]) - - draw_box_txt(ocr_info["bbox"], text, draw, font, font_size, color) - - img_new = Image.blend(image, img_new, 0.5) - return np.array(img_new) - - -def draw_box_txt(bbox, text, draw, font, font_size, color): - # draw ocr results outline - bbox = ((bbox[0], bbox[1]), (bbox[2], bbox[3])) - draw.rectangle(bbox, fill=color) - - # draw ocr results - start_y = max(0, bbox[0][1] - font_size) - tw = font.getsize(text)[0] - draw.rectangle( - [(bbox[0][0] + 1, start_y), (bbox[0][0] + tw + 1, start_y + font_size)], - fill=(0, 0, 255)) - draw.text((bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font) - - -def draw_re_results(image, - result, - font_path="doc/fonts/simfang.ttf", - font_size=18): - np.random.seed(0) - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - elif isinstance(image, str) and os.path.isfile(image): - image = Image.open(image).convert('RGB') - img_new = image.copy() - draw = ImageDraw.Draw(img_new) - - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - color_head = (0, 0, 255) - color_tail = (255, 0, 0) - color_line = (0, 255, 0) - - for ocr_info_head, ocr_info_tail in result: - draw_box_txt(ocr_info_head["bbox"], ocr_info_head["text"], draw, font, - font_size, color_head) - draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["text"], draw, font, - font_size, color_tail) - - center_head = ( - (ocr_info_head['bbox'][0] + ocr_info_head['bbox'][2]) // 2, - (ocr_info_head['bbox'][1] + ocr_info_head['bbox'][3]) // 2) - center_tail = ( - (ocr_info_tail['bbox'][0] + ocr_info_tail['bbox'][2]) // 2, - (ocr_info_tail['bbox'][1] + ocr_info_tail['bbox'][3]) // 2) - - draw.line([center_head, center_tail], fill=color_line, width=5) - - img_new = Image.blend(image, img_new, 0.5) - return np.array(img_new) diff --git a/backend/tools/eval.py b/backend/tools/eval.py deleted file mode 100755 index cab28334..00000000 --- a/backend/tools/eval.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
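The draw_box_txt helper being deleted measures text with font.getsize(), which Pillow deprecated in 9.2 and removed in 10.0. If any of this rendering code is kept around elsewhere, a version-tolerant width measurement looks roughly like this (a sketch, not part of the patch):

    from PIL import ImageFont


    def text_width(font: ImageFont.FreeTypeFont, text: str) -> int:
        # getlength() exists since Pillow 8.0; getsize() is the legacy fallback
        if hasattr(font, 'getlength'):
            return int(font.getlength(text))
        return font.getsize(text)[0]
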
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, __dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -from ppocr.data import build_dataloader -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.metrics import build_metric -from ppocr.utils.save_load import load_model -import tools.program as program - - -def main(): - global_config = config['Global'] - # build dataloader - valid_dataloader = build_dataloader(config, 'Eval', device, logger) - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - # for rec algorithm - if hasattr(post_process_class, 'character'): - char_num = len(getattr(post_process_class, 'character')) - if config['Architecture']["algorithm"] in ["Distillation", - ]: # distillation model - for key in config['Architecture']["Models"]: - if config['Architecture']['Models'][key]['Head'][ - 'name'] == 'MultiHead': # for multi head - out_channels_list = {} - if config['PostProcess'][ - 'name'] == 'DistillationSARLabelDecode': - char_num = char_num - 2 - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Models'][key]['Head'][ - 'out_channels_list'] = out_channels_list - else: - config['Architecture']["Models"][key]["Head"][ - 'out_channels'] = char_num - elif config['Architecture']['Head'][ - 'name'] == 'MultiHead': # for multi head - out_channels_list = {} - if config['PostProcess']['name'] == 'SARLabelDecode': - char_num = char_num - 2 - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Head'][ - 'out_channels_list'] = out_channels_list - else: # base rec model - config['Architecture']["Head"]['out_channels'] = char_num - - model = build_model(config['Architecture']) - extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"] - extra_input = False - if config['Architecture']['algorithm'] == 'Distillation': - for key in config['Architecture']["Models"]: - extra_input = extra_input or config['Architecture']['Models'][key][ - 'algorithm'] in extra_input_models - else: - extra_input = config['Architecture']['algorithm'] in extra_input_models - if "model_type" in config['Architecture'].keys(): - model_type = config['Architecture']['model_type'] - else: - model_type = None - - best_model_dict = load_model( - config, model, model_type=config['Architecture']["model_type"]) - if len(best_model_dict): - logger.info('metric in ckpt ***************') - for k, v in best_model_dict.items(): - logger.info('{}:{}'.format(k, v)) - - # build metric - eval_class = build_metric(config['Metric']) - # start eval - metric = program.eval(model, valid_dataloader, post_process_class, - eval_class, model_type, extra_input) - logger.info('metric eval ***************') - for k, v in metric.items(): - 
logger.info('{}:{}'.format(k, v)) - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main() diff --git a/backend/tools/export_center.py b/backend/tools/export_center.py deleted file mode 100644 index 9a6372f1..00000000 --- a/backend/tools/export_center.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import pickle - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) - -from ppocr.data import build_dataloader -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -import tools.program as program - - -def main(): - global_config = config['Global'] - # build dataloader - config['Eval']['dataset']['name'] = config['Train']['dataset']['name'] - config['Eval']['dataset']['data_dir'] = config['Train']['dataset'][ - 'data_dir'] - config['Eval']['dataset']['label_file_list'] = config['Train']['dataset'][ - 'label_file_list'] - eval_dataloader = build_dataloader(config, 'Eval', device, logger) - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - # for rec algorithm - if hasattr(post_process_class, 'character'): - char_num = len(getattr(post_process_class, 'character')) - config['Architecture']["Head"]['out_channels'] = char_num - - #set return_features = True - config['Architecture']["Head"]["return_feats"] = True - - model = build_model(config['Architecture']) - - best_model_dict = load_model(config, model) - if len(best_model_dict): - logger.info('metric in ckpt ***************') - for k, v in best_model_dict.items(): - logger.info('{}:{}'.format(k, v)) - - # get features from train data - char_center = program.get_center(model, eval_dataloader, post_process_class) - - #serialize to disk - with open("train_center.pkl", 'wb') as f: - pickle.dump(char_center, f) - return - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main() diff --git a/backend/tools/export_model.py b/backend/tools/export_model.py deleted file mode 100755 index 76c716e0..00000000 --- a/backend/tools/export_model.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
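For reference, the removed export_center.py serialized the per-character feature centers with pickle. Reading the artifact back is just the reverse (a sketch; it assumes program.get_center produced a plain dict keyed by character):

    import pickle

    with open('train_center.pkl', 'rb') as f:
        char_center = pickle.load(f)

    # peek at a few entries; values should be feature-center vectors per character
    for char, center in list(char_center.items())[:5]:
        print(char, getattr(center, 'shape', center))
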
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, ".."))) - -import argparse - -import paddle -from paddle.jit import to_static - -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.logging import get_logger -from tools.program import load_config, merge_config, ArgsParser - - -def export_single_model(model, arch_config, save_path, logger, quanter=None): - if arch_config["algorithm"] == "SRN": - max_text_length = arch_config["Head"]["max_text_length"] - other_shape = [ - paddle.static.InputSpec( - shape=[None, 1, 64, 256], dtype="float32"), [ - paddle.static.InputSpec( - shape=[None, 256, 1], - dtype="int64"), paddle.static.InputSpec( - shape=[None, max_text_length, 1], dtype="int64"), - paddle.static.InputSpec( - shape=[None, 8, max_text_length, max_text_length], - dtype="int64"), paddle.static.InputSpec( - shape=[None, 8, max_text_length, max_text_length], - dtype="int64") - ] - ] - model = to_static(model, input_spec=other_shape) - elif arch_config["algorithm"] == "SAR": - other_shape = [ - paddle.static.InputSpec( - shape=[None, 3, 48, 160], dtype="float32"), - ] - model = to_static(model, input_spec=other_shape) - elif arch_config["algorithm"] == "SVTR": - if arch_config["Head"]["name"] == 'MultiHead': - other_shape = [ - paddle.static.InputSpec( - shape=[None, 3, 48, -1], dtype="float32"), - ] - else: - other_shape = [ - paddle.static.InputSpec( - shape=[None, 3, 64, 256], dtype="float32"), - ] - model = to_static(model, input_spec=other_shape) - elif arch_config["algorithm"] == "PREN": - other_shape = [ - paddle.static.InputSpec( - shape=[None, 3, 64, 512], dtype="float32"), - ] - model = to_static(model, input_spec=other_shape) - else: - infer_shape = [3, -1, -1] - if arch_config["model_type"] == "rec": - infer_shape = [3, 32, -1] # for rec model, H must be 32 - if "Transform" in arch_config and arch_config[ - "Transform"] is not None and arch_config["Transform"][ - "name"] == "TPS": - logger.info( - "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training" - ) - infer_shape[-1] = 100 - if arch_config["algorithm"] == "NRTR": - infer_shape = [1, 32, 100] - elif arch_config["model_type"] == "table": - infer_shape = [3, 488, 488] - model = to_static( - model, - input_spec=[ - paddle.static.InputSpec( - shape=[None] + infer_shape, dtype="float32") - ]) - - if quanter is None: - paddle.jit.save(model, save_path) - else: - quanter.save_quantized_model(model, save_path) - logger.info("inference model is saved to {}".format(save_path)) - return - - -def main(): - FLAGS = ArgsParser().parse_args() - config = load_config(FLAGS.settings_config) - config = merge_config(config, FLAGS.opt) - logger = get_logger() - # build post process - - post_process_class = build_post_process(config["PostProcess"], - config["Global"]) - - # build model - # for rec algorithm - if hasattr(post_process_class, "character"): - char_num = len(getattr(post_process_class, "character")) - if config["Architecture"]["algorithm"] in ["Distillation", - ]: # distillation model - for key in config["Architecture"]["Models"]: - if config["Architecture"]["Models"][key]["Head"][ - "name"] == 'MultiHead': # multi head - 
out_channels_list = {} - if config['PostProcess'][ - 'name'] == 'DistillationSARLabelDecode': - char_num = char_num - 2 - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Models'][key]['Head'][ - 'out_channels_list'] = out_channels_list - else: - config["Architecture"]["Models"][key]["Head"][ - "out_channels"] = char_num - # just one final tensor needs to exported for inference - config["Architecture"]["Models"][key][ - "return_all_feats"] = False - elif config['Architecture']['Head'][ - 'name'] == 'MultiHead': # multi head - out_channels_list = {} - char_num = len(getattr(post_process_class, 'character')) - if config['PostProcess']['name'] == 'SARLabelDecode': - char_num = char_num - 2 - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Head'][ - 'out_channels_list'] = out_channels_list - else: # base rec model - config["Architecture"]["Head"]["out_channels"] = char_num - - model = build_model(config["Architecture"]) - load_model(config, model) - model.eval() - - save_path = config["Global"]["save_inference_dir"] - - arch_config = config["Architecture"] - - if arch_config["algorithm"] in ["Distillation", ]: # distillation model - archs = list(arch_config["Models"].values()) - for idx, name in enumerate(model.model_name_list): - sub_model_save_path = os.path.join(save_path, name, "inference") - export_single_model(model.model_list[idx], archs[idx], - sub_model_save_path, logger) - else: - save_path = os.path.join(save_path, "inference") - export_single_model(model, arch_config, save_path, logger) - - -if __name__ == "__main__": - main() diff --git a/backend/tools/infer/predict_cls.py b/backend/tools/infer/predict_cls.py deleted file mode 100755 index ed2f47c0..00000000 --- a/backend/tools/infer/predict_cls.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
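The core move in the removed export_model.py is wrapping a dynamic-graph Layer with paddle.jit.to_static plus an InputSpec, then serializing with paddle.jit.save. Boiled down (a sketch against the same Paddle 2.x API the file used; the shape and path here are placeholders):

    import paddle
    from paddle.jit import to_static


    def export(model: paddle.nn.Layer, save_path: str, infer_shape=(3, 32, -1)):
        model.eval()
        static_model = to_static(
            model,
            input_spec=[paddle.static.InputSpec(
                shape=[None] + list(infer_shape), dtype='float32')])
        paddle.jit.save(static_model, save_path)  # writes .pdmodel / .pdiparams
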
-import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import copy -import numpy as np -import math -import time -import traceback - -import tools.infer.utility as utility -from ppocr.postprocess import build_post_process -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif - -logger = get_logger() - - -class TextClassifier(object): - def __init__(self, args): - self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] - self.cls_batch_num = args.cls_batch_num - self.cls_thresh = args.cls_thresh - postprocess_params = { - 'name': 'ClsPostProcess', - "label_list": args.label_list, - } - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, _ = \ - utility.create_predictor(args, 'cls', logger) - self.use_onnx = args.use_onnx - - def resize_norm_img(self, img): - imgC, imgH, imgW = self.cls_image_shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if self.cls_image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def __call__(self, img_list): - img_list = copy.deepcopy(img_list) - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the cls process - indices = np.argsort(np.array(width_list)) - - cls_res = [['', 0.0]] * img_num - batch_num = self.cls_batch_num - elapse = 0 - for beg_img_no in range(0, img_num, batch_num): - - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - starttime = time.time() - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(img_list[indices[ino]]) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, input_dict) - prob_out = outputs[0] - else: - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.run() - prob_out = self.output_tensors[0].copy_to_cpu() - self.predictor.try_shrink_memory() - cls_result = self.postprocess_op(prob_out) - elapse += time.time() - starttime - for rno in range(len(cls_result)): - label, score = cls_result[rno] - cls_res[indices[beg_img_no + rno]] = [label, score] - if '180' in label and score > self.cls_thresh: - img_list[indices[beg_img_no + rno]] = cv2.rotate( - img_list[indices[beg_img_no + rno]], 1) - return img_list, cls_res, elapse - - -def main(args): - image_file_list 
= get_image_file_list(args.image_dir) - text_classifier = TextClassifier(args) - valid_image_file_list = [] - img_list = [] - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - valid_image_file_list.append(image_file) - img_list.append(img) - try: - img_list, cls_res, predict_time = text_classifier(img_list) - except Exception as E: - logger.info(traceback.format_exc()) - logger.info(E) - exit() - for ino in range(len(img_list)): - logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], - cls_res[ino])) - - -if __name__ == "__main__": - main(utility.parse_args()) diff --git a/backend/tools/infer/predict_det.py b/backend/tools/infer/predict_det.py deleted file mode 100755 index 5f2675d6..00000000 --- a/backend/tools/infer/predict_det.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import time -import sys - -import tools.infer.utility as utility -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.data import create_operators, transform -from ppocr.postprocess import build_post_process -import json -logger = get_logger() - - -class TextDetector(object): - def __init__(self, args): - self.args = args - self.det_algorithm = args.det_algorithm - self.use_onnx = args.use_onnx - pre_process_list = [{ - 'DetResizeForTest': { - 'limit_side_len': args.det_limit_side_len, - 'limit_type': args.det_limit_type, - } - }, { - 'NormalizeImage': { - 'std': [0.229, 0.224, 0.225], - 'mean': [0.485, 0.456, 0.406], - 'scale': '1./255.', - 'order': 'hwc' - } - }, { - 'ToCHWImage': None - }, { - 'KeepKeys': { - 'keep_keys': ['image', 'shape'] - } - }] - postprocess_params = {} - if self.det_algorithm == "DB": - postprocess_params['name'] = 'DBPostProcess' - postprocess_params["thresh"] = args.det_db_thresh - postprocess_params["box_thresh"] = args.det_db_box_thresh - postprocess_params["max_candidates"] = 1000 - postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio - postprocess_params["use_dilation"] = args.use_dilation - postprocess_params["score_mode"] = args.det_db_score_mode - elif self.det_algorithm == "EAST": - postprocess_params['name'] = 'EASTPostProcess' - postprocess_params["score_thresh"] = args.det_east_score_thresh - postprocess_params["cover_thresh"] = args.det_east_cover_thresh - postprocess_params["nms_thresh"] = args.det_east_nms_thresh - elif self.det_algorithm == "SAST": - pre_process_list[0] = { - 'DetResizeForTest': { - 'resize_long': args.det_limit_side_len - } - } - 
postprocess_params['name'] = 'SASTPostProcess' - postprocess_params["score_thresh"] = args.det_sast_score_thresh - postprocess_params["nms_thresh"] = args.det_sast_nms_thresh - self.det_sast_polygon = args.det_sast_polygon - if self.det_sast_polygon: - postprocess_params["sample_pts_num"] = 6 - postprocess_params["expand_scale"] = 1.2 - postprocess_params["shrink_ratio_of_width"] = 0.2 - else: - postprocess_params["sample_pts_num"] = 2 - postprocess_params["expand_scale"] = 1.0 - postprocess_params["shrink_ratio_of_width"] = 0.3 - elif self.det_algorithm == "PSE": - postprocess_params['name'] = 'PSEPostProcess' - postprocess_params["thresh"] = args.det_pse_thresh - postprocess_params["box_thresh"] = args.det_pse_box_thresh - postprocess_params["min_area"] = args.det_pse_min_area - postprocess_params["box_type"] = args.det_pse_box_type - postprocess_params["scale"] = args.det_pse_scale - self.det_pse_box_type = args.det_pse_box_type - elif self.det_algorithm == "FCE": - pre_process_list[0] = { - 'DetResizeForTest': { - 'rescale_img': [1080, 736] - } - } - postprocess_params['name'] = 'FCEPostProcess' - postprocess_params["scales"] = args.scales - postprocess_params["alpha"] = args.alpha - postprocess_params["beta"] = args.beta - postprocess_params["fourier_degree"] = args.fourier_degree - postprocess_params["box_type"] = args.det_fce_box_type - else: - logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) - sys.exit(0) - - self.preprocess_op = create_operators(pre_process_list) - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor( - args, 'det', logger) - - if self.use_onnx: - img_h, img_w = self.input_tensor.shape[2:] - if img_h is not None and img_w is not None and img_h > 0 and img_w > 0: - pre_process_list[0] = { - 'DetResizeForTest': { - 'image_shape': [img_h, img_w] - } - } - self.preprocess_op = create_operators(pre_process_list) - - if args.benchmark: - import auto_log - pid = os.getpid() - gpu_id = utility.get_infer_gpuid() - self.autolog = auto_log.AutoLogger( - model_name="det", - model_precision=args.precision, - batch_size=1, - data_shape="dynamic", - save_path=None, - inference_config=self.config, - pids=pid, - process_name=None, - gpu_ids=gpu_id if args.use_gpu else None, - time_keys=[ - 'preprocess_time', 'inference_time', 'postprocess_time' - ], - warmup=2, - logger=logger) - - def order_points_clockwise(self, pts): - rect = np.zeros((4, 2), dtype="float32") - s = pts.sum(axis=1) - rect[0] = pts[np.argmin(s)] - rect[2] = pts[np.argmax(s)] - diff = np.diff(pts, axis=1) - rect[1] = pts[np.argmin(diff)] - rect[3] = pts[np.argmax(diff)] - return rect - - def clip_det_res(self, points, img_height, img_width): - for pno in range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - box = self.clip_det_res(box, img_height, img_width) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 3 or rect_height <= 3: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): - img_height, img_width = 
image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.clip_det_res(box, img_height, img_width) - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def __call__(self, img): - ori_im = img.copy() - data = {'image': img} - - st = time.time() - - if self.args.benchmark: - self.autolog.times.start() - - data = transform(data, self.preprocess_op) - img, shape_list = data - if img is None: - return None, 0 - img = np.expand_dims(img, axis=0) - shape_list = np.expand_dims(shape_list, axis=0) - img = img.copy() - - if self.args.benchmark: - self.autolog.times.stamp() - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = img - outputs = self.predictor.run(self.output_tensors, input_dict) - else: - self.input_tensor.copy_from_cpu(img) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.args.benchmark: - self.autolog.times.stamp() - - preds = {} - if self.det_algorithm == "EAST": - preds['f_geo'] = outputs[0] - preds['f_score'] = outputs[1] - elif self.det_algorithm == 'SAST': - preds['f_border'] = outputs[0] - preds['f_score'] = outputs[1] - preds['f_tco'] = outputs[2] - preds['f_tvo'] = outputs[3] - elif self.det_algorithm in ['DB', 'PSE']: - preds['maps'] = outputs[0] - elif self.det_algorithm == 'FCE': - for i, output in enumerate(outputs): - preds['level_{}'.format(i)] = output - else: - raise NotImplementedError - - #self.predictor.try_shrink_memory() - post_result = self.postprocess_op(preds, shape_list) - dt_boxes = post_result[0]['points'] - if (self.det_algorithm == "SAST" and self.det_sast_polygon) or ( - self.det_algorithm in ["PSE", "FCE"] and - self.postprocess_op.box_type == 'poly'): - dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) - else: - dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) - - if self.args.benchmark: - self.autolog.times.end(stamp=True) - et = time.time() - return dt_boxes, et - st - - -if __name__ == "__main__": - args = utility.parse_args() - image_file_list = get_image_file_list(args.image_dir) - text_detector = TextDetector(args) - count = 0 - total_time = 0 - draw_img_save = "./inference_results" - - if args.warmup: - img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8) - for i in range(2): - res = text_detector(img) - - if not os.path.exists(draw_img_save): - os.makedirs(draw_img_save) - save_results = [] - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - st = time.time() - dt_boxes, _ = text_detector(img) - elapse = time.time() - st - if count > 0: - total_time += elapse - count += 1 - save_pred = os.path.basename(image_file) + "\t" + str( - json.dumps([x.tolist() for x in dt_boxes])) + "\n" - save_results.append(save_pred) - logger.info(save_pred) - logger.info("The predict time of {}: {}".format(image_file, elapse)) - src_im = utility.draw_text_det_res(dt_boxes, image_file) - img_name_pure = os.path.split(image_file)[-1] - img_path = os.path.join(draw_img_save, - "det_res_{}".format(img_name_pure)) - cv2.imwrite(img_path, src_im) - logger.info("The visualized image saved in {}".format(img_path)) - - with open(os.path.join(draw_img_save, "det_results.txt"), 'w') as f: - f.writelines(save_results) - f.close() - if args.benchmark: - text_detector.autolog.report() diff --git 
a/backend/tools/infer/predict_e2e.py b/backend/tools/infer/predict_e2e.py deleted file mode 100755 index fb2859f0..00000000 --- a/backend/tools/infer/predict_e2e.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import time -import sys - -import tools.infer.utility as utility -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.data import create_operators, transform -from ppocr.postprocess import build_post_process - -logger = get_logger() - - -class TextE2E(object): - def __init__(self, args): - self.args = args - self.e2e_algorithm = args.e2e_algorithm - self.use_onnx = args.use_onnx - pre_process_list = [{ - 'E2EResizeForTest': {} - }, { - 'NormalizeImage': { - 'std': [0.229, 0.224, 0.225], - 'mean': [0.485, 0.456, 0.406], - 'scale': '1./255.', - 'order': 'hwc' - } - }, { - 'ToCHWImage': None - }, { - 'KeepKeys': { - 'keep_keys': ['image', 'shape'] - } - }] - postprocess_params = {} - if self.e2e_algorithm == "PGNet": - pre_process_list[0] = { - 'E2EResizeForTest': { - 'max_side_len': args.e2e_limit_side_len, - 'valid_set': 'totaltext' - } - } - postprocess_params['name'] = 'PGPostProcess' - postprocess_params["score_thresh"] = args.e2e_pgnet_score_thresh - postprocess_params["character_dict_path"] = args.e2e_char_dict_path - postprocess_params["valid_set"] = args.e2e_pgnet_valid_set - postprocess_params["mode"] = args.e2e_pgnet_mode - else: - logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm)) - sys.exit(0) - - self.preprocess_op = create_operators(pre_process_list) - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, _ = utility.create_predictor( - args, 'e2e', logger) # paddle.jit.load(args.det_model_dir) - # self.predictor.eval() - - def clip_det_res(self, points, img_height, img_width): - for pno in range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.clip_det_res(box, img_height, img_width) - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def __call__(self, img): - - ori_im = img.copy() - data = {'image': img} - data = transform(data, self.preprocess_op) - img, shape_list = data - if img is None: - return None, 0 - img = np.expand_dims(img, axis=0) - shape_list = np.expand_dims(shape_list, axis=0) - img = img.copy() - starttime = time.time() - - if 
self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = img - outputs = self.predictor.run(self.output_tensors, input_dict) - preds = {} - preds['f_border'] = outputs[0] - preds['f_char'] = outputs[1] - preds['f_direction'] = outputs[2] - preds['f_score'] = outputs[3] - else: - self.input_tensor.copy_from_cpu(img) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - - preds = {} - if self.e2e_algorithm == 'PGNet': - preds['f_border'] = outputs[0] - preds['f_char'] = outputs[1] - preds['f_direction'] = outputs[2] - preds['f_score'] = outputs[3] - else: - raise NotImplementedError - post_result = self.postprocess_op(preds, shape_list) - points, strs = post_result['points'], post_result['texts'] - dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape) - elapse = time.time() - starttime - return dt_boxes, strs, elapse - - -if __name__ == "__main__": - args = utility.parse_args() - image_file_list = get_image_file_list(args.image_dir) - text_detector = TextE2E(args) - count = 0 - total_time = 0 - draw_img_save = "./inference_results" - if not os.path.exists(draw_img_save): - os.makedirs(draw_img_save) - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - points, strs, elapse = text_detector(img) - if count > 0: - total_time += elapse - count += 1 - logger.info("Predict time of {}: {}".format(image_file, elapse)) - src_im = utility.draw_e2e_res(points, strs, image_file) - img_name_pure = os.path.split(image_file)[-1] - img_path = os.path.join(draw_img_save, - "e2e_res_{}".format(img_name_pure)) - cv2.imwrite(img_path, src_im) - logger.info("The visualized image saved in {}".format(img_path)) - if count > 1: - logger.info("Avg Time: {}".format(total_time / (count - 1))) diff --git a/backend/tools/infer/predict_rec.py b/backend/tools/infer/predict_rec.py deleted file mode 100755 index 3664ef2c..00000000 --- a/backend/tools/infer/predict_rec.py +++ /dev/null @@ -1,442 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
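Both the classifier deleted above and the recognizer below use the same throughput trick: sort the text crops by width/height ratio so each padded batch wastes little width, then scatter results back into the original order via the sort indices. Distilled (a sketch; infer_batch stands in for the actual predictor call):

    import numpy as np


    def run_in_ratio_sorted_batches(img_list, infer_batch, batch_size=6):
        ratios = [img.shape[1] / float(img.shape[0]) for img in img_list]
        indices = np.argsort(np.array(ratios))
        results = [None] * len(img_list)
        for beg in range(0, len(img_list), batch_size):
            batch_idx = indices[beg:beg + batch_size]
            for i, out in zip(batch_idx,
                              infer_batch([img_list[i] for i in batch_idx])):
                results[i] = out  # restore the caller's original ordering
        return results
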
-import os -import sys -from PIL import Image -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import math -import time -import traceback -import paddle - -import tools.infer.utility as utility -from ppocr.postprocess import build_post_process -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif - -logger = get_logger() - - -class TextRecognizer(object): - def __init__(self, args): - self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] - self.rec_batch_num = args.rec_batch_num - self.rec_algorithm = args.rec_algorithm - postprocess_params = { - 'name': 'CTCLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - if self.rec_algorithm == "SRN": - postprocess_params = { - 'name': 'SRNLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "RARE": - postprocess_params = { - 'name': 'AttnLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == 'NRTR': - postprocess_params = { - 'name': 'NRTRLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "SAR": - postprocess_params = { - 'name': 'SARLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, self.config = \ - utility.create_predictor(args, 'rec', logger) - self.benchmark = args.benchmark - self.use_onnx = args.use_onnx - if args.benchmark: - import auto_log - pid = os.getpid() - gpu_id = utility.get_infer_gpuid() - self.autolog = auto_log.AutoLogger( - model_name="rec", - model_precision=args.precision, - batch_size=args.rec_batch_num, - data_shape="dynamic", - save_path=None, #args.save_log_path, - inference_config=self.config, - pids=pid, - process_name=None, - gpu_ids=gpu_id if args.use_gpu else None, - time_keys=[ - 'preprocess_time', 'inference_time', 'postprocess_time' - ], - warmup=0, - logger=logger) - - def resize_norm_img(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - if self.rec_algorithm == 'NRTR': - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # return padding_im - image_pil = Image.fromarray(np.uint8(img)) - img = image_pil.resize([100, 32], Image.ANTIALIAS) - img = np.array(img) - norm_img = np.expand_dims(img, -1) - norm_img = norm_img.transpose((2, 0, 1)) - return norm_img.astype(np.float32) / 128. - 1. 
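One Pillow pitfall in the NRTR branch just below: Image.ANTIALIAS was removed in Pillow 10.0 (it had long been an alias for LANCZOS). A version-tolerant spelling, should this resize survive anywhere else:

    from PIL import Image

    try:
        RESAMPLE = Image.Resampling.LANCZOS  # Pillow >= 9.1
    except AttributeError:
        RESAMPLE = Image.ANTIALIAS           # older Pillow

    # e.g. image_pil.resize((100, 32), RESAMPLE) for the fixed 32x100 NRTR input
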
- - assert imgC == img.shape[2] - imgW = int((imgH * max_wh_ratio)) - if self.use_onnx: - w = self.input_tensor.shape[3:][0] - if w is not None and w > 0: - imgW = w - - h, w = img.shape[:2] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - if self.rec_algorithm == 'RARE': - if resized_w > self.rec_image_shape[2]: - resized_w = self.rec_image_shape[2] - imgW = self.rec_image_shape[2] - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def resize_norm_img_svtr(self, img, image_shape): - - imgC, imgH, imgW = image_shape - resized_image = cv2.resize( - img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - return resized_image - - def resize_norm_img_srn(self, img, image_shape): - imgC, imgH, imgW = image_shape - - img_black = np.zeros((imgH, imgW)) - im_hei = img.shape[0] - im_wid = img.shape[1] - - if im_wid <= im_hei * 1: - img_new = cv2.resize(img, (imgH * 1, imgH)) - elif im_wid <= im_hei * 2: - img_new = cv2.resize(img, (imgH * 2, imgH)) - elif im_wid <= im_hei * 3: - img_new = cv2.resize(img, (imgH * 3, imgH)) - else: - img_new = cv2.resize(img, (imgW, imgH)) - - img_np = np.asarray(img_new) - img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) - img_black[:, 0:img_np.shape[1]] = img_np - img_black = img_black[:, :, np.newaxis] - - row, col, c = img_black.shape - c = 1 - - return np.reshape(img_black, (c, row, col)).astype(np.float32) - - def srn_other_inputs(self, image_shape, num_heads, max_text_length): - - imgC, imgH, imgW = image_shape - feature_dim = int((imgH / 8) * (imgW / 8)) - - encoder_word_pos = np.array(range(0, feature_dim)).reshape( - (feature_dim, 1)).astype('int64') - gsrm_word_pos = np.array(range(0, max_text_length)).reshape( - (max_text_length, 1)).astype('int64') - - gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) - gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( - [-1, 1, max_text_length, max_text_length]) - gsrm_slf_attn_bias1 = np.tile( - gsrm_slf_attn_bias1, - [1, num_heads, 1, 1]).astype('float32') * [-1e9] - - gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( - [-1, 1, max_text_length, max_text_length]) - gsrm_slf_attn_bias2 = np.tile( - gsrm_slf_attn_bias2, - [1, num_heads, 1, 1]).astype('float32') * [-1e9] - - encoder_word_pos = encoder_word_pos[np.newaxis, :] - gsrm_word_pos = gsrm_word_pos[np.newaxis, :] - - return [ - encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2 - ] - - def process_image_srn(self, img, image_shape, num_heads, max_text_length): - norm_img = self.resize_norm_img_srn(img, image_shape) - norm_img = norm_img[np.newaxis, :] - - [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ - self.srn_other_inputs(image_shape, num_heads, max_text_length) - - gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32) - gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32) - encoder_word_pos = encoder_word_pos.astype(np.int64) - gsrm_word_pos = gsrm_word_pos.astype(np.int64) - - return (norm_img, encoder_word_pos, gsrm_word_pos, 
gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2) - - def resize_norm_img_sar(self, img, image_shape, - width_downsample_ratio=0.25): - imgC, imgH, imgW_min, imgW_max = image_shape - h = img.shape[0] - w = img.shape[1] - valid_ratio = 1.0 - # make sure new_width is an integral multiple of width_divisor. - width_divisor = int(1 / width_downsample_ratio) - # resize - ratio = w / float(h) - resize_w = math.ceil(imgH * ratio) - if resize_w % width_divisor != 0: - resize_w = round(resize_w / width_divisor) * width_divisor - if imgW_min is not None: - resize_w = max(imgW_min, resize_w) - if imgW_max is not None: - valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) - resize_w = min(imgW_max, resize_w) - resized_image = cv2.resize(img, (resize_w, imgH)) - resized_image = resized_image.astype('float32') - # norm - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - resize_shape = resized_image.shape - padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) - padding_im[:, :, 0:resize_w] = resized_image - pad_shape = padding_im.shape - - return padding_im, resize_shape, pad_shape, valid_ratio - - def __call__(self, img_list): - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the recognition process - indices = np.argsort(np.array(width_list)) - rec_res = [['', 0.0]] * img_num - batch_num = self.rec_batch_num - st = time.time() - if self.benchmark: - self.autolog.times.start() - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - imgC, imgH, imgW = self.rec_image_shape - max_wh_ratio = imgW / imgH - # max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - - if self.rec_algorithm == "SAR": - norm_img, _, _, valid_ratio = self.resize_norm_img_sar( - img_list[indices[ino]], self.rec_image_shape) - norm_img = norm_img[np.newaxis, :] - valid_ratio = np.expand_dims(valid_ratio, axis=0) - valid_ratios = [] - valid_ratios.append(valid_ratio) - norm_img_batch.append(norm_img) - elif self.rec_algorithm == "SRN": - norm_img = self.process_image_srn( - img_list[indices[ino]], self.rec_image_shape, 8, 25) - encoder_word_pos_list = [] - gsrm_word_pos_list = [] - gsrm_slf_attn_bias1_list = [] - gsrm_slf_attn_bias2_list = [] - encoder_word_pos_list.append(norm_img[1]) - gsrm_word_pos_list.append(norm_img[2]) - gsrm_slf_attn_bias1_list.append(norm_img[3]) - gsrm_slf_attn_bias2_list.append(norm_img[4]) - norm_img_batch.append(norm_img[0]) - elif self.rec_algorithm == "SVTR": - norm_img = self.resize_norm_img_svtr(img_list[indices[ino]], - self.rec_image_shape) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - else: - norm_img = self.resize_norm_img(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - if self.benchmark: - self.autolog.times.stamp() - - if self.rec_algorithm == "SRN": - encoder_word_pos_list = np.concatenate(encoder_word_pos_list) - gsrm_word_pos_list = 
np.concatenate(gsrm_word_pos_list) - gsrm_slf_attn_bias1_list = np.concatenate( - gsrm_slf_attn_bias1_list) - gsrm_slf_attn_bias2_list = np.concatenate( - gsrm_slf_attn_bias2_list) - - inputs = [ - norm_img_batch, - encoder_word_pos_list, - gsrm_word_pos_list, - gsrm_slf_attn_bias1_list, - gsrm_slf_attn_bias2_list, - ] - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, - input_dict) - preds = {"predict": outputs[2]} - else: - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle( - input_names[i]) - input_tensor.copy_from_cpu(inputs[i]) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - preds = {"predict": outputs[2]} - elif self.rec_algorithm == "SAR": - valid_ratios = np.concatenate(valid_ratios) - inputs = [ - norm_img_batch, - valid_ratios, - ] - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, - input_dict) - preds = outputs[0] - else: - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle( - input_names[i]) - input_tensor.copy_from_cpu(inputs[i]) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - preds = outputs[0] - else: - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, - input_dict) - preds = outputs[0] - else: - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - if len(outputs) != 1: - preds = outputs - else: - preds = outputs[0] - rec_result = self.postprocess_op(preds) - for rno in range(len(rec_result)): - rec_res[indices[beg_img_no + rno]] = rec_result[rno] - if self.benchmark: - self.autolog.times.end(stamp=True) - return rec_res, time.time() - st - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - text_recognizer = TextRecognizer(args) - valid_image_file_list = [] - img_list = [] - - logger.info( - "In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', " - "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320" - ) - # warmup 2 times - if args.warmup: - img = np.random.uniform(0, 255, [48, 320, 3]).astype(np.uint8) - for i in range(2): - res = text_recognizer([img] * int(args.rec_batch_num)) - - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - valid_image_file_list.append(image_file) - img_list.append(img) - try: - rec_res, _ = text_recognizer(img_list) - - except Exception as E: - logger.info(traceback.format_exc()) - logger.info(E) - exit() - for ino in range(len(img_list)): - logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], - rec_res[ino])) - if args.benchmark: - 
text_recognizer.autolog.report() - - -if __name__ == "__main__": - main(utility.parse_args()) diff --git a/backend/tools/infer/predict_system.py b/backend/tools/infer/predict_system.py deleted file mode 100755 index 4af3da70..00000000 --- a/backend/tools/infer/predict_system.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import subprocess - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import copy -import numpy as np -import json -import time -import logging -from PIL import Image -import tools.infer.utility as utility -import tools.infer.predict_rec as predict_rec -import tools.infer.predict_det as predict_det -import tools.infer.predict_cls as predict_cls -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.utils.logging import get_logger -from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image -logger = get_logger() - - -class TextSystem(object): - def __init__(self, args): - if not args.show_log: - logger.setLevel(logging.INFO) - - self.text_detector = predict_det.TextDetector(args) - self.text_recognizer = predict_rec.TextRecognizer(args) - self.use_angle_cls = args.use_angle_cls - self.drop_score = args.drop_score - if self.use_angle_cls: - self.text_classifier = predict_cls.TextClassifier(args) - - self.args = args - self.crop_image_res_index = 0 - - def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res): - os.makedirs(output_dir, exist_ok=True) - bbox_num = len(img_crop_list) - for bno in range(bbox_num): - cv2.imwrite( - os.path.join(output_dir, - f"mg_crop_{bno+self.crop_image_res_index}.jpg"), - img_crop_list[bno]) - logger.debug(f"{bno}, {rec_res[bno]}") - self.crop_image_res_index += bbox_num - - def __call__(self, img, cls=True): - ori_im = img.copy() - dt_boxes, elapse = self.text_detector(img) - - if dt_boxes is None: - return None, None - img_crop_list = [] - - dt_boxes = sorted_boxes(dt_boxes) - - for bno in range(len(dt_boxes)): - tmp_box = copy.deepcopy(dt_boxes[bno]) - img_crop = get_rotate_crop_image(ori_im, tmp_box) - img_crop_list.append(img_crop) - if self.use_angle_cls and cls: - img_crop_list, angle_list, elapse = self.text_classifier( - img_crop_list) - - - rec_res, elapse = self.text_recognizer(img_crop_list) - if self.args.save_crop_res: - self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list, - rec_res) - filter_boxes, filter_rec_res = [], [] - for box, rec_result in zip(dt_boxes, rec_res): - text, score = rec_result - if score >= self.drop_score: - filter_boxes.append(box) - filter_rec_res.append(rec_result) - return filter_boxes, filter_rec_res - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape 
[4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - image_file_list = image_file_list[args.process_id::args.total_process_num] - text_sys = TextSystem(args) - is_visualize = True - font_path = args.vis_font_path - drop_score = args.drop_score - draw_img_save_dir = args.draw_img_save_dir - os.makedirs(draw_img_save_dir, exist_ok=True) - save_results = [] - - logger.info("In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', " - "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320") - - # warm up 10 times - if args.warmup: - img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8) - for i in range(10): - res = text_sys(img) - - total_time = 0 - cpu_mem, gpu_mem, gpu_util = 0, 0, 0 - _st = time.time() - count = 0 - for idx, image_file in enumerate(image_file_list): - - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.debug("error in loading image:{}".format(image_file)) - continue - starttime = time.time() - dt_boxes, rec_res = text_sys(img) - elapse = time.time() - starttime - total_time += elapse - - - res = [{ - "transcription": rec_res[idx][0], - "points": np.array(dt_boxes[idx]).astype(np.int32).tolist(), - } for idx in range(len(dt_boxes))] - save_pred = os.path.basename(image_file) + "\t" + json.dumps( - res, ensure_ascii=False) + "\n" - save_results.append(save_pred) - - if is_visualize: - image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - boxes = dt_boxes - txts = [rec_res[i][0] for i in range(len(rec_res))] - scores = [rec_res[i][1] for i in range(len(rec_res))] - - draw_img = draw_ocr_box_txt( - image, - boxes, - txts, - scores, - drop_score=drop_score, - font_path=font_path) - if flag: - image_file = image_file[:-3] + "png" - cv2.imwrite( - os.path.join(draw_img_save_dir, os.path.basename(image_file)), - draw_img[:, :, ::-1]) - - - logger.info("The predict total time is {}".format(time.time() - _st)) - if args.benchmark: - text_sys.text_detector.autolog.report() - text_sys.text_recognizer.autolog.report() - - with open(os.path.join(draw_img_save_dir, "system_results.txt"), 'w', encoding='utf-8') as f: - f.writelines(save_results) - - -if __name__ == "__main__": - args = utility.parse_args() - if args.use_mp: - p_list = [] - total_process_num = args.total_process_num - for process_id in range(total_process_num): - cmd = [sys.executable, "-u"] + sys.argv + [ - "--process_id={}".format(process_id), - "--use_mp={}".format(False) - ] - p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout) - p_list.append(p) - for p in p_list: - p.wait() - else: - main(args) diff --git a/backend/tools/infer/utility.py b/backend/tools/infer/utility.py deleted file mode 100644 index 29b3755e..00000000 --- a/backend/tools/infer/utility.py +++ /dev/null @@ -1,645 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import sys -import platform -import cv2 -import numpy as np -import paddle -from PIL import Image, ImageDraw, ImageFont -import math -from paddle import inference -import time -from ppocr.utils.logging import get_logger - - -def str2bool(v): - return v.lower() in ("true", "t", "1") - - -def init_args(): - parser = argparse.ArgumentParser() - # params for prediction engine - parser.add_argument("--use_gpu", type=str2bool, default=True) - parser.add_argument("--ir_optim", type=str2bool, default=True) - parser.add_argument("--use_tensorrt", type=str2bool, default=False) - parser.add_argument("--min_subgraph_size", type=int, default=15) - parser.add_argument("--precision", type=str, default="fp32") - parser.add_argument("--gpu_mem", type=int, default=500) - - # params for text detector - parser.add_argument("--image_dir", type=str) - parser.add_argument("--det_algorithm", type=str, default='DB') - parser.add_argument("--det_model_dir", type=str) - parser.add_argument("--det_limit_side_len", type=float, default=960) - parser.add_argument("--det_limit_type", type=str, default='max') - - # DB parmas - parser.add_argument("--det_db_thresh", type=float, default=0.3) - parser.add_argument("--det_db_box_thresh", type=float, default=0.6) - parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5) - parser.add_argument("--max_batch_size", type=int, default=10) - parser.add_argument("--use_dilation", type=str2bool, default=False) - parser.add_argument("--det_db_score_mode", type=str, default="fast") - # EAST parmas - parser.add_argument("--det_east_score_thresh", type=float, default=0.8) - parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) - parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) - - # SAST parmas - parser.add_argument("--det_sast_score_thresh", type=float, default=0.5) - parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2) - parser.add_argument("--det_sast_polygon", type=str2bool, default=False) - - # PSE parmas - parser.add_argument("--det_pse_thresh", type=float, default=0) - parser.add_argument("--det_pse_box_thresh", type=float, default=0.85) - parser.add_argument("--det_pse_min_area", type=float, default=16) - parser.add_argument("--det_pse_box_type", type=str, default='quad') - parser.add_argument("--det_pse_scale", type=int, default=1) - - # FCE parmas - parser.add_argument("--scales", type=list, default=[8, 16, 32]) - parser.add_argument("--alpha", type=float, default=1.0) - parser.add_argument("--beta", type=float, default=1.0) - parser.add_argument("--fourier_degree", type=int, default=5) - parser.add_argument("--det_fce_box_type", type=str, default='poly') - - # params for text recognizer - parser.add_argument("--rec_algorithm", type=str, default='CRNN') - parser.add_argument("--rec_model_dir", type=str) - parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320") - parser.add_argument("--rec_batch_num", type=int, default=6) - parser.add_argument("--max_text_length", type=int, default=25) - parser.add_argument( - "--rec_char_dict_path", - type=str, - 
default="./ppocr/utils/ppocr_keys_v1.txt") - parser.add_argument("--use_space_char", type=str2bool, default=True) - parser.add_argument( - "--vis_font_path", type=str, default="./doc/fonts/simfang.ttf") - parser.add_argument("--drop_score", type=float, default=0.5) - - # params for e2e - parser.add_argument("--e2e_algorithm", type=str, default='PGNet') - parser.add_argument("--e2e_model_dir", type=str) - parser.add_argument("--e2e_limit_side_len", type=float, default=768) - parser.add_argument("--e2e_limit_type", type=str, default='max') - - # PGNet parmas - parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5) - parser.add_argument( - "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt") - parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext') - parser.add_argument("--e2e_pgnet_mode", type=str, default='fast') - - # params for text classifier - parser.add_argument("--use_angle_cls", type=str2bool, default=False) - parser.add_argument("--cls_model_dir", type=str) - parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") - parser.add_argument("--label_list", type=list, default=['0', '180']) - parser.add_argument("--cls_batch_num", type=int, default=6) - parser.add_argument("--cls_thresh", type=float, default=0.9) - - parser.add_argument("--enable_mkldnn", type=str2bool, default=False) - parser.add_argument("--cpu_threads", type=int, default=10) - parser.add_argument("--use_pdserving", type=str2bool, default=False) - parser.add_argument("--warmup", type=str2bool, default=False) - - # - parser.add_argument( - "--draw_img_save_dir", type=str, default="./inference_results") - parser.add_argument("--save_crop_res", type=str2bool, default=False) - parser.add_argument("--crop_res_save_dir", type=str, default="./output") - - # multi-process - parser.add_argument("--use_mp", type=str2bool, default=False) - parser.add_argument("--total_process_num", type=int, default=1) - parser.add_argument("--process_id", type=int, default=0) - - parser.add_argument("--benchmark", type=str2bool, default=False) - parser.add_argument("--save_log_path", type=str, default="./log_output/") - - parser.add_argument("--show_log", type=str2bool, default=False) - parser.add_argument("--use_onnx", type=str2bool, default=False) - return parser - - -def parse_args(): - parser = init_args() - return parser.parse_args() - - -def create_predictor(args, mode, logger): - if mode == "det": - model_dir = args.det_model_dir - elif mode == 'cls': - model_dir = args.cls_model_dir - elif mode == 'rec': - model_dir = args.rec_model_dir - elif mode == 'table': - model_dir = args.table_model_dir - else: - model_dir = args.e2e_model_dir - - if model_dir is None: - logger.info("not find {} model file path {}".format(mode, model_dir)) - sys.exit(0) - if args.use_onnx: - import onnxruntime as ort - model_file_path = model_dir - if not os.path.exists(model_file_path): - raise ValueError("not find model file path {}".format( - model_file_path)) - sess = ort.InferenceSession(model_file_path) - return sess, sess.get_inputs()[0], None, None - - else: - model_file_path = model_dir + "/inference.pdmodel" - params_file_path = model_dir + "/inference.pdiparams" - if not os.path.exists(model_file_path): - raise ValueError("not find model file path {}".format( - model_file_path)) - if not os.path.exists(params_file_path): - raise ValueError("not find params file path {}".format( - params_file_path)) - - config = inference.Config(model_file_path, params_file_path) - - if 
hasattr(args, 'precision'): - if args.precision == "fp16" and args.use_tensorrt: - precision = inference.PrecisionType.Half - elif args.precision == "int8": - precision = inference.PrecisionType.Int8 - else: - precision = inference.PrecisionType.Float32 - else: - precision = inference.PrecisionType.Float32 - - if args.use_gpu: - gpu_id = get_infer_gpuid() - if gpu_id is None: - logger.warning( - "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson." - ) - config.enable_use_gpu(args.gpu_mem, 0) - if args.use_tensorrt: - config.enable_tensorrt_engine( - workspace_size=1 << 30, - precision_mode=precision, - max_batch_size=args.max_batch_size, - min_subgraph_size=args.min_subgraph_size) - # skip the minmum trt subgraph - use_dynamic_shape = True - if mode == "det": - min_input_shape = { - "x": [1, 3, 50, 50], - "conv2d_92.tmp_0": [1, 120, 20, 20], - "conv2d_91.tmp_0": [1, 24, 10, 10], - "conv2d_59.tmp_0": [1, 96, 20, 20], - "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10], - "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20], - "conv2d_124.tmp_0": [1, 256, 20, 20], - "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20], - "elementwise_add_7": [1, 56, 2, 2], - "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2] - } - max_input_shape = { - "x": [1, 3, 1536, 1536], - "conv2d_92.tmp_0": [1, 120, 400, 400], - "conv2d_91.tmp_0": [1, 24, 200, 200], - "conv2d_59.tmp_0": [1, 96, 400, 400], - "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200], - "conv2d_124.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400], - "elementwise_add_7": [1, 56, 400, 400], - "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400] - } - opt_input_shape = { - "x": [1, 3, 640, 640], - "conv2d_92.tmp_0": [1, 120, 160, 160], - "conv2d_91.tmp_0": [1, 24, 80, 80], - "conv2d_59.tmp_0": [1, 96, 160, 160], - "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80], - "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160], - "conv2d_124.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160], - "elementwise_add_7": [1, 56, 40, 40], - "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40] - } - min_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20], - "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20] - } - max_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400] - } - opt_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160] - } - min_input_shape.update(min_pact_shape) - max_input_shape.update(max_pact_shape) - opt_input_shape.update(opt_pact_shape) - elif mode == "rec": - if args.rec_algorithm != "CRNN": - use_dynamic_shape = False - imgH = int(args.rec_image_shape.split(',')[-2]) - min_input_shape = {"x": [1, 3, imgH, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]} - opt_input_shape = {"x": 
[args.rec_batch_num, 3, imgH, 320]} - elif mode == "cls": - min_input_shape = {"x": [1, 3, 48, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]} - else: - use_dynamic_shape = False - if use_dynamic_shape: - config.set_trt_dynamic_shape_info( - min_input_shape, max_input_shape, opt_input_shape) - - else: - config.disable_gpu() - if hasattr(args, "cpu_threads"): - config.set_cpu_math_library_num_threads(args.cpu_threads) - else: - # default cpu threads as 10 - config.set_cpu_math_library_num_threads(10) - if args.enable_mkldnn: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - if args.precision == "fp16": - config.enable_mkldnn_bfloat16() - # enable memory optim - config.enable_memory_optim() - config.disable_glog_info() - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.delete_pass("matmul_transpose_reshape_fuse_pass") - if mode == 'table': - config.delete_pass("fc_fuse_pass") # not supported for table - config.switch_use_feed_fetch_ops(False) - config.switch_ir_optim(True) - - # create predictor - predictor = inference.create_predictor(config) - input_names = predictor.get_input_names() - for name in input_names: - input_tensor = predictor.get_input_handle(name) - output_tensors = get_output_tensors(args, mode, predictor) - return predictor, input_tensor, output_tensors, config - - -def get_output_tensors(args, mode, predictor): - output_names = predictor.get_output_names() - output_tensors = [] - if mode == "rec" and args.rec_algorithm == "CRNN": - output_name = 'softmax_0.tmp_0' - if output_name in output_names: - return [predictor.get_output_handle(output_name)] - else: - for output_name in output_names: - output_tensor = predictor.get_output_handle(output_name) - output_tensors.append(output_tensor) - else: - for output_name in output_names: - output_tensor = predictor.get_output_handle(output_name) - output_tensors.append(output_tensor) - return output_tensors - - -def get_infer_gpuid(): - sysstr = platform.system() - if sysstr == "Windows": - return 0 - - if not paddle.core.is_compiled_with_rocm(): - cmd = "env | grep CUDA_VISIBLE_DEVICES" - else: - cmd = "env | grep HIP_VISIBLE_DEVICES" - env_cuda = os.popen(cmd).readlines() - if len(env_cuda) == 0: - return 0 - else: - gpu_id = env_cuda[0].strip().split("=")[1] - return int(gpu_id[0]) - - -def draw_e2e_res(dt_boxes, strs, img_path): - src_im = cv2.imread(img_path) - for box, str in zip(dt_boxes, strs): - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - cv2.putText( - src_im, - str, - org=(int(box[0, 0, 0]), int(box[0, 0, 1])), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=0.7, - color=(0, 255, 0), - thickness=1) - return src_im - - -def draw_text_det_res(dt_boxes, img_path): - src_im = cv2.imread(img_path) - for box in dt_boxes: - box = np.array(box).astype(np.int32).reshape(-1, 2) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - return src_im - - -def resize_img(img, input_size=600): - """ - resize img and limit the longest side of the image to input_size - """ - img = np.array(img) - im_shape = img.shape - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return img - - -def draw_ocr(image, - boxes, - txts=None, - scores=None, - drop_score=0.5, - 
font_path="./doc/fonts/simfang.ttf"): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - drop_score(float): only scores greater than drop_threshold will be visualized - font_path: the path of font which is used to draw text - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - box_num = len(boxes) - for i in range(box_num): - if scores is not None and (scores[i] < drop_score or - math.isnan(scores[i])): - continue - box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - if txts is not None: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - img_h=img.shape[0], - img_w=600, - threshold=drop_score, - font_path=font_path) - img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) - return img - return image - - -def draw_ocr_box_txt(image, - boxes, - txts, - scores=None, - drop_score=0.5, - font_path="./doc/simfang.ttf"): - h, w = image.height, image.width - img_left = image.copy() - img_right = Image.new('RGB', (w, h), (255, 255, 255)) - - import random - - random.seed(0) - draw_left = ImageDraw.Draw(img_left) - draw_right = ImageDraw.Draw(img_right) - for idx, (box, txt) in enumerate(zip(boxes, txts)): - if scores is not None and scores[idx] < drop_score: - continue - color = (random.randint(0, 255), random.randint(0, 255), - random.randint(0, 255)) - draw_left.polygon(box, fill=color) - draw_right.polygon( - [ - box[0][0], box[0][1], box[1][0], box[1][1], box[2][0], - box[2][1], box[3][0], box[3][1] - ], - outline=color) - box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][ - 1])**2) - box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][ - 1])**2) - if box_height > 2 * box_width: - font_size = max(int(box_width * 0.9), 10) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - cur_y = box[0][1] - for c in txt: - char_size = font.getsize(c) - draw_right.text( - (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font) - cur_y += char_size[1] - else: - font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - draw_right.text( - [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font) - img_left = Image.blend(image, img_left, 0.5) - img_show = Image.new('RGB', (w * 2, h), (255, 255, 255)) - img_show.paste(img_left, (0, 0, w, h)) - img_show.paste(img_right, (w, 0, w * 2, h)) - return np.array(img_show) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. 
- args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def text_visual(texts, - scores, - img_h=400, - img_w=600, - threshold=0., - font_path="./doc/simfang.ttf"): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - font_path: the path of font which is used to draw text - return(array): - """ - if scores is not None: - assert len(texts) == len( - scores), "The number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def base64_to_cv2(b64str): - import base64 - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def draw_boxes(image, boxes, scores=None, drop_score=0.5): - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score: - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - return image - - -def get_rotate_crop_image(img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - assert len(points) == 4, "shape of points must be 4*2" - 
img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - -def check_gpu(use_gpu): - if use_gpu and not paddle.is_compiled_with_cuda(): - use_gpu = False - return use_gpu - - -if __name__ == '__main__': - pass diff --git a/backend/tools/infer_cls.py b/backend/tools/infer_cls.py deleted file mode 100755 index 7fd6b536..00000000 --- a/backend/tools/infer_cls.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import paddle - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.utility import get_image_file_list -import tools.program as program - - -def main(): - global_config = config['Global'] - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - model = build_model(config['Architecture']) - - load_model(config, model) - - # create data ops - transforms = [] - for op in config['Eval']['dataset']['transforms']: - op_name = list(op)[0] - if 'Label' in op_name: - continue - elif op_name == 'KeepKeys': - op[op_name]['keep_keys'] = ['image'] - elif op_name == "SSLRotateResize": - op[op_name]["mode"] = "test" - transforms.append(op) - global_config['infer_mode'] = True - ops = create_operators(transforms, global_config) - - model.eval() - for file in get_image_file_list(config['Global']['infer_img']): - logger.info("infer_img: {}".format(file)) - with open(file, 'rb') as f: - img = f.read() - data = {'image': img} - batch = transform(data, ops) - - images = np.expand_dims(batch[0], axis=0) - images = paddle.to_tensor(images) - preds = model(images) - post_result = post_process_class(preds) - for rec_result in post_result: - logger.info('\t result: {}'.format(rec_result)) - logger.info("success!") - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - 
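
For reference, the deleted infer scripts all share the transform-adaptation pattern visible in infer_cls.py above: ground-truth (Label) ops are dropped, KeepKeys is narrowed to the image tensor, and SSLRotateResize is switched to test mode. A minimal standalone sketch of that pattern, assuming the op-dict config layout shown above (the helper name is hypothetical):

import copy

def build_infer_transforms(eval_transforms):
    # Mirror infer_cls.py: drop label ops, keep only the image tensor,
    # and force SSLRotateResize into test mode for inference.
    ops = []
    for op in copy.deepcopy(eval_transforms):
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        elif op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image']
        elif op_name == 'SSLRotateResize':
            op[op_name]['mode'] = 'test'
        ops.append(op)
    return ops

# e.g. ops = build_infer_transforms(config['Eval']['dataset']['transforms'])
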
main() diff --git a/backend/tools/infer_det.py b/backend/tools/infer_det.py deleted file mode 100755 index 1acecedf..00000000 --- a/backend/tools/infer_det.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import json -import paddle - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.utility import get_image_file_list -import tools.program as program - - -def draw_det_res(dt_boxes, config, img, img_name, save_path): - if len(dt_boxes) > 0: - import cv2 - src_im = img - for box in dt_boxes: - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - if not os.path.exists(save_path): - os.makedirs(save_path) - save_path = os.path.join(save_path, os.path.basename(img_name)) - cv2.imwrite(save_path, src_im) - logger.info("The detected Image saved in {}".format(save_path)) - - -@paddle.no_grad() -def main(): - global_config = config['Global'] - - # build model - model = build_model(config['Architecture']) - - load_model(config, model) - # build post process - post_process_class = build_post_process(config['PostProcess']) - - # create data ops - transforms = [] - for op in config['Eval']['dataset']['transforms']: - op_name = list(op)[0] - if 'Label' in op_name: - continue - elif op_name == 'KeepKeys': - op[op_name]['keep_keys'] = ['image', 'shape'] - transforms.append(op) - - ops = create_operators(transforms, global_config) - - save_res_path = config['Global']['save_res_path'] - if not os.path.exists(os.path.dirname(save_res_path)): - os.makedirs(os.path.dirname(save_res_path)) - - model.eval() - with open(save_res_path, "wb") as fout: - for file in get_image_file_list(config['Global']['infer_img']): - logger.info("infer_img: {}".format(file)) - with open(file, 'rb') as f: - img = f.read() - data = {'image': img} - batch = transform(data, ops) - - images = np.expand_dims(batch[0], axis=0) - shape_list = np.expand_dims(batch[1], axis=0) - images = paddle.to_tensor(images) - preds = model(images) - post_result = post_process_class(preds, shape_list) - - src_img = cv2.imread(file) - - dt_boxes_json = [] - # parser boxes if post_result is dict - if isinstance(post_result, dict): - det_box_json = {} - for k in post_result.keys(): - boxes = post_result[k][0]['points'] - dt_boxes_list = [] - for box in boxes: - tmp_json = {"transcription": ""} - tmp_json['points'] = box.tolist() - 
dt_boxes_list.append(tmp_json) - det_box_json[k] = dt_boxes_list - save_det_path = os.path.dirname(config['Global'][ - 'save_res_path']) + "/det_results_{}/".format(k) - draw_det_res(boxes, config, src_img, file, save_det_path) - else: - boxes = post_result[0]['points'] - dt_boxes_json = [] - # write result - for box in boxes: - tmp_json = {"transcription": ""} - tmp_json['points'] = box.tolist() - dt_boxes_json.append(tmp_json) - save_det_path = os.path.dirname(config['Global'][ - 'save_res_path']) + "/det_results/" - draw_det_res(boxes, config, src_img, file, save_det_path) - otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n" - fout.write(otstr.encode()) - - logger.info("success!") - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main() diff --git a/backend/tools/infer_e2e.py b/backend/tools/infer_e2e.py deleted file mode 100755 index d3e6b28f..00000000 --- a/backend/tools/infer_e2e.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import json -import paddle - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.utility import get_image_file_list -import tools.program as program - - -def draw_e2e_res(dt_boxes, strs, config, img, img_name): - if len(dt_boxes) > 0: - src_im = img - for box, str in zip(dt_boxes, strs): - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - cv2.putText( - src_im, - str, - org=(int(box[0, 0, 0]), int(box[0, 0, 1])), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=0.7, - color=(0, 255, 0), - thickness=1) - save_det_path = os.path.dirname(config['Global'][ - 'save_res_path']) + "/e2e_results/" - if not os.path.exists(save_det_path): - os.makedirs(save_det_path) - save_path = os.path.join(save_det_path, os.path.basename(img_name)) - cv2.imwrite(save_path, src_im) - logger.info("The e2e Image saved in {}".format(save_path)) - - -def main(): - global_config = config['Global'] - - # build model - model = build_model(config['Architecture']) - - load_model(config, model) - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # create data ops - transforms = [] - for op in config['Eval']['dataset']['transforms']: - op_name = list(op)[0] - if 'Label' in op_name: - continue - elif op_name == 'KeepKeys': - op[op_name]['keep_keys'] = ['image', 'shape'] - 
transforms.append(op) - - ops = create_operators(transforms, global_config) - - save_res_path = config['Global']['save_res_path'] - if not os.path.exists(os.path.dirname(save_res_path)): - os.makedirs(os.path.dirname(save_res_path)) - - model.eval() - with open(save_res_path, "wb") as fout: - for file in get_image_file_list(config['Global']['infer_img']): - logger.info("infer_img: {}".format(file)) - with open(file, 'rb') as f: - img = f.read() - data = {'image': img} - batch = transform(data, ops) - images = np.expand_dims(batch[0], axis=0) - shape_list = np.expand_dims(batch[1], axis=0) - images = paddle.to_tensor(images) - preds = model(images) - post_result = post_process_class(preds, shape_list) - points, strs = post_result['points'], post_result['texts'] - # write result - dt_boxes_json = [] - for poly, str in zip(points, strs): - tmp_json = {"transcription": str} - tmp_json['points'] = poly.tolist() - dt_boxes_json.append(tmp_json) - otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n" - fout.write(otstr.encode()) - src_img = cv2.imread(file) - draw_e2e_res(points, strs, config, src_img, file) - logger.info("success!") - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main() diff --git a/backend/tools/infer_kie.py b/backend/tools/infer_kie.py deleted file mode 100755 index 0cb0b870..00000000 --- a/backend/tools/infer_kie.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
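
The deleted infer_det.py and infer_e2e.py above both serialize predictions one image per line, as "<image path>\t<JSON list of {transcription, points}>". A minimal sketch for reading such a results file back (the helper name and example path are hypothetical):

import json

def load_ocr_results(path):
    # Each line is "<image path>\t<JSON boxes>"; split only on the
    # first tab so paths containing tabs in the JSON stay intact.
    results = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            img_path, boxes_json = line.rstrip('\n').split('\t', 1)
            results[img_path] = json.loads(boxes_json)
    return results

# e.g. load_ocr_results('output/det_results.txt')
# -> {img_path: [{'transcription': '', 'points': [[x, y], ...]}, ...]}
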
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle.nn.functional as F - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import paddle - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.utils.save_load import load_model -import tools.program as program -import time - - -def read_class_list(filepath): - dict = {} - with open(filepath, "r") as f: - lines = f.readlines() - for line in lines: - key, value = line.split(" ") - dict[key] = value.rstrip() - return dict - - -def draw_kie_result(batch, node, idx_to_cls, count): - img = batch[6].copy() - boxes = batch[7] - h, w = img.shape[:2] - pred_img = np.ones((h, w * 2, 3), dtype=np.uint8) * 255 - max_value, max_idx = paddle.max(node, -1), paddle.argmax(node, -1) - node_pred_label = max_idx.numpy().tolist() - node_pred_score = max_value.numpy().tolist() - - for i, box in enumerate(boxes): - if i >= len(node_pred_label): - break - new_box = [[box[0], box[1]], [box[2], box[1]], [box[2], box[3]], - [box[0], box[3]]] - Pts = np.array([new_box], np.int32) - cv2.polylines( - img, [Pts.reshape((-1, 1, 2))], - True, - color=(255, 255, 0), - thickness=1) - x_min = int(min([point[0] for point in new_box])) - y_min = int(min([point[1] for point in new_box])) - - pred_label = str(node_pred_label[i]) - if pred_label in idx_to_cls: - pred_label = idx_to_cls[pred_label] - pred_score = '{:.2f}'.format(node_pred_score[i]) - text = pred_label + '(' + pred_score + ')' - cv2.putText(pred_img, text, (x_min * 2, y_min), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1) - vis_img = np.ones((h, w * 3, 3), dtype=np.uint8) * 255 - vis_img[:, :w] = img - vis_img[:, w:] = pred_img - save_kie_path = os.path.dirname(config['Global'][ - 'save_res_path']) + "/kie_results/" - if not os.path.exists(save_kie_path): - os.makedirs(save_kie_path) - save_path = os.path.join(save_kie_path, str(count) + ".png") - cv2.imwrite(save_path, vis_img) - logger.info("The Kie Image saved in {}".format(save_path)) - - -def main(): - global_config = config['Global'] - - # build model - model = build_model(config['Architecture']) - load_model(config, model) - - # create data ops - transforms = [] - for op in config['Eval']['dataset']['transforms']: - transforms.append(op) - - data_dir = config['Eval']['dataset']['data_dir'] - - ops = create_operators(transforms, global_config) - - save_res_path = config['Global']['save_res_path'] - class_path = config['Global']['class_path'] - idx_to_cls = read_class_list(class_path) - if not os.path.exists(os.path.dirname(save_res_path)): - os.makedirs(os.path.dirname(save_res_path)) - - model.eval() - - warmup_times = 0 - count_t = [] - with open(save_res_path, "wb") as fout: - with open(config['Global']['infer_img'], "rb") as f: - lines = f.readlines() - for index, data_line in enumerate(lines): - if index == 10: - warmup_t = time.time() - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split("\t") - img_path, label = data_dir + "/" + substr[0], substr[1] - data = {'img_path': img_path, 'label': label} - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - st = time.time() - batch = transform(data, ops) - batch_pred = [0] * len(batch) - for i in 
range(len(batch)): - batch_pred[i] = paddle.to_tensor( - np.expand_dims( - batch[i], axis=0)) - st = time.time() - node, edge = model(batch_pred) - node = F.softmax(node, -1) - count_t.append(time.time() - st) - draw_kie_result(batch, node, idx_to_cls, index) - logger.info("success!") - logger.info("It took {} s for predict {} images.".format( - np.sum(count_t), len(count_t))) - ips = len(count_t[warmup_times:]) / np.sum(count_t[warmup_times:]) - logger.info("The ips is {} images/s".format(ips)) - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main() diff --git a/backend/tools/infer_rec.py b/backend/tools/infer_rec.py deleted file mode 100755 index 193e24a4..00000000 --- a/backend/tools/infer_rec.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys -import json - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import paddle - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.utility import get_image_file_list -import tools.program as program - - -def main(): - global_config = config['Global'] - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - if hasattr(post_process_class, 'character'): - char_num = len(getattr(post_process_class, 'character')) - if config['Architecture']["algorithm"] in ["Distillation", - ]: # distillation model - for key in config['Architecture']["Models"]: - if config['Architecture']['Models'][key]['Head'][ - 'name'] == 'MultiHead': # for multi head - out_channels_list = {} - if config['PostProcess'][ - 'name'] == 'DistillationSARLabelDecode': - char_num = char_num - 2 - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Models'][key]['Head'][ - 'out_channels_list'] = out_channels_list - else: - config['Architecture']["Models"][key]["Head"][ - 'out_channels'] = char_num - elif config['Architecture']['Head'][ - 'name'] == 'MultiHead': # for multi head loss - out_channels_list = {} - if config['PostProcess']['name'] == 'SARLabelDecode': - char_num = char_num - 2 - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Head'][ - 'out_channels_list'] = out_channels_list - else: # base rec model - config['Architecture']["Head"]['out_channels'] = char_num - - model = build_model(config['Architecture']) - - load_model(config, model) - - # create 
data ops - transforms = [] - for op in config['Eval']['dataset']['transforms']: - op_name = list(op)[0] - if 'Label' in op_name: - continue - elif op_name in ['RecResizeImg']: - op[op_name]['infer_mode'] = True - elif op_name == 'KeepKeys': - if config['Architecture']['algorithm'] == "SRN": - op[op_name]['keep_keys'] = [ - 'image', 'encoder_word_pos', 'gsrm_word_pos', - 'gsrm_slf_attn_bias1', 'gsrm_slf_attn_bias2' - ] - elif config['Architecture']['algorithm'] == "SAR": - op[op_name]['keep_keys'] = ['image', 'valid_ratio'] - else: - op[op_name]['keep_keys'] = ['image'] - transforms.append(op) - global_config['infer_mode'] = True - ops = create_operators(transforms, global_config) - - save_res_path = config['Global'].get('save_res_path', - "./output/rec/predicts_rec.txt") - if not os.path.exists(os.path.dirname(save_res_path)): - os.makedirs(os.path.dirname(save_res_path)) - - model.eval() - - with open(save_res_path, "w") as fout: - for file in get_image_file_list(config['Global']['infer_img']): - logger.info("infer_img: {}".format(file)) - with open(file, 'rb') as f: - img = f.read() - data = {'image': img} - batch = transform(data, ops) - if config['Architecture']['algorithm'] == "SRN": - encoder_word_pos_list = np.expand_dims(batch[1], axis=0) - gsrm_word_pos_list = np.expand_dims(batch[2], axis=0) - gsrm_slf_attn_bias1_list = np.expand_dims(batch[3], axis=0) - gsrm_slf_attn_bias2_list = np.expand_dims(batch[4], axis=0) - - others = [ - paddle.to_tensor(encoder_word_pos_list), - paddle.to_tensor(gsrm_word_pos_list), - paddle.to_tensor(gsrm_slf_attn_bias1_list), - paddle.to_tensor(gsrm_slf_attn_bias2_list) - ] - if config['Architecture']['algorithm'] == "SAR": - valid_ratio = np.expand_dims(batch[-1], axis=0) - img_metas = [paddle.to_tensor(valid_ratio)] - - images = np.expand_dims(batch[0], axis=0) - images = paddle.to_tensor(images) - if config['Architecture']['algorithm'] == "SRN": - preds = model(images, others) - elif config['Architecture']['algorithm'] == "SAR": - preds = model(images, img_metas) - else: - preds = model(images) - post_result = post_process_class(preds) - info = None - if isinstance(post_result, dict): - rec_info = dict() - for key in post_result: - if len(post_result[key][0]) >= 2: - rec_info[key] = { - "label": post_result[key][0][0], - "score": float(post_result[key][0][1]), - } - info = json.dumps(rec_info, ensure_ascii=False) - else: - if len(post_result[0]) >= 2: - info = post_result[0][0] + "\t" + str(post_result[0][1]) - - if info is not None: - logger.info("\t result: {}".format(info)) - fout.write(file + "\t" + info) - logger.info("success!") - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main() diff --git a/backend/tools/infer_table.py b/backend/tools/infer_table.py deleted file mode 100644 index 66c2da44..00000000 --- a/backend/tools/infer_table.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
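
In infer_rec.py above, the head width is derived from the character dictionary: SAR-style decoders reserve two extra symbols, so a MultiHead model gets char_num output channels for the CTC branch and char_num + 2 for the SAR branch. The same arithmetic as a standalone sketch (hypothetical helper name):

def multihead_out_channels(char_num, postprocess_name):
    # Mirrors infer_rec.py: SAR label decoding reserves two extra
    # symbols, so char_num is first reduced, then SAR gets two back.
    if postprocess_name in ('SARLabelDecode', 'DistillationSARLabelDecode'):
        char_num -= 2
    return {'CTCLabelDecode': char_num, 'SARLabelDecode': char_num + 2}

# e.g. multihead_out_channels(100, 'SARLabelDecode')
# -> {'CTCLabelDecode': 98, 'SARLabelDecode': 100}
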
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys -import json - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import paddle -from paddle.jit import to_static - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.utility import get_image_file_list -import tools.program as program -import cv2 - - -def main(config, device, logger, vdl_writer): - global_config = config['Global'] - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - if hasattr(post_process_class, 'character'): - config['Architecture']["Head"]['out_channels'] = len( - getattr(post_process_class, 'character')) - - model = build_model(config['Architecture']) - - load_model(config, model) - - # create data ops - transforms = [] - use_padding = False - for op in config['Eval']['dataset']['transforms']: - op_name = list(op)[0] - if 'Label' in op_name: - continue - if op_name == 'KeepKeys': - op[op_name]['keep_keys'] = ['image'] - if op_name == "ResizeTableImage": - use_padding = True - padding_max_len = op['ResizeTableImage']['max_len'] - transforms.append(op) - - global_config['infer_mode'] = True - ops = create_operators(transforms, global_config) - - model.eval() - for file in get_image_file_list(config['Global']['infer_img']): - logger.info("infer_img: {}".format(file)) - with open(file, 'rb') as f: - img = f.read() - data = {'image': img} - batch = transform(data, ops) - images = np.expand_dims(batch[0], axis=0) - images = paddle.to_tensor(images) - preds = model(images) - post_result = post_process_class(preds) - res_html_code = post_result['res_html_code'] - res_loc = post_result['res_loc'] - img = cv2.imread(file) - imgh, imgw = img.shape[0:2] - res_loc_final = [] - for rno in range(len(res_loc[0])): - x0, y0, x1, y1 = res_loc[0][rno] - left = max(int(imgw * x0), 0) - top = max(int(imgh * y0), 0) - right = min(int(imgw * x1), imgw - 1) - bottom = min(int(imgh * y1), imgh - 1) - cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2) - res_loc_final.append([left, top, right, bottom]) - res_loc_str = json.dumps(res_loc_final) - logger.info("result: {}, {}".format(res_html_code, res_loc_final)) - logger.info("success!") - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - main(config, device, logger, vdl_writer) diff --git a/backend/tools/infer_vqa_token_ser.py b/backend/tools/infer_vqa_token_ser.py deleted file mode 100755 index 83ed72b3..00000000 --- a/backend/tools/infer_vqa_token_ser.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' -import cv2 -import json -import paddle - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.visual import draw_ser_results -from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps -import tools.program as program - - -def to_tensor(data): - import numbers - from collections import defaultdict - data_dict = defaultdict(list) - to_tensor_idxs = [] - for idx, v in enumerate(data): - if isinstance(v, (np.ndarray, paddle.Tensor, numbers.Number)): - if idx not in to_tensor_idxs: - to_tensor_idxs.append(idx) - data_dict[idx].append(v) - for idx in to_tensor_idxs: - data_dict[idx] = paddle.to_tensor(data_dict[idx]) - return list(data_dict.values()) - - -class SerPredictor(object): - def __init__(self, config): - global_config = config['Global'] - - # build post process - self.post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - self.model = build_model(config['Architecture']) - - load_model( - config, self.model, model_type=config['Architecture']["model_type"]) - - from paddleocr import PaddleOCR - - self.ocr_engine = PaddleOCR(use_angle_cls=False, show_log=False) - - # create data ops - transforms = [] - for op in config['Eval']['dataset']['transforms']: - op_name = list(op)[0] - if 'Label' in op_name: - op[op_name]['ocr_engine'] = self.ocr_engine - elif op_name == 'KeepKeys': - op[op_name]['keep_keys'] = [ - 'input_ids', 'labels', 'bbox', 'image', 'attention_mask', - 'token_type_ids', 'segment_offset_id', 'ocr_info', - 'entities' - ] - - transforms.append(op) - global_config['infer_mode'] = True - self.ops = create_operators(config['Eval']['dataset']['transforms'], - global_config) - self.model.eval() - - def __call__(self, img_path): - with open(img_path, 'rb') as f: - img = f.read() - data = {'image': img} - batch = transform(data, self.ops) - batch = to_tensor(batch) - preds = self.model(batch) - post_result = self.post_process_class( - preds, - attention_masks=batch[4], - segment_offset_ids=batch[6], - ocr_infos=batch[7]) - return post_result, batch - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess() - os.makedirs(config['Global']['save_res_path'], exist_ok=True) - - ser_engine = SerPredictor(config) - - infer_imgs = get_image_file_list(config['Global']['infer_img']) - with open( - os.path.join(config['Global']['save_res_path'], - "infer_results.txt"), - "w", - encoding='utf-8') as fout: - for idx, img_path in enumerate(infer_imgs): - save_img_path = os.path.join( - config['Global']['save_res_path'], - os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg") - logger.info("process: [{}/{}], save result to {}".format( - idx, len(infer_imgs), save_img_path)) - - result, _ = ser_engine(img_path) - result = result[0] - fout.write(img_path + "\t" + json.dumps( - { - "ocr_info": result, - }, ensure_ascii=False) + "\n") - img_res = draw_ser_results(img_path, 
result) - cv2.imwrite(save_img_path, img_res) diff --git a/backend/tools/infer_vqa_token_ser_re.py b/backend/tools/infer_vqa_token_ser_re.py deleted file mode 100755 index 40f1dd5c..00000000 --- a/backend/tools/infer_vqa_token_ser_re.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' -import cv2 -import json -import paddle -import paddle.distributed as dist - -from ppocr.data import create_operators, transform -from ppocr.modeling.architectures import build_model -from ppocr.postprocess import build_post_process -from ppocr.utils.save_load import load_model -from ppocr.utils.visual import draw_re_results -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps, print_dict -from tools.program import ArgsParser, load_config, merge_config, check_gpu -from tools.infer_vqa_token_ser import SerPredictor - - -class ReArgsParser(ArgsParser): - def __init__(self): - super(ReArgsParser, self).__init__() - self.add_argument( - "-c_ser", "--config_ser", help="ser configuration file to use") - self.add_argument( - "-o_ser", - "--opt_ser", - nargs='+', - help="set ser configuration options ") - - def parse_args(self, argv=None): - args = super(ReArgsParser, self).parse_args(argv) - assert args.config_ser is not None, \ - "Please specify --config_ser=ser_configure_file_path." 
- args.opt_ser = self._parse_opt(args.opt_ser) - return args - - -def make_input(ser_inputs, ser_results): - entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2} - - entities = ser_inputs[8][0] - ser_results = ser_results[0] - assert len(entities) == len(ser_results) - - # entities - start = [] - end = [] - label = [] - entity_idx_dict = {} - for i, (res, entity) in enumerate(zip(ser_results, entities)): - if res['pred'] == 'O': - continue - entity_idx_dict[len(start)] = i - start.append(entity['start']) - end.append(entity['end']) - label.append(entities_labels[res['pred']]) - entities = dict(start=start, end=end, label=label) - - # relations - head = [] - tail = [] - for i in range(len(entities["label"])): - for j in range(len(entities["label"])): - if entities["label"][i] == 1 and entities["label"][j] == 2: - head.append(i) - tail.append(j) - - relations = dict(head=head, tail=tail) - - batch_size = ser_inputs[0].shape[0] - entities_batch = [] - relations_batch = [] - entity_idx_dict_batch = [] - for b in range(batch_size): - entities_batch.append(entities) - relations_batch.append(relations) - entity_idx_dict_batch.append(entity_idx_dict) - - ser_inputs[8] = entities_batch - ser_inputs.append(relations_batch) - # remove ocr_info segment_offset_id and label in ser input - ser_inputs.pop(7) - ser_inputs.pop(6) - ser_inputs.pop(1) - return ser_inputs, entity_idx_dict_batch - - -class SerRePredictor(object): - def __init__(self, config, ser_config): - self.ser_engine = SerPredictor(ser_config) - - # init re model - global_config = config['Global'] - - # build post process - self.post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - self.model = build_model(config['Architecture']) - - load_model( - config, self.model, model_type=config['Architecture']["model_type"]) - - self.model.eval() - - def __call__(self, img_path): - ser_results, ser_inputs = self.ser_engine(img_path) - paddle.save(ser_inputs, 'ser_inputs.npy') - paddle.save(ser_results, 'ser_results.npy') - re_input, entity_idx_dict_batch = make_input(ser_inputs, ser_results) - preds = self.model(re_input) - post_result = self.post_process_class( - preds, - ser_results=ser_results, - entity_idx_dict_batch=entity_idx_dict_batch) - return post_result - - -def preprocess(): - FLAGS = ReArgsParser().parse_args() - config = load_config(FLAGS.settings_config) - config = merge_config(config, FLAGS.opt) - - ser_config = load_config(FLAGS.config_ser) - ser_config = merge_config(ser_config, FLAGS.opt_ser) - - logger = get_logger() - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu' - device = paddle.set_device(device) - - logger.info('{} re config {}'.format('*' * 10, '*' * 10)) - print_dict(config, logger) - logger.info('\n') - logger.info('{} ser config {}'.format('*' * 10, '*' * 10)) - print_dict(ser_config, logger) - logger.info('train with paddle {} and device {}'.format(paddle.__version__, - device)) - return config, ser_config, device, logger - - -if __name__ == '__main__': - config, ser_config, device, logger = preprocess() - os.makedirs(config['Global']['save_res_path'], exist_ok=True) - - ser_re_engine = SerRePredictor(config, ser_config) - - infer_imgs = get_image_file_list(config['Global']['infer_img']) - with open( - os.path.join(config['Global']['save_res_path'], - "infer_results.txt"), - "w", - encoding='utf-8') as fout: - for idx, 
img_path in enumerate(infer_imgs): - save_img_path = os.path.join( - config['Global']['save_res_path'], - os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg") - logger.info("process: [{}/{}], save result to {}".format( - idx, len(infer_imgs), save_img_path)) - - result = ser_re_engine(img_path) - result = result[0] - fout.write(img_path + "\t" + json.dumps( - { - "ser_result": result, - }, ensure_ascii=False) + "\n") - img_res = draw_re_results(img_path, result) - cv2.imwrite(save_img_path, img_res) diff --git a/backend/tools/ocr.py b/backend/tools/ocr.py index 018140db..5b23562c 100644 --- a/backend/tools/ocr.py +++ b/backend/tools/ocr.py @@ -1,15 +1,12 @@ -from tools.infer import utility -from tools.infer.predict_system import TextSystem import config import importlib - +from paddleocr import PaddleOCR # 加载文本检测+识别模型 class OcrRecogniser: def __init__(self): # 获取参数对象 importlib.reload(config) - self.args = utility.parse_args() self.recogniser = self.init_model() @staticmethod @@ -22,7 +19,7 @@ def y_round(y): return y_max def predict(self, image): - detection_box, recognise_result = self.recogniser(image) + detection_box, recognise_result, _ = self.recogniser(image, cls=False) if len(detection_box) > 0: coordinate_list = list() if isinstance(detection_box, list): @@ -84,22 +81,24 @@ def predict(self, image): return detection_box, recognise_result def init_model(self): - self.args.use_gpu = config.USE_GPU - if not config.USE_GPU: - import paddle - paddle.set_device('cpu') - # 设置文本检测模型路径 - self.args.det_model_dir = config.DET_MODEL_PATH - # 设置文本识别模型路径 - self.args.rec_model_dir = config.REC_MODEL_PATH - self.args.rec_char_dict_path = config.DICT_PATH - self.args.rec_image_shape = config.REC_IMAGE_SHAPE - # 设置识别文本的类型 - self.args.rec_char_type = config.REC_CHAR_TYPE - # 设置每张图文本框批处理数量 - self.args.rec_batch_num = config.REC_BATCH_NUM - self.args.max_batch_size = config.MAX_BATCH_SIZE - return TextSystem(self.args) + return PaddleOCR(use_gpu=config.USE_GPU, + gpu_mem=500, + det_algorithm='DB', + # 设置文本检测模型路径 + det_model_dir=config.DET_MODEL_PATH, + rec_algorithm='CRNN', + # 设置每张图文本框批处理数量 + rec_batch_num=config.REC_BATCH_NUM, + # 设置文本识别模型路径 + rec_model_dir=config.REC_MODEL_PATH, + max_batch_size=config.MAX_BATCH_SIZE, + det=True, + use_angle_cls=False, + drop_score=0, + lang=config.REC_CHAR_TYPE, + ocr_version=f'PP-OCR{config.MODEL_VERSION.lower()}', + rec_image_shape=config.REC_IMAGE_SHAPE, + debug=False, show_log=False) def get_coordinates(dt_box): diff --git a/backend/tools/program.py b/backend/tools/program.py deleted file mode 100755 index 7c02dc01..00000000 --- a/backend/tools/program.py +++ /dev/null @@ -1,602 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import platform -import yaml -import time -import datetime -import paddle -import paddle.distributed as dist -from tqdm import tqdm -from argparse import ArgumentParser, RawDescriptionHelpFormatter - -from ppocr.utils.stats import TrainingStats -from ppocr.utils.save_load import save_model -from ppocr.utils.utility import print_dict, AverageMeter -from ppocr.utils.logging import get_logger -from ppocr.utils.loggers import VDLLogger, WandbLogger, Loggers -from ppocr.utils import profiler -from ppocr.data import build_dataloader - - -class ArgsParser(ArgumentParser): - def __init__(self): - super(ArgsParser, self).__init__( - formatter_class=RawDescriptionHelpFormatter) - self.add_argument("-c", "--config", help="configuration file to use") - self.add_argument( - "-o", "--opt", nargs='+', help="set configuration options") - self.add_argument( - '-p', - '--profiler_options', - type=str, - default=None, - help='The option of profiler, which should be in format ' \ - '\"key1=value1;key2=value2;key3=value3\".' - ) - - def parse_args(self, argv=None): - args = super(ArgsParser, self).parse_args(argv) - assert args.config is not None, \ - "Please specify --config=configure_file_path." - args.opt = self._parse_opt(args.opt) - return args - - def _parse_opt(self, opts): - config = {} - if not opts: - return config - for s in opts: - s = s.strip() - k, v = s.split('=') - config[k] = yaml.load(v, Loader=yaml.Loader) - return config - - -def load_config(file_path): - """ - Load config from yml/yaml file. - Args: - file_path (str): Path of the config file to be loaded. - Returns: global config - """ - _, ext = os.path.splitext(file_path) - assert ext in ['.yml', '.yaml'], "only support yaml files for now" - config = yaml.load(open(file_path, 'rb'), Loader=yaml.Loader) - return config - - -def merge_config(config, opts): - """ - Merge config into global config. - Args: - config (dict): Config to be merged. - Returns: global config - """ - for key, value in opts.items(): - if "." not in key: - if isinstance(value, dict) and key in config: - config[key].update(value) - else: - config[key] = value - else: - sub_keys = key.split('.') - assert ( - sub_keys[0] in config - ), "the sub_keys can only be one of global_config: {}, but get: " \ - "{}, please check your running command".format( - config.keys(), sub_keys[0]) - cur = config[sub_keys[0]] - for idx, sub_key in enumerate(sub_keys[1:]): - if idx == len(sub_keys) - 2: - cur[sub_key] = value - else: - cur = cur[sub_key] - return config - - -def check_gpu(use_gpu): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. - """ - err = "Config use_gpu cannot be set as true while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. Set use_gpu as false in config file to run " \ - "model on CPU" - - try: - if use_gpu and not paddle.is_compiled_with_cuda(): - print(err) - sys.exit(1) - except Exception as e: - pass - - -def check_xpu(use_xpu): - """ - Log error and exit when set use_xpu=true in paddlepaddle - cpu/gpu version. - """ - err = "Config use_xpu cannot be set as true while you are " \ - "using paddlepaddle cpu/gpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-xpu to run model on XPU \n" \ - "\t2. 
Set use_xpu as false in config file to run " \ - "model on CPU/GPU" - - try: - if use_xpu and not paddle.is_compiled_with_xpu(): - print(err) - sys.exit(1) - except Exception as e: - pass - - -def train(config, - train_dataloader, - valid_dataloader, - device, - model, - loss_class, - optimizer, - lr_scheduler, - post_process_class, - eval_class, - pre_best_model_dict, - logger, - log_writer=None, - scaler=None): - cal_metric_during_train = config['Global'].get('cal_metric_during_train', - False) - calc_epoch_interval = config['Global'].get('calc_epoch_interval', 1) - log_smooth_window = config['Global']['log_smooth_window'] - epoch_num = config['Global']['epoch_num'] - print_batch_step = config['Global']['print_batch_step'] - eval_batch_step = config['Global']['eval_batch_step'] - profiler_options = config['profiler_options'] - - global_step = 0 - if 'global_step' in pre_best_model_dict: - global_step = pre_best_model_dict['global_step'] - start_eval_step = 0 - if type(eval_batch_step) == list and len(eval_batch_step) >= 2: - start_eval_step = eval_batch_step[0] - eval_batch_step = eval_batch_step[1] - if len(valid_dataloader) == 0: - logger.info( - 'No Images in eval dataset, evaluation during training ' \ - 'will be disabled' - ) - start_eval_step = 1e111 - logger.info( - "During the training process, after the {}th iteration, " \ - "an evaluation is run every {} iterations". - format(start_eval_step, eval_batch_step)) - save_epoch_step = config['Global']['save_epoch_step'] - save_model_dir = config['Global']['save_model_dir'] - if not os.path.exists(save_model_dir): - os.makedirs(save_model_dir) - main_indicator = eval_class.main_indicator - best_model_dict = {main_indicator: 0} - best_model_dict.update(pre_best_model_dict) - train_stats = TrainingStats(log_smooth_window, ['lr']) - model_average = False - model.train() - - use_srn = config['Architecture']['algorithm'] == "SRN" - extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"] - extra_input = False - if config['Architecture']['algorithm'] == 'Distillation': - for key in config['Architecture']["Models"]: - extra_input = extra_input or config['Architecture']['Models'][key][ - 'algorithm'] in extra_input_models - else: - extra_input = config['Architecture']['algorithm'] in extra_input_models - try: - model_type = config['Architecture']['model_type'] - except: - model_type = None - - algorithm = config['Architecture']['algorithm'] - - start_epoch = best_model_dict[ - 'start_epoch'] if 'start_epoch' in best_model_dict else 1 - - total_samples = 0 - train_reader_cost = 0.0 - train_batch_cost = 0.0 - reader_start = time.time() - eta_meter = AverageMeter() - - max_iter = len(train_dataloader) - 1 if platform.system( - ) == "Windows" else len(train_dataloader) - - for epoch in range(start_epoch, epoch_num + 1): - if train_dataloader.dataset.need_reset: - train_dataloader = build_dataloader( - config, 'Train', device, logger, seed=epoch) - max_iter = len(train_dataloader) - 1 if platform.system( - ) == "Windows" else len(train_dataloader) - for idx, batch in enumerate(train_dataloader): - profiler.add_profiler_step(profiler_options) - train_reader_cost += time.time() - reader_start - if idx >= max_iter: - break - lr = optimizer.get_lr() - images = batch[0] - if use_srn: - model_average = True - - # use amp - if scaler: - with paddle.amp.auto_cast(): - if model_type == 'table' or extra_input: - preds = model(images, data=batch[1:]) - else: - preds = model(images) - else: - if model_type == 'table' or extra_input: - preds = 
model(images, data=batch[1:]) - elif model_type in ["kie", 'vqa']: - preds = model(batch) - else: - preds = model(images) - - loss = loss_class(preds, batch) - avg_loss = loss['loss'] - - if scaler: - scaled_avg_loss = scaler.scale(avg_loss) - scaled_avg_loss.backward() - scaler.minimize(optimizer, scaled_avg_loss) - else: - avg_loss.backward() - optimizer.step() - optimizer.clear_grad() - - if cal_metric_during_train and epoch % calc_epoch_interval == 0: # only rec and cls need - batch = [item.numpy() for item in batch] - if model_type in ['table', 'kie']: - eval_class(preds, batch) - else: - if config['Loss']['name'] in ['MultiLoss', 'MultiLoss_v2' - ]: # for multi head loss - post_result = post_process_class( - preds['ctc'], batch[1]) # for CTC head out - else: - post_result = post_process_class(preds, batch[1]) - eval_class(post_result, batch) - metric = eval_class.get_metric() - train_stats.update(metric) - - train_batch_time = time.time() - reader_start - train_batch_cost += train_batch_time - eta_meter.update(train_batch_time) - global_step += 1 - total_samples += len(images) - - if not isinstance(lr_scheduler, float): - lr_scheduler.step() - - # logger and visualdl - stats = {k: v.numpy().mean() for k, v in loss.items()} - stats['lr'] = lr - train_stats.update(stats) - - if log_writer is not None and dist.get_rank() == 0: - log_writer.log_metrics(metrics=train_stats.get(), prefix="TRAIN", step=global_step) - - if dist.get_rank() == 0 and ( - (global_step > 0 and global_step % print_batch_step == 0) or - (idx >= len(train_dataloader) - 1)): - logs = train_stats.log() - - eta_sec = ((epoch_num + 1 - epoch) * \ - len(train_dataloader) - idx - 1) * eta_meter.avg - eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec))) - strs = 'epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: ' \ - '{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, ' \ - 'ips: {:.5f} samples/s, eta: {}'.format( - epoch, epoch_num, global_step, logs, - train_reader_cost / print_batch_step, - train_batch_cost / print_batch_step, - total_samples / print_batch_step, - total_samples / train_batch_cost, eta_sec_format) - logger.info(strs) - - total_samples = 0 - train_reader_cost = 0.0 - train_batch_cost = 0.0 - # eval - if global_step > start_eval_step and \ - (global_step - start_eval_step) % eval_batch_step == 0 \ - and dist.get_rank() == 0: - if model_average: - Model_Average = paddle.incubate.optimizer.ModelAverage( - 0.15, - parameters=model.parameters(), - min_average_window=10000, - max_average_window=15625) - Model_Average.apply() - cur_metric = eval( - model, - valid_dataloader, - post_process_class, - eval_class, - model_type, - extra_input=extra_input) - cur_metric_str = 'cur metric, {}'.format(', '.join( - ['{}: {}'.format(k, v) for k, v in cur_metric.items()])) - logger.info(cur_metric_str) - - # logger metric - if log_writer is not None: - log_writer.log_metrics(metrics=cur_metric, prefix="EVAL", step=global_step) - - if cur_metric[main_indicator] >= best_model_dict[ - main_indicator]: - best_model_dict.update(cur_metric) - best_model_dict['best_epoch'] = epoch - save_model( - model, - optimizer, - save_model_dir, - logger, - config, - is_best=True, - prefix='best_accuracy', - best_model_dict=best_model_dict, - epoch=epoch, - global_step=global_step) - best_str = 'best metric, {}'.format(', '.join([ - '{}: {}'.format(k, v) for k, v in best_model_dict.items() - ])) - logger.info(best_str) - # logger best metric - if log_writer is not None: - log_writer.log_metrics(metrics={ - 
"best_{}".format(main_indicator): best_model_dict[main_indicator] - }, prefix="EVAL", step=global_step) - - log_writer.log_model(is_best=True, prefix="best_accuracy", metadata=best_model_dict) - - reader_start = time.time() - if dist.get_rank() == 0: - save_model( - model, - optimizer, - save_model_dir, - logger, - config, - is_best=False, - prefix='latest', - best_model_dict=best_model_dict, - epoch=epoch, - global_step=global_step) - - if log_writer is not None: - log_writer.log_model(is_best=False, prefix="latest") - - if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0: - save_model( - model, - optimizer, - save_model_dir, - logger, - config, - is_best=False, - prefix='iter_epoch_{}'.format(epoch), - best_model_dict=best_model_dict, - epoch=epoch, - global_step=global_step) - if log_writer is not None: - log_writer.log_model(is_best=False, prefix='iter_epoch_{}'.format(epoch)) - - best_str = 'best metric, {}'.format(', '.join( - ['{}: {}'.format(k, v) for k, v in best_model_dict.items()])) - logger.info(best_str) - if dist.get_rank() == 0 and log_writer is not None: - log_writer.close() - return - - -def eval(model, - valid_dataloader, - post_process_class, - eval_class, - model_type=None, - extra_input=False): - model.eval() - with paddle.no_grad(): - total_frame = 0.0 - total_time = 0.0 - pbar = tqdm( - total=len(valid_dataloader), - desc='eval model:', - position=0, - leave=True) - max_iter = len(valid_dataloader) - 1 if platform.system( - ) == "Windows" else len(valid_dataloader) - for idx, batch in enumerate(valid_dataloader): - if idx >= max_iter: - break - images = batch[0] - start = time.time() - if model_type == 'table' or extra_input: - preds = model(images, data=batch[1:]) - elif model_type in ["kie", 'vqa']: - preds = model(batch) - else: - preds = model(images) - - batch_numpy = [] - for item in batch: - if isinstance(item, paddle.Tensor): - batch_numpy.append(item.numpy()) - else: - batch_numpy.append(item) - # Obtain usable results from post-processing methods - total_time += time.time() - start - # Evaluate the results of the current batch - if model_type in ['table', 'kie']: - eval_class(preds, batch_numpy) - elif model_type in ['vqa']: - post_result = post_process_class(preds, batch_numpy) - eval_class(post_result, batch_numpy) - else: - post_result = post_process_class(preds, batch_numpy[1]) - eval_class(post_result, batch_numpy) - - pbar.update(1) - total_frame += len(images) - # Get final metric,eg. 
acc or hmean - metric = eval_class.get_metric() - - pbar.close() - model.train() - metric['fps'] = total_frame / total_time - return metric - - -def update_center(char_center, post_result, preds): - result, label = post_result - feats, logits = preds - logits = paddle.argmax(logits, axis=-1) - feats = feats.numpy() - logits = logits.numpy() - - for idx_sample in range(len(label)): - if result[idx_sample][0] == label[idx_sample][0]: - feat = feats[idx_sample] - logit = logits[idx_sample] - for idx_time in range(len(logit)): - index = logit[idx_time] - if index in char_center.keys(): - char_center[index][0] = ( - char_center[index][0] * char_center[index][1] + - feat[idx_time]) / (char_center[index][1] + 1) - char_center[index][1] += 1 - else: - char_center[index] = [feat[idx_time], 1] - return char_center - - -def get_center(model, eval_dataloader, post_process_class): - pbar = tqdm(total=len(eval_dataloader), desc='get center:') - max_iter = len(eval_dataloader) - 1 if platform.system( - ) == "Windows" else len(eval_dataloader) - char_center = dict() - for idx, batch in enumerate(eval_dataloader): - if idx >= max_iter: - break - images = batch[0] - start = time.time() - preds = model(images) - - batch = [item.numpy() for item in batch] - # Obtain usable results from post-processing methods - post_result = post_process_class(preds, batch[1]) - - #update char_center - char_center = update_center(char_center, post_result, preds) - pbar.update(1) - - pbar.close() - for key in char_center.keys(): - char_center[key] = char_center[key][0] - return char_center - - -def preprocess(is_train=False): - FLAGS = ArgsParser().parse_args() - profiler_options = FLAGS.profiler_options - config = load_config(FLAGS.config) - config = merge_config(config, FLAGS.opt) - profile_dic = {"profiler_options": FLAGS.profiler_options} - config = merge_config(config, profile_dic) - - if is_train: - # save_config - save_model_dir = config['Global']['save_model_dir'] - os.makedirs(save_model_dir, exist_ok=True) - with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f: - yaml.dump( - dict(config), f, default_flow_style=False, sort_keys=False) - log_file = '{}/train.log'.format(save_model_dir) - else: - log_file = None - logger = get_logger(log_file=log_file) - - # check if set use_gpu=True in paddlepaddle cpu version - use_gpu = config['Global']['use_gpu'] - check_gpu(use_gpu) - - # check if set use_xpu=True in paddlepaddle cpu/gpu version - use_xpu = False - if 'use_xpu' in config['Global']: - use_xpu = config['Global']['use_xpu'] - check_xpu(use_xpu) - - alg = config['Architecture']['algorithm'] - assert alg in [ - 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', - 'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE', - 'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'PREN', 'FCE', 'SVTR' - ] - - device = 'cpu' - if use_gpu: - device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) - if use_xpu: - device = 'xpu' - device = paddle.set_device(device) - - config['Global']['distributed'] = dist.get_world_size() != 1 - - loggers = [] - - if 'use_visualdl' in config['Global'] and config['Global']['use_visualdl']: - save_model_dir = config['Global']['save_model_dir'] - vdl_writer_path = '{}/vdl/'.format(save_model_dir) - log_writer = VDLLogger(save_model_dir) - loggers.append(log_writer) - if ('use_wandb' in config['Global'] and config['Global']['use_wandb']) or 'wandb' in config: - save_dir = config['Global']['save_model_dir'] - wandb_writer_path = "{}/wandb".format(save_dir) - if "wandb" in 
config: - wandb_params = config['wandb'] - else: - wandb_params = dict() - wandb_params.update({'save_dir': save_model_dir}) - log_writer = WandbLogger(**wandb_params, config=config) - loggers.append(log_writer) - else: - log_writer = None - print_dict(config, logger) - - if loggers: - log_writer = Loggers(loggers) - else: - log_writer = None - - logger.info('train with paddle {} and device {}'.format(paddle.__version__, - device)) - return config, device, logger, log_writer diff --git a/backend/tools/subtitle_ocr.py b/backend/tools/subtitle_ocr.py index 5b452dd8..d6380e62 100644 --- a/backend/tools/subtitle_ocr.py +++ b/backend/tools/subtitle_ocr.py @@ -4,9 +4,9 @@ import cv2 from PIL import ImageFont, ImageDraw, Image from tqdm import tqdm -from tools.ocr import OcrRecogniser, get_coordinates -from tools.constant import SubtitleArea -from tools import constant +from backend.tools.ocr import OcrRecogniser, get_coordinates +from backend.tools.constant import SubtitleArea +from backend.tools import constant from threading import Thread import queue from shapely.geometry import Polygon @@ -183,8 +183,6 @@ def ocr_task_producer(ocr_queue, task_queue, progress_queue, video_path, raw_sub cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_no - 1) # 读取视频帧 ret, frame = cap.read() - ocr = OcrRecogniser() - dt_box, rec_res = ocr.predict(frame) # 如果读取成功 if ret: # 根据默认字幕位置,则对视频帧进行裁剪,裁剪后处理 diff --git a/backend/tools/test_hubserving.py b/backend/tools/test_hubserving.py deleted file mode 100755 index ec17a941..00000000 --- a/backend/tools/test_hubserving.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) - -from ppocr.utils.logging import get_logger -logger = get_logger() - -import cv2 -import numpy as np -import time -from PIL import Image -from ppocr.utils.utility import get_image_file_list -from tools.infer.utility import draw_ocr, draw_boxes, str2bool -from ppstructure.utility import draw_structure_result -from ppstructure.predict_system import to_excel - -import requests -import json -import base64 - - -def cv2_to_base64(image): - return base64.b64encode(image).decode('utf8') - - -def draw_server_result(image_file, res): - img = cv2.imread(image_file) - image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - if len(res) == 0: - return np.array(image) - keys = res[0].keys() - if 'text_region' not in keys: # for ocr_rec, draw function is invalid - logger.info("draw function is invalid for ocr_rec!") - return None - elif 'text' not in keys: # for ocr_det - logger.info("draw text boxes only!") - boxes = [] - for dno in range(len(res)): - boxes.append(res[dno]['text_region']) - boxes = np.array(boxes) - draw_img = draw_boxes(image, boxes) - return draw_img - else: # for ocr_system - logger.info("draw boxes and texts!") - boxes = [] - texts = [] - scores = [] - for dno in range(len(res)): - boxes.append(res[dno]['text_region']) - texts.append(res[dno]['text']) - scores.append(res[dno]['confidence']) - boxes = np.array(boxes) - scores = np.array(scores) - draw_img = draw_ocr( - image, boxes, texts, scores, draw_txt=True, drop_score=0.5) - return draw_img - - -def save_structure_res(res, save_folder, image_file): - img = cv2.imread(image_file) - excel_save_folder = os.path.join(save_folder, os.path.basename(image_file)) - os.makedirs(excel_save_folder, exist_ok=True) - # save res - with open( - os.path.join(excel_save_folder, 'res.txt'), 'w', - encoding='utf8') as f: - for region in res: - if region['type'] == 'Table': - excel_path = os.path.join(excel_save_folder, - '{}.xlsx'.format(region['bbox'])) - to_excel(region['res'], excel_path) - elif region['type'] == 'Figure': - x1, y1, x2, y2 = region['bbox'] - print(region['bbox']) - roi_img = img[y1:y2, x1:x2, :] - img_path = os.path.join(excel_save_folder, - '{}.jpg'.format(region['bbox'])) - cv2.imwrite(img_path, roi_img) - else: - for text_result in region['res']: - f.write('{}\n'.format(json.dumps(text_result))) - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - is_visualize = False - headers = {"Content-type": "application/json"} - cnt = 0 - total_time = 0 - for image_file in image_file_list: - img = open(image_file, 'rb').read() - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - img_name = os.path.basename(image_file) - # seed http request - starttime = time.time() - data = {'images': [cv2_to_base64(img)]} - r = requests.post( - url=args.server_url, headers=headers, data=json.dumps(data)) - elapse = time.time() - starttime - total_time += elapse - logger.info("Predict time of %s: %.3fs" % (image_file, elapse)) - res = r.json()["results"][0] - logger.info(res) - - if args.visualize: - draw_img = None - if 'structure_table' in args.server_url: - to_excel(res['html'], './{}.xlsx'.format(img_name)) - elif 'structure_system' in args.server_url: - save_structure_res(res['regions'], args.output, image_file) - else: - draw_img = draw_server_result(image_file, res) - if draw_img is not None: - if not 
os.path.exists(args.output): - os.makedirs(args.output) - cv2.imwrite( - os.path.join(args.output, os.path.basename(image_file)), - draw_img[:, :, ::-1]) - logger.info("The visualized image saved in {}".format( - os.path.join(args.output, os.path.basename(image_file)))) - cnt += 1 - if cnt % 100 == 0: - logger.info("{} processed".format(cnt)) - logger.info("avg time cost: {}".format(float(total_time) / cnt)) - - -def parse_args(): - import argparse - parser = argparse.ArgumentParser(description="args for hub serving") - parser.add_argument("--server_url", type=str, required=True) - parser.add_argument("--image_dir", type=str, required=True) - parser.add_argument("--visualize", type=str2bool, default=False) - parser.add_argument("--output", type=str, default='./hubserving_result') - args = parser.parse_args() - return args - - -if __name__ == '__main__': - args = parse_args() - main(args) diff --git a/backend/tools/train.py b/backend/tools/train.py deleted file mode 100755 index 42aba548..00000000 --- a/backend/tools/train.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..'))) - -import yaml -import paddle -import paddle.distributed as dist - -from ppocr.data import build_dataloader -from ppocr.modeling.architectures import build_model -from ppocr.losses import build_loss -from ppocr.optimizer import build_optimizer -from ppocr.postprocess import build_post_process -from ppocr.metrics import build_metric -from ppocr.utils.save_load import load_model -from ppocr.utils.utility import set_seed -import tools.program as program - -dist.get_world_size() - - -def main(config, device, logger, vdl_writer): - # init dist environment - if config['Global']['distributed']: - dist.init_parallel_env() - - global_config = config['Global'] - - # build dataloader - train_dataloader = build_dataloader(config, 'Train', device, logger) - if len(train_dataloader) == 0: - logger.error( - "No Images in train dataset, please ensure\n" + - "\t1. The images num in the train label_file_list should be larger than or equal with batch size.\n" - + - "\t2. The annotation file and path in the configuration file are provided normally." 
- ) - return - - if config['Eval']: - valid_dataloader = build_dataloader(config, 'Eval', device, logger) - else: - valid_dataloader = None - - # build post process - post_process_class = build_post_process(config['PostProcess'], - global_config) - - # build model - # for rec algorithm - if hasattr(post_process_class, 'character'): - char_num = len(getattr(post_process_class, 'character')) - if config['Architecture']["algorithm"] in ["Distillation", - ]: # distillation model - for key in config['Architecture']["Models"]: - if config['Architecture']['Models'][key]['Head'][ - 'name'] == 'MultiHead': # for multi head - if config['PostProcess'][ - 'name'] == 'DistillationSARLabelDecode': - char_num = char_num - 2 - # update SARLoss params - assert list(config['Loss']['loss_config_list'][-1].keys())[ - 0] == 'DistillationSARLoss' - config['Loss']['loss_config_list'][-1][ - 'DistillationSARLoss']['ignore_index'] = char_num + 1 - out_channels_list = {} - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Models'][key]['Head'][ - 'out_channels_list'] = out_channels_list - else: - config['Architecture']["Models"][key]["Head"][ - 'out_channels'] = char_num - elif config['Architecture']['Head'][ - 'name'] == 'MultiHead': # for multi head - if config['PostProcess']['name'] == 'SARLabelDecode': - char_num = char_num - 2 - # update SARLoss params - assert list(config['Loss']['loss_config_list'][1].keys())[ - 0] == 'SARLoss' - if config['Loss']['loss_config_list'][1]['SARLoss'] is None: - config['Loss']['loss_config_list'][1]['SARLoss'] = { - 'ignore_index': char_num + 1 - } - else: - config['Loss']['loss_config_list'][1]['SARLoss'][ - 'ignore_index'] = char_num + 1 - out_channels_list = {} - out_channels_list['CTCLabelDecode'] = char_num - out_channels_list['SARLabelDecode'] = char_num + 2 - config['Architecture']['Head'][ - 'out_channels_list'] = out_channels_list - else: # base rec model - config['Architecture']["Head"]['out_channels'] = char_num - - if config['PostProcess']['name'] == 'SARLabelDecode': # for SAR model - config['Loss']['ignore_index'] = char_num - 1 - - model = build_model(config['Architecture']) - if config['Global']['distributed']: - model = paddle.DataParallel(model) - - # build loss - loss_class = build_loss(config['Loss']) - - # build optim - optimizer, lr_scheduler = build_optimizer( - config['Optimizer'], - epochs=config['Global']['epoch_num'], - step_each_epoch=len(train_dataloader), - model=model) - - # build metric - eval_class = build_metric(config['Metric']) - # load pretrain model - pre_best_model_dict = load_model(config, model, optimizer, - config['Architecture']["model_type"]) - logger.info('train dataloader has {} iters'.format(len(train_dataloader))) - if valid_dataloader is not None: - logger.info('valid dataloader has {} iters'.format( - len(valid_dataloader))) - - use_amp = config["Global"].get("use_amp", False) - if use_amp: - AMP_RELATED_FLAGS_SETTING = { - 'FLAGS_cudnn_batchnorm_spatial_persistent': 1, - 'FLAGS_max_inplace_grad_add': 8, - } - paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING) - scale_loss = config["Global"].get("scale_loss", 1.0) - use_dynamic_loss_scaling = config["Global"].get( - "use_dynamic_loss_scaling", False) - scaler = paddle.amp.GradScaler( - init_loss_scaling=scale_loss, - use_dynamic_loss_scaling=use_dynamic_loss_scaling) - else: - scaler = None - - # start train - program.train(config, train_dataloader, valid_dataloader, device, model, - loss_class, optimizer, 
lr_scheduler, post_process_class, - eval_class, pre_best_model_dict, logger, vdl_writer, scaler) - - -def test_reader(config, device, logger): - loader = build_dataloader(config, 'Train', device, logger) - import time - starttime = time.time() - count = 0 - try: - for data in loader(): - count += 1 - if count % 1 == 0: - batch_time = time.time() - starttime - starttime = time.time() - logger.info("reader: {}, {}, {}".format( - count, len(data[0]), batch_time)) - except Exception as e: - logger.info(e) - logger.info("finish reader: {}, Success!".format(count)) - - -if __name__ == '__main__': - config, device, logger, vdl_writer = program.preprocess(is_train=True) - seed = config['Global']['seed'] if 'seed' in config['Global'] else 1024 - set_seed(seed) - main(config, device, logger, vdl_writer) - # test_reader(config, device, logger) From 55223d94f47f8b1d3d3a7e32a0b6a82ad84f57e1 Mon Sep 17 00:00:00 2001 From: jason Date: Sun, 23 Feb 2025 14:35:07 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E6=94=AF=E6=8C=81DirectML=E7=AD=89?= =?UTF-8?q?=E5=85=B6=E4=BB=96ONNX=E8=83=BD=E8=AF=86=E5=88=AB=E7=9A=84?= =?UTF-8?q?=E5=90=8E=E7=AB=AF=E4=BD=BF=E7=94=A8GPU=E5=8A=A0=E9=80=9F?= =?UTF-8?q?=E6=89=A7=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build-windows-directml.yml | 93 ++++++++ .gitignore | 1 + README.md | 214 ++++++++++--------- README_en.md | 204 ++++++++++-------- backend/config.py | 34 ++- backend/interface/ch.ini | 3 + backend/interface/chinese_cht.ini | 3 + backend/interface/en.ini | 3 + backend/interface/es.ini | 3 + backend/interface/japan.ini | 3 + backend/interface/ko.ini | 3 + backend/interface/vi.ini | 3 + backend/main.py | 9 +- backend/tools/__init__.py | 14 -- backend/tools/ocr.py | 53 ++++- gui.py | 4 +- requirements.txt | 6 +- requirements_directml.txt | 3 + 18 files changed, 448 insertions(+), 208 deletions(-) create mode 100644 .github/workflows/build-windows-directml.yml create mode 100644 requirements_directml.txt diff --git a/.github/workflows/build-windows-directml.yml b/.github/workflows/build-windows-directml.yml new file mode 100644 index 00000000..d8514621 --- /dev/null +++ b/.github/workflows/build-windows-directml.yml @@ -0,0 +1,93 @@ +name: Build Windows DirectML + +on: + push: + branches: + - '**' + workflow_dispatch: + inputs: + ssh: + description: 'SSH connection to Actions' + required: false + default: false + + +jobs: + build: + runs-on: windows-2019 + steps: + - uses: actions/checkout@v4 + - name: 读取 VERSION + id: version + run: | + VERSION=$(sed -n 's/^VERSION = "\(.*\)"/\1/p' backend/config.py) + echo "VERSION=$VERSION" >> $GITHUB_ENV + echo "VERSION=$VERSION" >> $GITHUB_OUTPUT + shell: bash + # - name: 检查 tag 是否已存在 + # run: | + # TAG_NAME="${VERSION}" + # if git ls-remote --tags origin | grep -q "refs/tags/$TAG_NAME"; then + # echo "Tag $TAG_NAME 已存在,发布中止" + # exit 1 + # fi + # shell: bash + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' # caching pip dependencies + - run: pip install paddlepaddle==3.0.0rc1 + - run: pip install -r requirements.txt + - run: pip install -r requirements_directml.txt + - run: pip freeze > requirements.txt + - run: pip install QPT==1.0b8 setuptools + - name: 获取 site-packages 路径 + shell: bash + run: | + SITE_PACKAGES=$(python -c "import site, os; print(os.path.join(site.getsitepackages()[0], 'Lib', 'site-packages'))") + SITE_PACKAGES_UNIX=$(cygpath -u "$SITE_PACKAGES") + echo "site-packages路径: $SITE_PACKAGES" + echo "site-packages 
UNIX路径: $SITE_PACKAGES_UNIX" + echo "SITE_PACKAGES_UNIX=$SITE_PACKAGES_UNIX" >> $GITHUB_ENV + echo "SITE_PACKAGES=$SITE_PACKAGES" >> $GITHUB_ENV + - name: 修复QPT内部错误 + run: sed -i '98c\ try:\n dep = pkg.requires()\n except TypeError:\n continue' ${SITE_PACKAGES_UNIX}/qpt/kernel/qpackage.py + shell: bash + - name: Start SSH via tmate + if: (github.event.inputs.ssh == 'true' && github.event.inputs.ssh != 'false') || contains(github.event.action, 'ssh') + uses: mxschmitt/action-tmate@v3 + - run: | + python backend/tools/makedist.py && \ + mv ../vse_out ./vse_out && \ + rm -fv ./vse_out/*/opt/packages/numpy-2* + env: + QPT_Action: "True" + shell: bash + - name: 上传 Debug 文件夹到 Artifacts + uses: actions/upload-artifact@v4 + with: + name: vse-v${{ env.VERSION }}-windows-directml-debug + path: vse_out/Debug/ + - name: 上传 Release 文件夹到 Artifacts + uses: actions/upload-artifact@v4 + with: + name: vse-v${{ env.VERSION }}-windows-directml-release + path: vse_out/Release/ + - name: 打包 Release 文件夹 + run: | + cd vse_out/Release + 7z a -t7z -mx=9 -m0=LZMA2 -ms=on -mfb=64 -md=32m -mmt=on -v1888m vse-v${{ env.VERSION }}-windows-directml.7z * && \ + # 检测是否只有一个分卷 + if [ -f vse-v${{ env.VERSION }}-windows-directml.7z.001 ] && [ ! -f vse-v${{ env.VERSION }}-windows-directml.7z.002 ]; then \ + mv vse-v${{ env.VERSION }}-windows-directml.7z.001 vse-v${{ env.VERSION }}-windows-directml.7z; fi + shell: bash + - name: Release + uses: softprops/action-gh-release@v1 + with: + prerelease: true + tag_name: ${{ env.VERSION }} + target_commitish: ${{ github.sha }} + name: 硬字幕提取器 ${{ env.VERSION }} + files: | + vse_out/Release/vse-v${{ env.VERSION }}-windows-directml.7z* + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3b0d3dfe..74eeaea6 100644 --- a/.gitignore +++ b/.gitignore @@ -359,3 +359,4 @@ $RECYCLE.BIN/ /test.py /test2.py /subtitle.ini +/backend/models/**/*.onnx \ No newline at end of file diff --git a/README.md b/README.md index 82fdeb24..3d627261 100755 --- a/README.md +++ b/README.md @@ -87,6 +87,27 @@ Video-subtitle-extractor (VSE) 是一款将视频中的硬字幕提取为外挂 > **有任何改进意见请在ISSUES和DISCUSSION中提出** + +**预构建包对比说明**: +| 预构建包名 | Python | Paddle | 环境 | 支持的计算能力范围| +|---------------|------------|--------------|-----------------------------|----------| +| `vse-windows-cpu.7z` | 3.12 | 3.0rc1 | 无 GPU,CPU 运行 | 通用 | +| `vse-windows-directml.7z` | 3.12 | 3.0rc1 | Windows 非Nvidia显卡 | 通用 | +| `vse-windows-nvidia-cuda-10.2.7z` | 3.11 | 2.5.2 | CUDA 10.2 | 3.0 – 7.5 | +| `vse-windows-nvidia-cuda-11.8.7z` | 3.12 | 3.0rc1 | CUDA 11.8 | 3.5 – 8.9 | +| `vse-windows-nvidia-cuda-12.3.7z` | 3.12 | 3.0rc1 | CUDA 12.3 | 5.0 – 9.0 | + +> NVIDIA官方提供了各GPU型号的计算能力列表,您可以参考链接: [CUDA GPUs](https://developer.nvidia.com/cuda-gpus) 查看你的GPU适合哪个CUDA版本 + +**识别模式选择说明**: +| 模式名称 | GPU | OCR模型尺寸 | 字幕检测引擎 | 备注 | +|---------------|-----|---------|------|------| +| 快速 | 有/无 | 迷你 | VideoSubFinder | | +| 自动 | 有| 大 | VideoSubFinder | 推荐 | +| 自动 | 无| 迷你 | VideoSubFinder | 推荐 | +| 精准 | 有/无| 大 | VSE | 非常慢 | +> Windows/Linux环境下字幕检测引擎都是VideoSubFinder + ## 演示 - GUI版: @@ -109,127 +130,128 @@ Video-subtitle-extractor (VSE) 是一款将视频中的硬字幕提取为外挂 ## 源码使用说明 -#### 1. 下载安装Miniconda - -- Windows: Miniconda3-py312_24.7.1-0-Windows-x86_64.exe +#### 1. 
安装 Python
+请确保您已经安装了 Python 3.12+。
 
-- MacOS:Miniconda3-py312_24.7.1-0-MacOSX-x86_64.pkg
-
-- Linux: Miniconda3-py312_24.7.1-0-Linux-x86_64.sh
+- Windows 用户可以前往 [Python 官网](https://www.python.org/downloads/windows/) 下载并安装 Python。
+- MacOS 用户可以使用 Homebrew 安装:
+  ```shell
+  brew install python@3.12
+  ```
+- Linux 用户可以使用包管理器安装,例如 Ubuntu/Debian:
+  ```shell
+  sudo apt update && sudo apt install python3.12 python3.12-venv python3.12-dev
+  ```
 
+#### 2. 安装依赖文件
-#### 2. 创建并激活虚拟环境
 
+请使用虚拟环境来管理项目依赖,避免与系统环境冲突。
 
-(1)切换到源码所在目录:
+(1)创建虚拟环境并激活
 
 ```shell
-cd <源码所在目录>
+python -m venv videoEnv
 ```
-> 例如:如果你的源代码放在D盘的tools文件下,并且源代码的文件夹名为video-subtitle-extractor,就输入 ```cd D:/tools/video-subtitle-extractor-main```
 
-(2)创建激活conda环境
+- Windows:
 ```shell
-conda create -n videoEnv python=3.12
+videoEnv\Scripts\activate
 ```
+- MacOS/Linux:
+```shell
+source videoEnv/bin/activate
+```
+
+#### 3. 创建并激活项目目录
+
+切换到源码所在目录:
 ```shell
-conda activate videoEnv
+cd <源码所在目录>
 ```
+> 例如:如果您的源代码放在 D 盘的 tools 文件夹下,并且源代码的文件夹名为 video-subtitle-extractor,则输入:
+> ```shell
+> cd D:/tools/video-subtitle-extractor-main
+> ```
 
-#### 3. 安装依赖文件
+#### 4. 安装合适的运行环境
 
-请确保你已经安装 python 3.12+,使用conda创建项目虚拟环境并激活环境 (建议创建虚拟环境运行,以免后续出现问题)
-
-- 安装依赖:
+本项目支持 CUDA(NVIDIA 显卡加速)、CPU(无 GPU)、DirectML(AMD、Intel 等 GPU/APU 加速)、ONNX 四种运行模式。
+
+##### (1) CUDA(NVIDIA 显卡用户)
+
+> 请确保您的 NVIDIA 显卡驱动支持所选 CUDA 版本。
+
+- 推荐 CUDA 11.8,对应 cuDNN 8.6.0。
+
+- 安装 CUDA:
+  - Windows:[CUDA 11.8 下载](https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe)
+  - Linux:
+    ```shell
+    wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
+    sudo sh cuda_11.8.0_520.61.05_linux.run
+    ```
+  - MacOS 不支持 CUDA。
+
+- 安装 cuDNN(CUDA 11.8 对应 cuDNN 8.6.0):
+  - [Windows cuDNN 8.6.0 下载](https://developer.download.nvidia.cn/compute/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-windows-x86_64-8.6.0.163_cuda11-archive.zip)
+  - [Linux cuDNN 8.6.0 下载](https://developer.download.nvidia.cn/compute/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz)
+  - 安装方法请参考 NVIDIA 官方文档。
+
+- 安装 PaddlePaddle GPU 版本(CUDA 11.8):
   ```shell
+  pip install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
   pip install -r requirements.txt
   ```
 
-- 安装CUDA和cuDNN
-
-> 请确保有拥有Nvidia的显卡,**30系列以上的显卡驱动可能不支持 cuda 11.2及以下版本的安装**
->
-> 如果安装cuda 10.2,请对应安装7.6.5的cuDNN,并使用对应cuda版本的paddlepaddle,**请不要使用cuDNN v8.x 和 cuda 10.2的组合**
->
-> 如果安装cuda 11.2,请对应安装8.1.1的cuDNN,并使用对应cuda版本的paddlepaddle
->
-> 如果安装cuda 11.6,请对应安装8.4.0的cuDNN,并使用对应cuda版本的paddlepaddle
->
-> 如果安装cuda 11.8,请对应安装8.6.0的cuDNN,并使用对应cuda版本的paddlepaddle
->
-> 如果安装cuda 12.0,请对应安装8.9.1的cuDNN,并使用对应cuda版本的paddlepaddle
-
-<details>
-  <summary>Linux用户</summary>
-  <h5>(1) 下载CUDA 11.7</h5>
-  <pre>wget https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run</pre>
-  <h5>(2) 安装CUDA 11.7</h5>
-  <pre>sudo sh cuda_11.7.0_515.43.04_linux.run</pre>
-  <p>1. 输入accept</p>
-  <p>2. 选中CUDA Toolkit 11.7(如果你没有安装nvidia驱动则选中Driver,如果你已经安装了nvidia驱动请不要选中driver),之后选中install,回车</p>
-  <p>3. 添加环境变量:在 ~/.bashrc 加入以下内容</p>
-  <pre># CUDA
-  export PATH=/usr/local/cuda-11.7/bin${PATH:+:${PATH}}
-  export LD_LIBRARY_PATH=/usr/local/cuda-11.7/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}</pre>
-  <p>使其生效</p>
-  <pre>source ~/.bashrc</pre>
-  <h5>(3) 下载cuDNN 8.4.1</h5>
-  <p>国内:cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz 提取码:57mg</p>
-  <p>国外:cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz</p>
-  <h5>(4) 安装cuDNN 8.4.1</h5>
-  <pre>tar -xf cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz
-  mv cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive cuda
-  sudo cp ./cuda/include/* /usr/local/cuda-11.7/include/
-  sudo cp ./cuda/lib/* /usr/local/cuda-11.7/lib64/
-  sudo chmod a+r /usr/local/cuda-11.7/lib64/*
-  sudo chmod a+r /usr/local/cuda-11.7/include/*</pre>
-</details>
-
-<details>
-  <summary>Windows用户</summary>
-  <h5>(1) 下载CUDA 11.7</h5>
-  <p>cuda_11.7.0_516.01_windows.exe</p>
-  <h5>(2) 安装CUDA 11.7</h5>
-  <h5>(3) 下载cuDNN 8.4.0</h5>
-  <p>cudnn-windows-x86_64-8.4.0.27_cuda11.6-archive.zip</p>
-  <h5>(4) 安装cuDNN 8.4.0</h5>
-  <p>将cuDNN解压后的cuda文件夹中的bin, include, lib目录下的文件复制到C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\对应目录下</p>
-</details>
-
-- 安装paddlepaddle:
-
-  - windows:
-
-  ```shell
-  python -m pip install paddlepaddle-gpu==2.6.1.post117 -f https://www.paddlepaddle.org.cn/whl/windows/mkl/avx/stable.html
-  ```
-
-  - Linux:
-
-  ```shell
-  python -m pip install paddlepaddle-gpu==2.6.1.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
-  ```
-
-#### 4. 运行程序
-
-- 运行图形化界面版本(GUI)
+##### (2) DirectML(AMD、Intel 等 GPU/APU 加速卡用户)
+
+- 适用于 Windows 设备的 AMD/NVIDIA/Intel GPU。
+- 安装 ONNX Runtime DirectML 版本:
+  ```shell
+  pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+  pip install -r requirements.txt
+  pip install -r requirements_directml.txt
+  ```
+
+##### (3) ONNX(适合 macOS、AMD ROCm 等环境的加速用户,基础环境与 DirectML 方式一致,未测试!)
+
+- 使用这种方式部署请勿反馈 Issues。
+- 适用于 Linux 或 macOS 设备的 AMD/Metal GPU/Apple Silicon GPU。
+- 安装与你的后端对应的 ONNX Runtime 版本:
+  ```shell
+  pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+  pip install -r requirements.txt
+
+  # 阅读文档 https://onnxruntime.ai/docs/execution-providers/
+  # 根据你的设备选择合适的执行后端,参考 requirements_directml.txt 文件修改成适合你环境的依赖
+
+  # 例如:
+  # requirements_coreml.txt
+  # paddle2onnx==1.3.1
+  # onnxruntime-gpu==1.20.1
+  # onnxruntime-coreml==1.13.1
+
+  pip install -r requirements_coreml.txt
+  ```
+
+##### (4) CPU 运行(无 GPU 加速)
+
+- 适用于没有 GPU 或不希望使用 GPU 的情况。
+- 直接安装 CPU 版本 PaddlePaddle:
+  ```shell
+  pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+  pip install -r requirements.txt
+  ```
+
+#### 5. 运行程序
+
+- 运行图形化界面版本(GUI)
 
 ```shell
 python gui.py
 ```
 
-- 运行命令行版本(CLI)
+- 运行命令行版本(CLI)
 
 ```shell
 python ./backend/main.py
 ```
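Whichever of the four install modes is chosen, it can be verified before launching the app. A minimal self-check along these lines (the script name and messages are illustrative, not part of the repo) confirms what Paddle and ONNX Runtime actually see:

```python
# verify_backend.py - illustrative sanity check for the chosen install path
import paddle

print("paddle:", paddle.__version__)
# True only on the paddlepaddle-gpu (CUDA) wheels
print("compiled with CUDA:", paddle.is_compiled_with_cuda())

try:
    import onnxruntime as ort
    # The DirectML install should list DmlExecutionProvider here;
    # plain CPU installs only report CPUExecutionProvider.
    print("onnxruntime providers:", ort.get_available_providers())
except ModuleNotFoundError:
    print("onnxruntime not installed (expected for the CPU and CUDA modes)")
```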
-- Windows: Miniconda3-py312_24.7.1-0-Windows-x86_64.exe - - -- MacOS:Miniconda3-py312_24.7.1-0-MacOSX-x86_64.pkg - - -- Linux: Miniconda3-py312_24.7.1-0-Linux-x86_64.sh +- Windows users can go to the [Python official website](https://www.python.org/downloads/windows/) to download and install Python. +- MacOS users can install using Homebrew: + ```shell + brew install python@3.12 + ``` +- Linux users can install via the package manager, such as on Ubuntu/Debian: + ```shell + sudo apt update && sudo apt install python3.12 python3.12-venv python3.12-dev + ``` +#### 2. Install Dependencies -#### 2. Activate Vitrual Environment +It is recommended to use a virtual environment to manage project dependencies to avoid conflicts with the system environment. -(1) Switch to working directory +(1) Create and activate the virtual environment: ```shell -cd +python -m venv videoEnv ``` -(2) create and activate conda environment +- Windows: +```shell +videoEnv\\Scripts\\activate +``` +- MacOS/Linux: ```shell -conda create -n videoEnv python=3.12 pip +source videoEnv/bin/activate ``` +#### 3. Create and Activate Project Directory + +Change to the directory where your source code is located: ```shell -conda activate videoEnv +cd ``` +> For example, if your source code is in the `tools` folder on the D drive and the folder name is `video-subtitle-extractor`, use: +> ```shell +> cd D:/tools/video-subtitle-extractor-main +> ``` + +#### 4. Install the Appropriate Runtime Environment + +This project supports four runtime modes: CUDA (NVIDIA GPU acceleration), CPU (no GPU), DirectML (AMD, Intel, and other GPUs/APUs), and ONNX. +##### (1) CUDA (For NVIDIA GPU users) -#### 3. Install Dependencies +> Make sure your NVIDIA GPU driver supports the selected CUDA version. -Before you install dependencies, make sure your python 3.8+ has installed as well as conda virtual environment has created and activated. +- Recommended CUDA 11.8, corresponding to cuDNN 8.6.0. -- Install dependencies: +- Install CUDA: + - Windows: [Download CUDA 11.8](https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe) + - Linux: + ```shell + wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run + sudo sh cuda_11.8.0_520.61.05_linux.run + ``` + - CUDA is not supported on MacOS. +- Install cuDNN (CUDA 11.8 corresponds to cuDNN 8.6.0): + - [Windows cuDNN 8.6.0 Download](https://developer.download.nvidia.cn/compute/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-windows-x86_64-8.6.0.163_cuda11-archive.zip) + - [Linux cuDNN 8.6.0 Download](https://developer.download.nvidia.cn/compute/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz) + - Follow the installation guide in the NVIDIA official documentation. + +- Install PaddlePaddle GPU version (CUDA 11.8): ```shell + pip install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ pip install -r requirements.txt ``` -- Install **CUDA** and **cuDNN** - > make sure that you have **NVIDIA** graphic card before doing this step - -
-- Install **CUDA** and **cuDNN**
-  > make sure that you have **NVIDIA** graphic card before doing this step
-
-  - Linux
-
-    (1) Download CUDA 11.7
-
-    wget https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
-
-    (2) Install CUDA 11.7
-
-    sudo sh cuda_11.7.0_515.43.04_linux.run
-
-    1. Input accept
-
-    2. make sure CUDA Toolkit 11.7 is chosen (If you have already installed driver, do not select Driver)
-
-    3. Add environment variables
-
-    add the following content in ~/.bashrc
-
-    # CUDA
-    export PATH=/usr/local/cuda-11.7/bin${PATH:+:${PATH}}
-    export LD_LIBRARY_PATH=/usr/local/cuda-11.7/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
-
-    Make sure it works
-
-    source ~/.bashrc
-
-    (3) Download cuDNN 8.4.1
-
-    cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz
-
-    (4) Install cuDNN 8.4.1
-
-    tar -xf cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz
-    mv cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive cuda
-    sudo cp ./cuda/include/* /usr/local/cuda-11.7/include/
-    sudo cp ./cuda/lib/* /usr/local/cuda-11.7/lib64/
-    sudo chmod a+r /usr/local/cuda-11.7/lib64/*
-    sudo chmod a+r /usr/local/cuda-11.7/include/*
-
-  - Windows
-
-    (1) Download CUDA 11.7
-
-    cuda_11.7.0_516.01_windows.exe
-
-    (2) Install CUDA 11.7
-
-    (3) Download cuDNN 8.4.0
-
-    cudnn-windows-x86_64-8.4.0.27_cuda11.6-archive.zip
-
-    (4) Install cuDNN 8.4.0
-
-    unzip "cudnn-windows-x86_64-8.4.0.27_cuda11.6-archive.zip", then move all files in "bin, include, lib" in cuda directory to C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\
-
-- Install paddlepaddle:
-
-  - windows:
-
-    ```shell
-    python -m pip install paddlepaddle-gpu==2.6.1.post117 -f https://www.paddlepaddle.org.cn/whl/windows/mkl/avx/stable.html
-    ```
-
-  - Linux:
-
-    ```shell
-    python -m pip install paddlepaddle-gpu==2.6.1.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
-    ```
-
-  > If you installed cuda 10.2,please install cuDNN 7.6.5 instead of cuDNN v8.x
-
-  > If you installed cuda 11.2, please install cuDNN 8.1.1. However, RTX 30xx might be incompatible with cuda 11.2
-
-#### 3. Running the program
-
-- Run GUI version
 
+##### (2) DirectML (For AMD, Intel, and other GPU/APU users)
+- Suitable for Windows devices with AMD/NVIDIA/Intel GPUs.
+- Install ONNX Runtime DirectML version:
+  ```shell
+  pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+  pip install -r requirements.txt
+  pip install -r requirements_directml.txt
+  ```
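+
+> To confirm that the DirectML backend is visible, you can list the execution providers ONNX Runtime reports (a quick check, assuming the packages above installed cleanly):
+> ```shell
+> python -c "import onnxruntime; print(onnxruntime.get_available_providers())"
+> ```
+> The output should include `DmlExecutionProvider`; the program prints a similar detection message for each supported provider at startup.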
+
+##### (3) ONNX (For macOS, AMD ROCm, and other GPU acceleration environments, not tested!)
+
+- If using this method, DO NOT REPORT ISSUES.
+- Suitable for Linux or macOS devices with AMD GPUs, Apple Metal GPUs, or Apple Silicon.
+- Install the ONNX Runtime build that matches your execution backend:
+  ```shell
+  pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+  pip install -r requirements.txt
+
+  # Read documentation https://onnxruntime.ai/docs/execution-providers/
+  # Choose the appropriate execution backend based on your device, and create a
+  # matching requirements file modeled on requirements_directml.txt.
+
+  # Example:
+  # requirements_coreml.txt
+  # paddle2onnx==1.3.1
+  # onnxruntime-gpu==1.20.1
+  # onnxruntime-coreml==1.13.1
+
+  pip install -r requirements_coreml.txt
+  ```
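+
+> To verify that your chosen execution provider actually loads, you can open an inference session with it (an illustrative sketch; `model.onnx` is a placeholder for any converted model, such as the ones the program writes next to its Paddle models on first run):
+> ```shell
+> python -c "import onnxruntime as ort; print(ort.InferenceSession('model.onnx', providers=['CoreMLExecutionProvider', 'CPUExecutionProvider']).get_providers())"
+> ```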
+
+##### (4) CPU Only (For systems without GPU or those not wanting to use GPU acceleration)
+
+- Suitable for systems without GPU or those that do not wish to use GPU.
+- Install the CPU version of PaddlePaddle:
+  ```shell
+  pip install paddlepaddle==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+  pip install -r requirements.txt
+  ```
+
+#### 5. Run the Program
+
+- Run the graphical user interface version (GUI):
 
 ```shell
 python gui.py
 ```
 
-- Run CLI version
-
-```shell
+- Run the command-line interface version (CLI):
+```shell
 python ./backend/main.py
 ```
 
diff --git a/backend/config.py b/backend/config.py
index dfdb2d06..678c89e7 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -70,7 +70,7 @@
 
 # ×××××××××××××××××××× [不要改]判断是否使用GPU start ××××××××××××××××××××
-# 是否使用GPU
+# 是否使用GPU(Nvidia)
 USE_GPU = False
 # 如果paddlepaddle编译了gpu的版本
 if paddle.is_compiled_with_cuda():
@@ -78,7 +78,37 @@
     if len(paddle.static.cuda_places()) > 0:
         # 如果有GPU则使用GPU
         USE_GPU = True
-# ×××××××××××××××××××× [不要改]判断是否使用GPU start ××××××××××××××××××××
+
+# 是否使用ONNX(DirectML/AMD/Intel)
+ONNX_PROVIDERS = []
+if not USE_GPU:
+    try:
+        import onnxruntime as ort
+        available_providers = ort.get_available_providers()
+        for provider in available_providers:
+            if provider in [
+                "CPUExecutionProvider"
+            ]:
+                continue
+            if provider not in [
+                "DmlExecutionProvider",  # DirectML,适用于 Windows GPU
+                "ROCMExecutionProvider",  # AMD ROCm
+                "MIGraphXExecutionProvider",  # AMD MIGraphX
+                "VitisAIExecutionProvider",  # AMD VitisAI,适用于 RyzenAI & Windows, 实测和DirectML性能似乎差不多
+                "OpenVINOExecutionProvider",  # Intel GPU
+                "MetalExecutionProvider",  # Apple macOS
+                "CoreMLExecutionProvider",  # Apple macOS
+                "CUDAExecutionProvider",  # Nvidia GPU
+            ]:
+                print(interface_config['Main']['OnnxExectionProviderNotSupportedSkipped'].format(provider))
+                continue
+            print(interface_config['Main']['OnnxExecutionProviderDetected'].format(provider))
+            ONNX_PROVIDERS.append(provider)
+    except ModuleNotFoundError:
+        print(interface_config['Main']['OnnxRuntimeNotInstall'])
+if len(ONNX_PROVIDERS) > 0:
+    USE_GPU = True
+# ×××××××××××××××××××× [不要改]判断是否使用GPU end ××××××××××××××××××××
 
 # ×××××××××××××××××××× [不要改]读取语言、模型路径、字典路径 start ××××××××××××××××××××
 
diff --git a/backend/interface/ch.ini b/backend/interface/ch.ini
index 7ad16484..c128daa3 100644
--- a/backend/interface/ch.ini
+++ b/backend/interface/ch.ini
@@ -137,3 +137,6 @@ DeleteNoSubArea = 红色框区域外的字幕是否去除? 输入 "y" 或 "回
 FinishDeleteNoSubArea = 去除完毕
 SubLocation = 字幕文件生成位置:
 InputVideo = 请输入视频完整路径:
+OnnxExectionProviderNotSupportedSkipped = ONNX 执行提供程序: {} 不支持,已跳过。
+OnnxExecutionProviderDetected = 检测到 ONNX 执行提供程序: {}
+OnnxRuntimeNotInstall = ONNX 运行环境未安装,已跳过。
diff --git a/backend/interface/chinese_cht.ini b/backend/interface/chinese_cht.ini
index cc64f9a0..9dbb8741 100644
--- a/backend/interface/chinese_cht.ini
+++ b/backend/interface/chinese_cht.ini
@@ -137,3 +137,6 @@ DeleteNoSubArea = 紅色框區域外的字幕是否去除? 輸入 "y" 或 "回
 FinishDeleteNoSubArea = 去除完畢
 SubLocation = 字幕文件生成位置:
 InputVideo = 請輸入視頻完整路徑:
+OnnxExectionProviderNotSupportedSkipped = ONNX 執行提供程序: {} 不支援,已跳過。
+OnnxExecutionProviderDetected = 檢測到 ONNX 執行提供程序: {}
+OnnxRuntimeNotInstall = ONNX 執行環境未安裝,已跳過。
diff --git a/backend/interface/en.ini b/backend/interface/en.ini
index 52fa70cb..df5e7a08 100644
--- a/backend/interface/en.ini
+++ b/backend/interface/en.ini
@@ -137,3 +137,6 @@ DeleteNoSubArea = Are the subtitles outside the red box area removed? Input "y"
 FinishDeleteNoSubArea = Removed
 SubLocation = Subtitle file generated at:
 InputVideo = Please enter the full path of the video:
+OnnxExectionProviderNotSupportedSkipped = ONNX Execution Provider: {} is not supported, skipped.
+OnnxExecutionProviderDetected = Detected ONNX execution provider: {}
+OnnxRuntimeNotInstall = ONNX runtime environment not installed, skipped.
diff --git a/backend/interface/es.ini b/backend/interface/es.ini index ef846ea5..9c2690ed 100644 --- a/backend/interface/es.ini +++ b/backend/interface/es.ini @@ -137,3 +137,6 @@ DeleteNoSubArea = ¿Se remueven los subtítulos fuera del área del cuadro rojo? FinishDeleteNoSubArea = Eliminado SubLocation = Archivo de subtítulos generado en: InputVideo = Por favor, ingrese la ruta completa del video: +OnnxExectionProviderNotSupportedSkipped = Proveedor de ejecución de ONNX: {} no es compatible, ya se omitió. +OnnxExecutionProviderDetected = Proveedor de ejecución de ONNX detectado: {} +OnnxRuntimeNotInstall = Entorno de ejecución de ONNX no instalado, omitido. diff --git a/backend/interface/japan.ini b/backend/interface/japan.ini index 8dde5c1e..c9c04333 100644 --- a/backend/interface/japan.ini +++ b/backend/interface/japan.ini @@ -137,3 +137,6 @@ DeleteNoSubArea = 赤い枠以外の領域の字幕を削除しますか? "y" FinishDeleteNoSubArea = 削除済み SubLocation = 生成された字幕ファイルの場所: InputVideo = ビデオのフルパスを入力してください: +OnnxExectionProviderNotSupportedSkipped = ONNX 実行プロバイダー: {} はサポートされていないため、スキップされました。 +OnnxExecutionProviderDetected = ONNX 実行プロバイダー検出: {} +OnnxRuntimeNotInstall = ONNX 実行環境がインストールされていません、スキップされました。 diff --git a/backend/interface/ko.ini b/backend/interface/ko.ini index 2d0f9171..201358d9 100644 --- a/backend/interface/ko.ini +++ b/backend/interface/ko.ini @@ -137,3 +137,6 @@ DeleteNoSubArea = 빨간색 상자 영역 밖의 자막이 제거되었나요? FinishDeleteNoSubArea = 제거 완료 SubLocation = 자막 파일이 생성된 위치: InputVideo = 비디오의 전체 경로를 입력하세요: +OnnxExectionProviderNotSupportedSkipped = ONNX 실행 제공자: {} 지원되지 않음, 이미 건너뛰었습니다. +OnnxExecutionProviderDetected = ONNX 실행 제공자 감지됨: {} +OnnxRuntimeNotInstall = ONNX 실행 환경이 설치되지 않음, 건너뛰었습니다. diff --git a/backend/interface/vi.ini b/backend/interface/vi.ini index a34a059f..6f674767 100644 --- a/backend/interface/vi.ini +++ b/backend/interface/vi.ini @@ -137,3 +137,6 @@ DeleteNoSubArea = Có xóa phụ đề ngoài khu vực hộp đỏ? Nhập "y" FinishDeleteNoSubArea = Đã xóa SubLocation = Tệp phụ đề được tạo tại: InputVideo = Xin nhập đường dẫn đầy đủ của video: +OnnxExectionProviderNotSupportedSkipped = Nhà cung cấp thực thi ONNX: {} không được hỗ trợ, đã bỏ qua. +OnnxExecutionProviderDetected = Đã phát hiện nhà cung cấp thực thi ONNX: {} +OnnxRuntimeNotInstall = Môi trường thực thi ONNX chưa được cài đặt, đã bỏ qua. diff --git a/backend/main.py b/backend/main.py index 4723e592..68bba152 100644 --- a/backend/main.py +++ b/backend/main.py @@ -431,9 +431,12 @@ def vsf_output(out, ): left_end = self.sub_area[2] / self.frame_width # re:图像右半部分所占百分比,取值【0-1】 right_end = self.sub_area[3] / self.frame_width - cpu_count = max(int(multiprocessing.cpu_count() * 2 / 3), 1) - if cpu_count < 4: - cpu_count = max(multiprocessing.cpu_count() - 1, 1) + if config.USE_GPU and len(config.ONNX_PROVIDERS) > 0: + cpu_count = multiprocessing.cpu_count() + else: + cpu_count = max(int(multiprocessing.cpu_count() * 2 / 3), 1) + if cpu_count < 4: + cpu_count = max(multiprocessing.cpu_count() - 1, 1) if platform.system() == 'Windows': # 定义执行命令 cmd = f"{path_vsf} --use_cuda -c -r -i \"{self.video_path}\" -o \"{self.temp_output_dir}\" -ces \"{self.vsf_subtitle}\" " diff --git a/backend/tools/__init__.py b/backend/tools/__init__.py index d56c9dba..e69de29b 100644 --- a/backend/tools/__init__.py +++ b/backend/tools/__init__.py @@ -1,14 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/backend/tools/ocr.py b/backend/tools/ocr.py
index 5b23562c..0925d637 100644
--- a/backend/tools/ocr.py
+++ b/backend/tools/ocr.py
@@ -1,3 +1,4 @@
+import os
 import config
 import importlib
 from paddleocr import PaddleOCR
@@ -85,12 +86,12 @@ def init_model(self):
             gpu_mem=500,
             det_algorithm='DB',
             # 设置文本检测模型路径
-            det_model_dir=config.DET_MODEL_PATH,
+            det_model_dir=self.convertToOnnxModelIfNeeded(config.DET_MODEL_PATH),
             rec_algorithm='CRNN',
             # 设置每张图文本框批处理数量
             rec_batch_num=config.REC_BATCH_NUM,
             # 设置文本识别模型路径
-            rec_model_dir=config.REC_MODEL_PATH,
+            rec_model_dir=self.convertToOnnxModelIfNeeded(config.REC_MODEL_PATH),
             max_batch_size=config.MAX_BATCH_SIZE,
             det=True,
             use_angle_cls=False,
@@ -98,7 +99,55 @@
             lang=config.REC_CHAR_TYPE,
             ocr_version=f'PP-OCR{config.MODEL_VERSION.lower()}',
             rec_image_shape=config.REC_IMAGE_SHAPE,
+            use_onnx=len(config.ONNX_PROVIDERS) > 0,
+            onnx_providers=config.ONNX_PROVIDERS,
             debug=False,
             show_log=False)
+
+
+    def convertToOnnxModelIfNeeded(self, model_dir, model_filename="inference.pdmodel", params_filename="inference.pdiparams", opset_version=14):
+        """Converts a Paddle model to ONNX if ONNX providers are available and the model does not already exist."""
+
+        if not config.ONNX_PROVIDERS:
+            return model_dir
+
+        onnx_model_path = os.path.join(model_dir, "model.onnx")
+
+        if os.path.exists(onnx_model_path):
+            print(f"ONNX model already exists: {onnx_model_path}. Skipping conversion.")
+            return onnx_model_path
+
+        print(f"Converting Paddle model {model_dir} to ONNX...")
+        model_file = os.path.join(model_dir, model_filename)
+        params_file = os.path.join(model_dir, params_filename) if params_filename else ""
+
+        try:
+            import paddle2onnx
+            # Ensure the target directory exists
+            os.makedirs(os.path.dirname(onnx_model_path), exist_ok=True)
+
+            # Convert and save the model
+            onnx_model = paddle2onnx.export(
+                model_filename=model_file,
+                params_filename=params_file,
+                save_file=onnx_model_path,
+                opset_version=opset_version,
+                auto_upgrade_opset=True,
+                verbose=True,
+                enable_onnx_checker=True,
+                enable_experimental_op=True,
+                enable_optimize=True,
+                custom_op_info={},
+                deploy_backend="onnxruntime",
+                calibration_file="calibration.cache",
+                external_file=os.path.join(model_dir, "external_data"),
+                export_fp16_model=False,
+            )
+
+            print(f"Conversion successful. ONNX model saved to: {onnx_model_path}")
+            return onnx_model_path
+        except Exception as e:
+            print(f"Error during conversion: {e}")
+            return model_dir
 
 
 def get_coordinates(dt_box):
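For reference, the conversion that convertToOnnxModelIfNeeded performs can also be run by hand with the paddle2onnx command-line tool. This is a sketch under the same assumptions as the code above (default PP-OCR inference filenames, opset 14); the model directory shown is a placeholder:

```shell
paddle2onnx --model_dir ./ch_PP-OCRv4_det_infer \
            --model_filename inference.pdmodel \
            --params_filename inference.pdiparams \
            --save_file ./ch_PP-OCRv4_det_infer/model.onnx \
            --opset_version 14
```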
diff --git a/gui.py b/gui.py
index b9b9dfc8..b88ada52 100644
--- a/gui.py
+++ b/gui.py
@@ -90,7 +90,7 @@ def run(self):
         # 创建布局
         self._create_layout()
         # 创建窗口
-        self.window = sg.Window(title=self.interface_config['SubtitleExtractorGUI']['Title'], layout=self.layout,
+        self.window = sg.Window(title=self.interface_config['SubtitleExtractorGUI']['Title'] + " v" + backend.main.config.VERSION, layout=self.layout,
                                 icon=self.icon)
         while True:
             # 循环读取事件
@@ -135,7 +135,7 @@ def run(self):
 
     def update_interface_text(self):
         self._load_config()
-        self.window.set_title(self.interface_config['SubtitleExtractorGUI']['Title'])
+        self.window.set_title(self.interface_config['SubtitleExtractorGUI']['Title'] + " v" + backend.main.config.VERSION)
         self.window['-FILE_BTN-'].Update(self.interface_config['SubtitleExtractorGUI']['Open'])
         self.window['-FRAME1-'].Update(self.interface_config['SubtitleExtractorGUI']['Vertical'])
         self.window['-FRAME2-'].Update(self.interface_config['SubtitleExtractorGUI']['Horizontal'])
diff --git a/requirements.txt b/requirements.txt
index c579e1c0..c5fb4f71 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,9 @@ wordsegment==1.3.1
 scikit-image==0.25.1
 lmdb==1.5.1
 pyclipper==1.3.0.post5
-PySimpleGUI==4.70.1
+PySimpleGUI-4-foss==4.60.4.1
 numpy==1.26.4
 shapely==2.0.7
-six==1.17.0
\ No newline at end of file
+six==1.17.0
+setuptools==75.8.0
+paddleocr==2.9.1
\ No newline at end of file
diff --git a/requirements_directml.txt b/requirements_directml.txt
new file mode 100644
index 00000000..1151d12e
--- /dev/null
+++ b/requirements_directml.txt
@@ -0,0 +1,3 @@
+paddle2onnx==1.3.1
+onnxruntime-gpu==1.20.1
+onnxruntime-directml==1.20.1
\ No newline at end of file

From 2cc76d87bda39981695dead23243857dd6e8b1f5 Mon Sep 17 00:00:00 2001
From: jason
Date: Sat, 1 Mar 2025 21:27:18 +0800
Subject: [PATCH 4/5] =?UTF-8?q?=E4=BF=AE=E5=A4=8DQPT=E4=BC=9A=E5=9B=A0?=
 =?UTF-8?q?=E4=B8=BA=E4=BE=9D=E8=B5=96=E7=AD=89=E5=85=B3=E7=B3=BB=E5=B0=86?=
 =?UTF-8?q?CPU=E7=89=88=E6=9C=AC=E7=9A=84Paddle=E6=89=93=E5=8C=85=E5=85=A5?=
 =?UTF-8?q?=E5=86=85=EF=BC=8C=E9=80=A0=E6=88=90CPU+GPU=E7=89=88=E6=9C=AC?=
 =?UTF-8?q?=E5=90=8C=E6=97=B6=E8=A2=AB=E5=AE=89=E8=A3=85=EF=BC=8C=E6=8F=90?=
 =?UTF-8?q?=E7=A4=BA=E8=BF=90=E8=A1=8C=E5=9C=A8CPU=E6=A8=A1=E5=BC=8F?=
 =?UTF-8?q?=E4=B8=8B=EF=BC=8C=20=E5=AE=9E=E9=99=85=E4=B8=8A=E5=8F=AF?=
 =?UTF-8?q?=E4=BB=A5=E7=94=A8=E4=B8=8AGPU=20=E7=BB=99=E7=94=A8=E6=88=B7?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=A3=80=E6=B5=8BGPU=E5=8F=AF=E7=94=A8?=
 =?UTF-8?q?=E6=80=A7=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build-windows-cuda-10.2.yml | 4 +++-
 .github/workflows/build-windows-cuda-11.8.yml | 4 +++-
 .github/workflows/build-windows-cuda-12.3.yml | 4 +++-
 backend/config.py                             | 2 +-
 backend/interface/ch.ini                      | 1 +
 backend/interface/chinese_cht.ini             | 1 +
 backend/interface/en.ini                      | 1 +
 backend/interface/es.ini                      | 1 +
 backend/interface/japan.ini                   | 1 +
 backend/interface/ko.ini                      | 1 +
 backend/interface/vi.ini                      | 1 +
 gui.py                                        | 1 +
 12 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-windows-cuda-10.2.yml b/.github/workflows/build-windows-cuda-10.2.yml
index d0329153..184437a1 100644
--- a/.github/workflows/build-windows-cuda-10.2.yml
+++ b/.github/workflows/build-windows-cuda-10.2.yml
@@ -73,7 +73,9 @@ jobs:
         pip download -d ./vse_out/Debug/opt/packages/ paddlepaddle-gpu==2.5.2.post102 -f https://www.paddlepaddle.org.cn/whl/windows/mkl/avx/stable.html && \
         pip download -d ./vse_out/Debug/opt/packages/ -r requirements.txt && \
         cp -rfv ./vse_out/Debug/opt/packages/* ./vse_out/Release/opt/packages/ && \
-        rm -fv ./vse_out/*/opt/packages/numpy-2*
+        rm -fv ./vse_out/*/opt/packages/numpy-2* && \
+        rm -fv ./vse_out/*/opt/packages/paddlepaddle-* && \
+        rm -fv ./vse_out/*/opt/PaddlePaddlePackage/001-*
       env:
         QPT_Action: "True"
         CUDA_PATH_V10_2: "${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
diff --git a/.github/workflows/build-windows-cuda-11.8.yml b/.github/workflows/build-windows-cuda-11.8.yml
index 22c2ff69..8476edfa 100644
--- a/.github/workflows/build-windows-cuda-11.8.yml
+++ b/.github/workflows/build-windows-cuda-11.8.yml
@@ -62,7 +62,9 @@ jobs:
         pip download -d ./vse_out/Debug/opt/packages/ paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ && \
         pip download -d ./vse_out/Debug/opt/packages/ -r requirements.txt && \
         cp -rfv ./vse_out/Debug/opt/packages/* ./vse_out/Release/opt/packages/ && \
-        rm -fv ./vse_out/*/opt/packages/numpy-2*
+        rm -fv ./vse_out/*/opt/packages/numpy-2* && \
+        rm -fv ./vse_out/*/opt/packages/paddlepaddle-* && \
+        rm -fv ./vse_out/*/opt/PaddlePaddlePackage/001-*
       env:
         QPT_Action: "True"
       shell: bash
diff --git a/.github/workflows/build-windows-cuda-12.3.yml b/.github/workflows/build-windows-cuda-12.3.yml
index 0f091a23..97b62bcb 100644
--- a/.github/workflows/build-windows-cuda-12.3.yml
+++ b/.github/workflows/build-windows-cuda-12.3.yml
@@ -62,7 +62,9 @@ jobs:
         pip download -d ./vse_out/Debug/opt/packages/ paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/ && \
         pip download -d ./vse_out/Debug/opt/packages/ -r requirements.txt && \
         cp -rfv ./vse_out/Debug/opt/packages/* ./vse_out/Release/opt/packages/ && \
-        rm -fv ./vse_out/*/opt/packages/numpy-2*
+        rm -fv ./vse_out/*/opt/packages/numpy-2* && \
+        rm -fv ./vse_out/*/opt/packages/paddlepaddle-* && \
+        rm -fv ./vse_out/*/opt/PaddlePaddlePackage/001-*
       env:
         QPT_Action: "True"
       shell: bash
diff --git a/backend/config.py b/backend/config.py
index 678c89e7..2c3c3593 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -17,7 +17,7 @@
 from tools.constant import *
 
 # 项目版本号
-VERSION = "2.0.1"
+VERSION = "2.0.2"
 
 # 项目的base目录
 BASE_DIR = str(Path(os.path.abspath(__file__)).parent)
diff --git a/backend/interface/ch.ini b/backend/interface/ch.ini
index c128daa3..8ab12f6d 100644
--- a/backend/interface/ch.ini
+++ b/backend/interface/ch.ini
@@ -111,6 +111,7 @@ SubtitleArea = 字幕区域
 RecSubLang = 识别字幕语言
 RecMode = 识别模式
 IllegalPathWarning = 【警告】程序运行中断!路径不合法!请不要将程序放入带有空格和中文的路径下!!!请修改程序路径名后重新运行程序
+GPUWarning = 【重要提示】请勿通过任务管理器、GPU低占用率或"PaddlePaddle works on..."等信息提示判断程序是否调用显卡资源。由于现代显卡的运算效率极高,通常可在每秒内处理上千张图像任务,出现低占用率属正常现象。如需验证硬件调用状态,请通过对比CPU版本与GPU版本的执行耗时差异进行判断,若两者运行时间相近再进行反馈。
 GPUSpeedUp = 使用GPU进行加速
 FrameCount = 帧数
 FrameRate = 帧率
diff --git a/backend/interface/chinese_cht.ini b/backend/interface/chinese_cht.ini
index 9dbb8741..614fa49e 100644
--- a/backend/interface/chinese_cht.ini
+++ b/backend/interface/chinese_cht.ini
@@ -111,6 +111,7 @@ SubtitleArea = 字幕區域
 RecSubLang = 識別字幕語言
 RecMode = 識別模式
 IllegalPathWarning = 【警告】程序運行中斷!路徑不合法!請不要將程序放入帶有空格和中文的路徑下!!!請修改程序路徑名後重新運行程序
+GPUWarning = 【重要提示】請勿透過工作管理員、GPU低使用率或「PaddlePaddle works on...」等訊息提示判斷程式是否調用顯示卡資源。由於現代顯示卡的運算效率極高,通常可在每秒內處理上千張影像任務,低使用率屬正常現象。如需驗證硬體調用狀態,請透過對比CPU版本與GPU版本的執行耗時差異進行判斷,若兩者運行時間相近再進行回饋。
 GPUSpeedUp = 使用GPU進行加速
 FrameCount = 幀數
 FrameRate = 幀率
diff --git a/backend/interface/en.ini b/backend/interface/en.ini
index df5e7a08..6d13e006 100644
--- a/backend/interface/en.ini
+++ b/backend/interface/en.ini
@@ -111,6 +111,7 @@ SubtitleArea = Subtitle Area
 RecSubLang = Subtitle Language
 RecMode = Mode
 IllegalPathWarning = [Warning] The program is interrupted! The path is illegal! Please do not put the program in a path with spaces and Chinese! ! ! Please modify the program path name and re-run the program
+GPUWarning = [Important Notice] Do not rely on Task Manager, low GPU utilization, or messages like "PaddlePaddle works on..." to determine whether the program utilizes GPU resources. Modern GPUs are designed for extreme computational efficiency, typically capable of processing thousands of image tasks per second, making low utilization rates a normal occurrence. To verify hardware resource allocation, please compare execution time differences between the CPU and GPU versions. Submit feedback only if the runtime durations are comparable.
 GPUSpeedUp = Use GPU for acceleration
 FrameCount = Frame Count
 FrameRate = Frame Rate
diff --git a/backend/interface/es.ini b/backend/interface/es.ini
index 9c2690ed..783b373e 100644
--- a/backend/interface/es.ini
+++ b/backend/interface/es.ini
@@ -111,6 +111,7 @@ SubtitleArea = Área de subtítulos
 RecSubLang = Idioma de subtítulos
 RecMode = Modo
 IllegalPathWarning = [Advertencia] ¡El programa se interrumpió! ¡La ruta es ilegal! ¡No coloque el programa en una ruta con espacios y caracteres chinos! Por favor, modifique el nombre de ruta del programa y vuelva a ejecutarlo.
+GPUWarning = [Aviso importante] No utilice el Administrador de tareas, la baja utilización de GPU ni mensajes como "PaddlePaddle works on..." para evaluar el uso de recursos gráficos. Las GPU modernas tienen una eficiencia computacional excepcional, procesando miles de imágenes por segundo, por lo que una baja tasa de uso es normal. Para verificar el estado de asignación de hardware, compare las diferencias de tiempo de ejecución entre las versiones de CPU y GPU. Proporcione comentarios únicamente si los tiempos de ejecución son similares.
 GPUSpeedUp = Use GPU para aceleración
 FrameCount = Conteo de fotogramas
 FrameRate = Velocidad de fotogramas
diff --git a/backend/interface/japan.ini b/backend/interface/japan.ini
index c9c04333..eec41023 100644
--- a/backend/interface/japan.ini
+++ b/backend/interface/japan.ini
@@ -111,6 +111,7 @@ SubtitleArea = サブタイトル領域
 RecSubLang = サブタイトル言語
 RecMode = モード
 IllegalPathWarning = 【注意】プログラムは中断されました! パスが不正です! プログラムをスペースや中国語が含まれるパスに置かないでください!!! プログラムのパス名を修正してプログラムを再実行してください
+GPUWarning = 【重要なお知らせ】タスクマネージャー、GPU低使用率、または「PaddlePaddle works on...」といったメッセージでGPUリソースの使用状況を判断しないでください。最新GPUは極めて高い演算効率を備え、通常1秒間に数千の画像タスクを処理可能なため、低使用率は正常です。ハードウェアリソースの使用状態を確認する場合は、CPU版とGPU版の実行時間差を比較してください。両者の処理時間が近似する場合に限り、フィードバックを提出願います。
 GPUSpeedUp = GPUを使用して加速します
 FrameCount = フレーム数
 FrameRate = フレームレート
diff --git a/backend/interface/ko.ini b/backend/interface/ko.ini
index 201358d9..32f981e2 100644
--- a/backend/interface/ko.ini
+++ b/backend/interface/ko.ini
@@ -111,6 +111,7 @@ SubtitleArea = 자막 영역
 RecSubLang = 자막 언어 인식
 RecMode = 인식 모드
 IllegalPathWarning = [경고] 프로그램이 중단되었습니다! 경로가 올바르지 않습니다! 공백과 한국어가 포함된 경로에 프로그램을 넣지 마세요! 경로 이름을 변경하고 프로그램을 다시 실행해주세요
+GPUWarning = [중요 공지] 작업 관리자, GPU 저사용률 또는 "PaddlePaddle works on..." 메시지로 GPU 리소스 사용 여부를 판단하지 마십시오. 최신 GPU는 초당 수천 장의 이미지 작업을 처리할 수 있는 극한의 연산 효율성을 지니며, 낮은 사용률은 정상 현상입니다. 하드웨어 할당 상태를 확인하려면 CPU 버전과 GPU 버전의 실행 시간 차이를 비교해 주시기 바랍니다. 두 버전의 소요 시간이 유사한 경우에만 피드백을 제출해 주십시오.
GPUSpeedUp = 가속을 위해 GPU 사용 FrameCount = 프레임 수 FrameRate = 프레임 속도 diff --git a/backend/interface/vi.ini b/backend/interface/vi.ini index 6f674767..e3808a6b 100644 --- a/backend/interface/vi.ini +++ b/backend/interface/vi.ini @@ -111,6 +111,7 @@ SubtitleArea = Khu vực phụ đề RecSubLang = Ngôn ngữ phụ đề RecMode = Chế độ IllegalPathWarning = [Cảnh báo] Chương trình bị gián đoạn! Đường dẫn không hợp lệ! Xin đừng để chương trình trong đường dẫn có dấu cách và tiếng Trung! ! ! Xin sửa tên đường dẫn chương trình và chạy lại chương trình +GPUWarning = [Thông báo quan trọng] Không sử dụng Task Manager, tỷ lệ sử dụng GPU thấp hoặc thông báo như "PaddlePaddle works on..." để đánh giá việc dùng tài nguyên GPU. Card đồ họa hiện đại có hiệu suất xử lý cực cao, xử lý được hàng ngàn ảnh mỗi giây nên tỷ lệ sử dụng thấp là bình thường. Để kiểm tra trạng thái phân bổ phần cứng, hãy so sánh thời gian chạy giữa phiên bản CPU và GPU. Chỉ phản hồi khi thời gian xử lý của hai phiên bản gần như tương đương. GPUSpeedUp = Sử dụng GPU để tăng tốc FrameCount = Số khung hình FrameRate = Tốc độ khung hình diff --git a/gui.py b/gui.py index b88ada52..bf68e0f1 100644 --- a/gui.py +++ b/gui.py @@ -87,6 +87,7 @@ def __init__(self): self.se = None def run(self): + print(self.interface_config['Main']['GPUWarning']) # 创建布局 self._create_layout() # 创建窗口 From ec01d222710285d0647bf4e2e414bf3405d8e7bb Mon Sep 17 00:00:00 2001 From: jason Date: Thu, 6 Mar 2025 18:49:48 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E4=BF=AE=E5=A4=8DONNX=E4=B8=8D=E5=B7=A5?= =?UTF-8?q?=E4=BD=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/config.py | 4 ++-- requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/config.py b/backend/config.py index 2c3c3593..304de86a 100644 --- a/backend/config.py +++ b/backend/config.py @@ -17,7 +17,7 @@ from tools.constant import * # 项目版本号 -VERSION = "2.0.2" +VERSION = "2.0.3" # 项目的base目录 BASE_DIR = str(Path(os.path.abspath(__file__)).parent) @@ -94,7 +94,7 @@ "DmlExecutionProvider", # DirectML,适用于 Windows GPU "ROCMExecutionProvider", # AMD ROCm "MIGraphXExecutionProvider", # AMD MIGraphX - "VitisAIExecutionProvider", # AMD VitisAI,适用于 RyzenAI & Windows, 实测和DirectML性能似乎差不多 + # "VitisAIExecutionProvider", # AMD VitisAI,适用于 RyzenAI & Windows "OpenVINOExecutionProvider", # Intel GPU "MetalExecutionProvider", # Apple macOS "CoreMLExecutionProvider", # Apple macOS diff --git a/requirements.txt b/requirements.txt index c5fb4f71..6eaa2cef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ numpy==1.26.4 shapely==2.0.7 six==1.17.0 setuptools==75.8.0 -paddleocr==2.9.1 \ No newline at end of file +je-paddleocr==2.9.1 \ No newline at end of file