Skip to content

Commit c0880b5

Browse files
committed
Merge remote-tracking branch 'turboderp/master'
2 parents 31af60a + 1a80d38 commit c0880b5

15 files changed

+546
-68
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
---
# Manually-triggered workflow that builds exllamav2 wheels for ROCm
# (and, when matrix entries are added, CUDA) targets, then optionally
# uploads them to a GitHub release when the `release` input is '1'.
name: Build Wheels & Release ROCm62

on:
  workflow_dispatch:
    inputs:
      release:
        description: 'Release? 1 = yes, 0 = no'
        default: '0'
        required: true
        type: string

permissions:
  contents: write

jobs:
  build_wheels:
    name: ${{ matrix.os }} P${{ matrix.pyver }} C${{ matrix.cuda }} R${{ matrix.rocm }} T${{ matrix.torch }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:

          # Ubuntu 20.04 CUDA

          # ROCm 6.2
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2', torch: '2.5.0', cudaarch: '' }

          # ROCm 6.2.4
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.10', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.11', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.12', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }
          - { artname: 'wheel', os: ubuntu-20.04-l, pyver: '3.13', cuda: '', rocm: '6.2.4', torch: '2.6.0', cudaarch: '' }

      fail-fast: false

    defaults:
      run:
        shell: pwsh

    steps:
      # Free disk space

      - name: Free Disk Space
        uses: jlumbroso/free-disk-space@v1.3.1
        if: runner.os == 'Linux'
        with:
          tool-cache: true
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          swap-storage: true

      # Setup Python

      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5.4.0
        with:
          python-version: ${{ matrix.pyver }}

      # Get version string from package

      - name: Get version string
        id: package_version
        run: |
          $versionString = Get-Content $(Join-Path 'exllamav2' 'version.py') -raw
          if ($versionString -match '__version__ = "(\d+\.(?:\d+\.?(?:dev\d+)?)*)"')
          {
              Write-Output $('::notice file=build-wheels-release.yml,line=200,title=Package Version::Detected package version is: {0}' -f $Matches[1])
              Write-Output "PACKAGE_VERSION=$($Matches[1])" >> "$env:GITHUB_OUTPUT"
          }
          else
          {
              Write-Output '::error file=build-wheels-release.yml,line=203::Could not parse version from exllamav2/version.py! You must upload wheels manually!'
              Write-Output "PACKAGE_VERSION=None" >> "$env:GITHUB_OUTPUT"
          }

      # Pin VS build tools to 17.9 so builds won't fail

      - name: Install VS2022 BuildTools 17.9.7
        run: choco install -y visualstudio2022buildtools --version=117.9.7.0 --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --installChannelUri https://aka.ms/vs/17/release/180911598_-255012421/channel"
        if: runner.os == 'Windows'

      # Install ROCm SDK, apparently needs to happen before setting up Python

      - name: Build for ROCm
        if: matrix.rocm != ''
        shell: bash
        run: |
          # --- Install ROCm SDK

          export ROCM_VERSION=${{ matrix.rocm }}
          export TORCH_VERSION=${{ matrix.torch }}

          [ ! -d /etc/apt/keyrings ] && sudo mkdir --parents --mode=0755 /etc/apt/keyrings
          wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
          echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee --append /etc/apt/sources.list.d/rocm.list
          echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600

          sudo apt update
          sudo apt install rocm-hip-sdk -y
          sudo apt clean -y

          echo "/opt/rocm/bin" >> $GITHUB_PATH
          echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
          echo "ROCM_VERSION=$ROCM_VERSION" >> $GITHUB_ENV
          echo "USE_ROCM=1" >> $GITHUB_ENV

          # --- Install dependencies

          python3 -m ensurepip --upgrade
          pip3 install torch==${{ matrix.torch }} --index-url="https://download.pytorch.org/whl/rocm$ROCM_VERSION"
          pip3 install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja
          pip3 cache purge

          # --- Build wheel

          python3 -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=+rocm${{ matrix.rocm }}-torch${{ matrix.torch }}"

      # Build for CUDA

      - name: Setup Mamba
        if: matrix.cuda != ''
        uses: conda-incubator/setup-miniconda@v3.1.0
        with:
          activate-environment: "exllama"
          python-version: ${{ matrix.pyver }}
          # miniforge-variant: Mambaforge
          miniforge-version: latest
          # use-mamba: true
          add-pip-as-python-dependency: true
          auto-activate-base: false

      - name: Build for CUDA
        if: matrix.cuda != ''
        run: |
          # --- Spawn the VS shell
          if ($IsWindows) {
              Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
              Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64'
              $env:DISTUTILS_USE_SDK=1
          }

          # --- Install CUDA using Conda
          $cudaVersion = '${{ matrix.cuda }}'
          $cudaVersionPytorch = '${{ matrix.cuda }}'.Remove('${{ matrix.cuda }}'.LastIndexOf('.')).Replace('.','')

          $env:MAMBA_NO_LOW_SPEED_LIMIT = 1
          mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime

          if (!(mamba list cuda)[-1].contains('cuda')) {sleep -s 10; mamba install -y 'cuda' $cudaVersion}
          if (!(mamba list cuda)[-1].contains('cuda')) {throw 'CUDA Toolkit failed to install!'}

          $env:CUDA_PATH = $env:CONDA_PREFIX
          $env:CUDA_HOME = $env:CONDA_PREFIX
          if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}

          # --- Install dependencies

          python -m ensurepip --upgrade
          python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cu$cudaVersionPytorch
          python -m pip install --upgrade setuptools==69.5.1 build wheel safetensors sentencepiece ninja

          # --- Build wheel

          $BUILDTAG = "+cu$cudaVersionPytorch-torch${{ matrix.torch }}"
          $env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
          python -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$BUILDTAG"

      # Build sdist

      - name: Build sdist
        if: matrix.cuda == '' && matrix.rocm == ''
        run: |
          # --- Spawn the VS shell
          if ($IsWindows) {
              Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
              Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64'
              $env:DISTUTILS_USE_SDK=1
          }

          # --- Install dependencies

          python -m pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cpu
          python -m pip install build wheel ninja

          # --- Build wheel

          $env:EXLLAMA_NOCOMPILE=1
          python -m build -n

      # Upload files

      # NOTE(review): the artifact name omits matrix.rocm; names stay unique here
      # only because each ROCm version maps to a distinct torch version — confirm
      # before adding entries that share a torch version across ROCm versions.
      - uses: actions/upload-artifact@v4
        if: matrix.artname == 'wheel'
        with:
          name: wheel-${{ matrix.os }}-py${{ matrix.pyver }}-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}
          path: ./dist/*

      - uses: actions/upload-artifact@v4
        if: matrix.artname == 'sdist'
        with:
          name: 'sdist'
          path: ./dist/*

      - name: Upload files to GitHub release
        if: steps.package_version.outputs.PACKAGE_VERSION != 'None' && inputs.release == '1'
        uses: svenstaro/upload-release-action@2.6.1
        with:
          file: ./dist/*.whl
          tag: ${{ format('v{0}', steps.package_version.outputs.PACKAGE_VERSION) }}
          file_glob: true
          overwrite: true
          release_name: ${{ steps.package_version.outputs.PACKAGE_VERSION }}

examples/multimodal_grounding_qwen.py

+73-7
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class Model:
5151
current_image: Image or None = None
5252
current_description: str
5353

54-
def __init__(self, model_directory):
54+
def __init__(self, model_directory, bbox_mode: str):
5555
self.model_directory = model_directory
5656
self.config = None
5757
self.vision_model = None
@@ -61,17 +61,22 @@ def __init__(self, model_directory):
6161
self.current_image = None
6262
self.current_emb = None
6363
self.current_description = ""
64+
bbox_funcs = {
65+
"qwen2": self.get_grounding_bb_qwen2,
66+
"qwen25": self.get_grounding_bb_qwen25,
67+
}
68+
self.bbox_func = bbox_funcs[bbox_mode]
6469

6570
def load(self):
6671
"""Load and initialize the things"""
6772
self.config = ExLlamaV2Config(self.model_directory)
68-
self.config.max_seq_len = 16384
73+
self.config.max_seq_len = 8192
6974

7075
self.vision_model = ExLlamaV2VisionTower(self.config)
7176
self.vision_model.load(progress = True)
7277

7378
self.model = ExLlamaV2(self.config)
74-
self.cache = ExLlamaV2Cache(self.model, lazy = True, max_seq_len = 16384)
79+
self.cache = ExLlamaV2Cache(self.model, lazy = True, max_seq_len = 32768)
7580
self.model.load_autosplit(self.cache, progress = True)
7681
self.tokenizer = ExLlamaV2Tokenizer(self.config)
7782

@@ -148,14 +153,21 @@ def inference(self, settext_fn, update_fn):
148153
lastupdate = time.time()
149154
settext_fn(text)
150155
update_fn()
156+
#
157+
# text = \
158+
# """And you may find yourself living in a shotgun shack
159+
# And you may find yourself in another part of the world
160+
# And you may find yourself behind the wheel of a large automobile
161+
# And you may find yourself in a beautiful house, with a beautiful wife
162+
# And you may ask yourself, "Well, how did I get here?\""""
151163

152164
settext_fn(text)
153165
update_fn()
154166
self.current_description = text
155167
print("Image description from model:")
156168
print(text)
157169

158-
def get_grounding_bb(self, start, end) -> tuple:
170+
def get_grounding_bb_qwen2(self, start, end) -> tuple:
159171
"""
160172
Prompt the model again and try to extract the bounding box of the image details indicated by the selected portion
161173
of the description. We do this by repeating the exact same prompt up to and including the selected text, but
@@ -209,6 +221,55 @@ def get_grounding_bb(self, start, end) -> tuple:
209221

210222
return a, b
211223

224+
def get_grounding_bb_qwen25(self, start, end) -> tuple:
    """
    Qwen2.5 works the same way, except the coordinates are no longer normalized and the format is:
    "(x0,y0,x1,y1)"

    :param start:
        Start index of the selected span in self.current_description.
    :param end:
        End index (exclusive) of the selected span.
    :return:
        Two corner points (x0, y0), (x1, y1) normalized to the current image size,
        or (None, None) if the selection is empty or no box could be parsed.
    """

    if start >= end:
        return None, None

    # Including leading space
    if start > 0 and self.current_description[start - 1] == " ":
        start -= 1

    # Repeat the same prompt up to the selection, with grounding tokens added
    prompt = self.get_prompt()
    prompt += self.current_description[:start]
    prompt += "<|object_ref_start|>"
    prompt += self.current_description[start:end]
    prompt += "<|object_ref_end|><|box_start|>("

    bb_string, res = self.generator.generate(
        prompt = prompt,
        add_bos = True,
        max_new_tokens = 28,
        stop_conditions = [self.tokenizer.single_id("<|box_end|>")],
        gen_settings = ExLlamaV2Sampler.Settings.greedy(),
        embeddings = [self.current_emb],
        completion_only = True,
        return_last_results = True,  # debug purposes
    )
    # Prompt ends with the opening parenthesis to steer generation, so
    # prepend it back to get the full "(x0,y0,x1,y1)" string
    bb_string = "(" + bb_string

    print(f"Generation: {bb_string}")
    pprint.pprint(res, indent = 4)

    # BB string is in the format "(x0,y0,x1,y1)" with integer coordinates;
    # normalize by the image dimensions
    s = self.current_image.size
    try:
        d = tuple(map(int, bb_string.strip("()").split(",")))
        a = (d[0] / s[0], d[1] / s[1])
        b = (d[2] / s[0], d[3] / s[1])
    # Catch only parse failures (non-integer output -> ValueError, fewer than
    # four coordinates -> IndexError) rather than a bare except, which would
    # also swallow KeyboardInterrupt and unrelated programming errors
    except (ValueError, IndexError):
        print("No bounding box could be determined")
        a, b = None, None

    return a, b
212273

213274
class GroundingDemo(QMainWindow):
214275

@@ -472,7 +533,7 @@ def on_selection_made(self, pos):
472533

473534
print(f"Selected span: {start}, {end}")
474535
print(f"Selected text: {repr(self.model.current_description[start:end])}")
475-
a, b = self.model.get_grounding_bb(start, end)
536+
a, b = self.model.bbox_func(start, end)
476537
self.image_label.set_bounding_box(a, b)
477538

478539

@@ -481,9 +542,14 @@ def on_selection_made(self, pos):
481542
# https://huggingface.co/turboderp/Qwen2-VL-7B-Instruct-exl2
482543

483544
def main():
484-
model_dir = "/mnt/str/models/qwen2-vl-7b-instruct-exl2/6.0bpw"
545+
546+
# model_dir = "/mnt/str/models/qwen2-vl-7b-instruct-exl2/6.0bpw"
547+
# bbox_mode = "qwen2"
548+
model_dir = "/mnt/str/models/qwen2.5-vl-7b-instruct-exl2/6.0bpw"
549+
bbox_mode = "qwen25"
550+
485551
app = QApplication(sys.argv)
486-
model = Model(model_dir)
552+
model = Model(model_dir, bbox_mode)
487553
model.load()
488554
window = GroundingDemo(model, model_dir)
489555
window.show()

0 commit comments

Comments
 (0)