From 7b2c119061b860ccc021c29ab66aeaa97decff6e Mon Sep 17 00:00:00 2001 From: laiwen <80147768+laiwenzh@users.noreply.github.com> Date: Sun, 30 Jun 2024 18:05:57 +0800 Subject: [PATCH] solve security issue; helper: bugfix, cpu platform check (#23) - check if the cpu can run bf16 gemm - fix lost tokens bug - solve github security issue: use protobuf==3.18.3 --- README.md | 2 +- README_CN.md | 2 +- conan/conanfile.txt | 2 +- conan/conanfile_arm.txt | 2 +- .../python/0_basic/basic_example_qwen_v20.py | 4 +- .../model_config/config_chatglm4_9b.json | 2 +- examples/python/requirements.txt | 1 + python/dashinfer/helper/helper.py | 41 ++++++++++++++++++- python/setup.py | 2 +- scripts/docker/dev_arm_alinux.Dockerfile | 2 +- scripts/docker/dev_arm_centos8.Dockerfile | 2 +- scripts/docker/dev_x86_centos7.Dockerfile | 2 +- scripts/docker/dev_x86_ubuntu.Dockerfile | 2 +- 13 files changed, 52 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index a318fccfd..4354dd179 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ This subsection lists the third-party dependencies for the different stages of D ## Model Inference Phase -- [protobuf](https://protobuf.dev/)(3.18): For parsing model files. +- [protobuf](https://protobuf.dev/)(3.18.3): For parsing model files. - [pybind11](https://github.com/pybind/pybind11)(2.8): For binding python interfaces. - [onednn](https://github.com/oneapi-src/oneDNN), [mkl](https://www.intel.com/content/www/us/en/docs/onemkl/get-started-guide/2023-0/overview.html): BLAS libraries, for accelerating GEMM calculations. - [openmp](https://www.openmp.org/): A standard parallel programming library. diff --git a/README_CN.md b/README_CN.md index 8acaa1f0c..b389efc64 100644 --- a/README_CN.md +++ b/README_CN.md @@ -181,7 +181,7 @@ $$ x_{u8} = x_{fp32} / scale + zeropoint $$ ## 模型推理阶段 -- [protobuf](https://protobuf.dev/)(3.18): For parsing model files. +- [protobuf](https://protobuf.dev/)(3.18.3): For parsing model files. 
- [pybind11](https://github.com/pybind/pybind11)(2.8): For binding python interfaces. - [onednn](https://github.com/oneapi-src/oneDNN), [mkl](https://www.intel.com/content/www/us/en/docs/onemkl/get-started-guide/2023-0/overview.html): BLAS libraries, for accelerating GEMM calculations. - [openmp](https://www.openmp.org/): A standard parallel programming library. diff --git a/conan/conanfile.txt b/conan/conanfile.txt index 331687b20..a140b54e6 100644 --- a/conan/conanfile.txt +++ b/conan/conanfile.txt @@ -1,5 +1,5 @@ [requires] - protobuf/3.18.1 + protobuf/3.18.3 gtest/1.11.0 glog/0.5.0 pybind11/2.8.1 diff --git a/conan/conanfile_arm.txt b/conan/conanfile_arm.txt index fd9047669..b79071f9f 100644 --- a/conan/conanfile_arm.txt +++ b/conan/conanfile_arm.txt @@ -1,5 +1,5 @@ [requires] - protobuf/3.18.1 + protobuf/3.18.3 gtest/1.11.0 glog/0.5.0 libunwind/1.7.2 diff --git a/examples/python/0_basic/basic_example_qwen_v20.py b/examples/python/0_basic/basic_example_qwen_v20.py index e86080dcc..f055a883c 100644 --- a/examples/python/0_basic/basic_example_qwen_v20.py +++ b/examples/python/0_basic/basic_example_qwen_v20.py @@ -139,7 +139,7 @@ def done_callback(future): ## download model from huggingface # original_model = { # "source": "huggingface", - # "model_id": "Qwen/Qwen2-1.5B", + # "model_id": "Qwen/Qwen2-1.5B-Instruct", # "revision": "", # "model_path": "" # } @@ -147,7 +147,7 @@ def done_callback(future): ## download model from modelscope original_model = { "source": "modelscope", - "model_id": "qwen/Qwen2-1.5B", + "model_id": "qwen/Qwen2-1.5B-Instruct", "revision": "master", "model_path": "" } diff --git a/examples/python/model_config/config_chatglm4_9b.json b/examples/python/model_config/config_chatglm4_9b.json index dade3d9a0..f4b08db99 100644 --- a/examples/python/model_config/config_chatglm4_9b.json +++ b/examples/python/model_config/config_chatglm4_9b.json @@ -25,7 +25,7 @@ "min_length": 0, "max_length": 2048, "no_repeat_ngram_size": 0, - "eos_token_id": 2, + 
"eos_token_id": 151329, "seed": 1234, "stop_words_ids": [ [ diff --git a/examples/python/requirements.txt b/examples/python/requirements.txt index 34dcd13c3..b16eadce4 100644 --- a/examples/python/requirements.txt +++ b/examples/python/requirements.txt @@ -4,6 +4,7 @@ huggingface_hub modelscope gradio tabulate +py-cpuinfo # model requirements sentencepiece diff --git a/python/dashinfer/helper/helper.py b/python/dashinfer/helper/helper.py index 1e6dd11ab..571874fbb 100644 --- a/python/dashinfer/helper/helper.py +++ b/python/dashinfer/helper/helper.py @@ -242,6 +242,33 @@ def check_model_exist(self): return False return True + def _caniuse_bf16_gemm(self): + import cpuinfo + import platform + def get_architecture(): + arch = platform.machine() + if arch.lower().startswith('arm') or arch.lower().startswith('aarch'): + return "ARM" + elif arch.lower().startswith('x86') or arch.lower().startswith('i686') or arch.lower().startswith('i386') or arch.lower().startswith('amd64'): + return "x86" + else: + return "Unknown" + + def mayiuse_instruction_set(istrset): + info = cpuinfo.get_cpu_info() + flags = info.get('flags', []) + rt = istrset in flags + if rt == False: + raise ValueError(f"[Error] Current CPU does not support instruction set: {istrset}\n") + + arch = get_architecture() + if arch == "ARM": + mayiuse_instruction_set("sve") + elif arch == "x86": + mayiuse_instruction_set("avx512_bf16") + else: + raise Exception("[Error] Unknown CPU platform\n") + def init_engine(self): def get_physical_cores_per_numa_node(): @@ -274,7 +301,7 @@ def get_physical_cores_per_numa_node(): begin = time.time() if self.check_model_exist() == False: - exit(-1) + sys.exit(-1) as_model_config = allspark.AsModelConfig( model_name=self.model_name, @@ -300,6 +327,16 @@ def get_physical_cores_per_numa_node(): else: as_model_config.num_threads = get_physical_cores_per_numa_node() + if self.engine_config["matmul_precision"] != "highest": + try: + self._caniuse_bf16_gemm() + except ValueError as e: 
+                print(f"{str(e)} You need to set the `matmul_precision` field in the config_xxx.json file to `highest`") +                sys.exit(-1) +            except Exception as e: +                print(f"{str(e)}") +                sys.exit(-1) + as_model_config.matmul_precision = self.engine_config[ "matmul_precision"] @@ -504,7 +541,7 @@ def process_one_request_impl(self, request, stream_mode=False): request.out_text = "" if (len(new_ids) > 0): - output_ids.append(new_ids[0]) + output_ids.extend(new_ids) request.out_tokens = output_ids request.out_tokens_len = len(output_ids) diff --git a/python/setup.py b/python/setup.py index 3b1e58816..bceafcedb 100644 --- a/python/setup.py +++ b/python/setup.py @@ -167,7 +167,7 @@ def os_script_exec(cmd: str): ext_modules=[CMakeExtension("_allspark")], cmdclass={"build_ext": CMakeBuild}, setup_requires=["jinja2"], - install_requires=["protobuf==3.18"], + install_requires=["protobuf==3.18.3"], zip_safe=False, python_requires=">=3.8", extra_compile_args=["-O3"]) diff --git a/scripts/docker/dev_arm_alinux.Dockerfile b/scripts/docker/dev_arm_alinux.Dockerfile index 160ef623d..ab15cc865 100644 --- a/scripts/docker/dev_arm_alinux.Dockerfile +++ b/scripts/docker/dev_arm_alinux.Dockerfile @@ -67,6 +67,6 @@ RUN conda install -y pybind11 RUN pip3 install --upgrade pip && pip3 install -U setuptools # engine requirements -RUN pip3 install torch transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate +RUN pip3 install torch transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/ diff --git a/scripts/docker/dev_arm_centos8.Dockerfile b/scripts/docker/dev_arm_centos8.Dockerfile index 35e253de6..0039e5176 100644 --- a/scripts/docker/dev_arm_centos8.Dockerfile +++ b/scripts/docker/dev_arm_centos8.Dockerfile @@ -57,6 +57,6 @@ RUN conda install -y pybind11 RUN pip3 install --upgrade pip && pip3 install -U setuptools # engine requirements -RUN pip3 install torch transformers==4.38.0 protobuf==3.18.0 conan==1.60.0
pytest tokenizers scons wheel pandas tabulate +RUN pip3 install torch transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/ diff --git a/scripts/docker/dev_x86_centos7.Dockerfile b/scripts/docker/dev_x86_centos7.Dockerfile index 981665581..6f2aad7de 100644 --- a/scripts/docker/dev_x86_centos7.Dockerfile +++ b/scripts/docker/dev_x86_centos7.Dockerfile @@ -57,6 +57,6 @@ RUN conda install -y pybind11 # engine requirements RUN conda install -y pytorch-cpu -c pytorch -RUN pip3 install transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate +RUN pip3 install transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/ diff --git a/scripts/docker/dev_x86_ubuntu.Dockerfile b/scripts/docker/dev_x86_ubuntu.Dockerfile index 2d17df191..4f05a59d3 100644 --- a/scripts/docker/dev_x86_ubuntu.Dockerfile +++ b/scripts/docker/dev_x86_ubuntu.Dockerfile @@ -49,6 +49,6 @@ RUN pip3 install --upgrade pip && pip3 install -U setuptools # engine requirements RUN conda install -y pytorch-cpu -c pytorch -RUN pip3 install transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate +RUN pip3 install transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/