From 7b2c119061b860ccc021c29ab66aeaa97decff6e Mon Sep 17 00:00:00 2001 From: laiwen <80147768+laiwenzh@users.noreply.github.com> Date: Sun, 30 Jun 2024 18:05:57 +0800 Subject: [PATCH] solve security issue; helper: bugfix, cpu platform check (#23) - check if the cpu can run bf16 gemm - fix lost tokens bug - solve github security issue: use protobuf==3.18.3 --- README.md | 2 +- README_CN.md | 2 +- conan/conanfile.txt | 2 +- conan/conanfile_arm.txt | 2 +- .../python/0_basic/basic_example_qwen_v20.py | 4 +- .../model_config/config_chatglm4_9b.json | 2 +- examples/python/requirements.txt | 1 + python/dashinfer/helper/helper.py | 41 ++++++++++++++++++- python/setup.py | 2 +- scripts/docker/dev_arm_alinux.Dockerfile | 2 +- scripts/docker/dev_arm_centos8.Dockerfile | 2 +- scripts/docker/dev_x86_centos7.Dockerfile | 2 +- scripts/docker/dev_x86_ubuntu.Dockerfile | 2 +- 13 files changed, 52 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index a318fccfd..4354dd179 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ This subsection lists the third-party dependencies for the different stages of D ## Model Inference Phase -- [protobuf](https://protobuf.dev/)(3.18): For parsing model files. +- [protobuf](https://protobuf.dev/)(3.18.3): For parsing model files. - [pybind11](https://github.com/pybind/pybind11)(2.8): For binding python interfaces. - [onednn](https://github.com/oneapi-src/oneDNN), [mkl](https://www.intel.com/content/www/us/en/docs/onemkl/get-started-guide/2023-0/overview.html): BLAS libraries, for accelerating GEMM calculations. - [openmp](https://www.openmp.org/): A standard parallel programming library. diff --git a/README_CN.md b/README_CN.md index 8acaa1f0c..b389efc64 100644 --- a/README_CN.md +++ b/README_CN.md @@ -181,7 +181,7 @@ $$ x_{u8} = x_{fp32} / scale + zeropoint $$ ## 模型推理阶段 -- [protobuf](https://protobuf.dev/)(3.18): For parsing model files. +- [protobuf](https://protobuf.dev/)(3.18.3): For parsing model files. 
- [pybind11](https://github.com/pybind/pybind11)(2.8): For binding python interfaces. - [onednn](https://github.com/oneapi-src/oneDNN), [mkl](https://www.intel.com/content/www/us/en/docs/onemkl/get-started-guide/2023-0/overview.html): BLAS libraries, for accelerating GEMM calculations. - [openmp](https://www.openmp.org/): A standard parallel programming library. diff --git a/conan/conanfile.txt b/conan/conanfile.txt index 331687b20..a140b54e6 100644 --- a/conan/conanfile.txt +++ b/conan/conanfile.txt @@ -1,5 +1,5 @@ [requires] - protobuf/3.18.1 + protobuf/3.18.3 gtest/1.11.0 glog/0.5.0 pybind11/2.8.1 diff --git a/conan/conanfile_arm.txt b/conan/conanfile_arm.txt index fd9047669..b79071f9f 100644 --- a/conan/conanfile_arm.txt +++ b/conan/conanfile_arm.txt @@ -1,5 +1,5 @@ [requires] - protobuf/3.18.1 + protobuf/3.18.3 gtest/1.11.0 glog/0.5.0 libunwind/1.7.2 diff --git a/examples/python/0_basic/basic_example_qwen_v20.py b/examples/python/0_basic/basic_example_qwen_v20.py index e86080dcc..f055a883c 100644 --- a/examples/python/0_basic/basic_example_qwen_v20.py +++ b/examples/python/0_basic/basic_example_qwen_v20.py @@ -139,7 +139,7 @@ def done_callback(future): ## download model from huggingface # original_model = { # "source": "huggingface", - # "model_id": "Qwen/Qwen2-1.5B", + # "model_id": "Qwen/Qwen2-1.5B-Instruct", # "revision": "", # "model_path": "" # } @@ -147,7 +147,7 @@ def done_callback(future): ## download model from modelscope original_model = { "source": "modelscope", - "model_id": "qwen/Qwen2-1.5B", + "model_id": "qwen/Qwen2-1.5B-Instruct", "revision": "master", "model_path": "" } diff --git a/examples/python/model_config/config_chatglm4_9b.json b/examples/python/model_config/config_chatglm4_9b.json index dade3d9a0..f4b08db99 100644 --- a/examples/python/model_config/config_chatglm4_9b.json +++ b/examples/python/model_config/config_chatglm4_9b.json @@ -25,7 +25,7 @@ "min_length": 0, "max_length": 2048, "no_repeat_ngram_size": 0, - "eos_token_id": 2, + 
"eos_token_id": 151329, "seed": 1234, "stop_words_ids": [ [ diff --git a/examples/python/requirements.txt b/examples/python/requirements.txt index 34dcd13c3..b16eadce4 100644 --- a/examples/python/requirements.txt +++ b/examples/python/requirements.txt @@ -4,6 +4,7 @@ huggingface_hub modelscope gradio tabulate +py-cpuinfo # model requirements sentencepiece diff --git a/python/dashinfer/helper/helper.py b/python/dashinfer/helper/helper.py index 1e6dd11ab..571874fbb 100644 --- a/python/dashinfer/helper/helper.py +++ b/python/dashinfer/helper/helper.py @@ -242,6 +242,33 @@ def check_model_exist(self): return False return True + def _caniuse_bf16_gemm(self): + import cpuinfo + import platform + def get_architecture(): + arch = platform.machine() + if arch.lower().startswith('arm') or arch.lower().startswith('aarch'): + return "ARM" + elif arch.lower().startswith('x86') or arch.lower().startswith('i686') or arch.lower().startswith('i386') or arch.lower().startswith('amd64'): + return "x86" + else: + return "Unknown" + + def mayiuse_instruction_set(istrset): + info = cpuinfo.get_cpu_info() + flags = info.get('flags', []) + rt = istrset in flags + if rt == False: + raise ValueError(f"[Error] Current CPU does not support instruction set: {istrset}\n") + + arch = get_architecture() + if arch == "ARM": + mayiuse_instruction_set("sve") + elif arch == "x86": + mayiuse_instruction_set("avx512_bf16") + else: + raise Exception("[Error] Unknown CPU platform\n") + def init_engine(self): def get_physical_cores_per_numa_node(): @@ -274,7 +301,7 @@ def get_physical_cores_per_numa_node(): begin = time.time() if self.check_model_exist() == False: - exit(-1) + sys.exit(-1) as_model_config = allspark.AsModelConfig( model_name=self.model_name, @@ -300,6 +327,16 @@ def get_physical_cores_per_numa_node(): else: as_model_config.num_threads = get_physical_cores_per_numa_node() + if self.engine_config["matmul_precision"] != "highest": + try: + self._caniuse_bf16_gemm() + except ValueError as e: 
+                print(f"{str(e)} You need to set the `matmul_precision` field in the config_xxx.json file to `highest`") +                sys.exit(-1) +            except Exception as e: +                print(f"{str(e)}") +                sys.exit(-1) + as_model_config.matmul_precision = self.engine_config[ "matmul_precision"] @@ -504,7 +541,7 @@ def process_one_request_impl(self, request, stream_mode=False): request.out_text = "" if (len(new_ids) > 0): - output_ids.append(new_ids[0]) + output_ids.extend(new_ids) request.out_tokens = output_ids request.out_tokens_len = len(output_ids) diff --git a/python/setup.py b/python/setup.py index 3b1e58816..bceafcedb 100644 --- a/python/setup.py +++ b/python/setup.py @@ -167,7 +167,7 @@ def os_script_exec(cmd: str): ext_modules=[CMakeExtension("_allspark")], cmdclass={"build_ext": CMakeBuild}, setup_requires=["jinja2"], - install_requires=["protobuf==3.18"], + install_requires=["protobuf==3.18.3"], zip_safe=False, python_requires=">=3.8", extra_compile_args=["-O3"]) diff --git a/scripts/docker/dev_arm_alinux.Dockerfile b/scripts/docker/dev_arm_alinux.Dockerfile index 160ef623d..ab15cc865 100644 --- a/scripts/docker/dev_arm_alinux.Dockerfile +++ b/scripts/docker/dev_arm_alinux.Dockerfile @@ -67,6 +67,6 @@ RUN conda install -y pybind11 RUN pip3 install --upgrade pip && pip3 install -U setuptools # engine requirements -RUN pip3 install torch transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate +RUN pip3 install torch transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/ diff --git a/scripts/docker/dev_arm_centos8.Dockerfile b/scripts/docker/dev_arm_centos8.Dockerfile index 35e253de6..0039e5176 100644 --- a/scripts/docker/dev_arm_centos8.Dockerfile +++ b/scripts/docker/dev_arm_centos8.Dockerfile @@ -57,6 +57,6 @@ RUN conda install -y pybind11 RUN pip3 install --upgrade pip && pip3 install -U setuptools # engine requirements -RUN pip3 install torch transformers==4.38.0 protobuf==3.18.0 conan==1.60.0
pytest tokenizers scons wheel pandas tabulate +RUN pip3 install torch transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/ diff --git a/scripts/docker/dev_x86_centos7.Dockerfile b/scripts/docker/dev_x86_centos7.Dockerfile index 981665581..6f2aad7de 100644 --- a/scripts/docker/dev_x86_centos7.Dockerfile +++ b/scripts/docker/dev_x86_centos7.Dockerfile @@ -57,6 +57,6 @@ RUN conda install -y pybind11 # engine requirements RUN conda install -y pytorch-cpu -c pytorch -RUN pip3 install transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate +RUN pip3 install transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/ diff --git a/scripts/docker/dev_x86_ubuntu.Dockerfile b/scripts/docker/dev_x86_ubuntu.Dockerfile index 2d17df191..4f05a59d3 100644 --- a/scripts/docker/dev_x86_ubuntu.Dockerfile +++ b/scripts/docker/dev_x86_ubuntu.Dockerfile @@ -49,6 +49,6 @@ RUN pip3 install --upgrade pip && pip3 install -U setuptools # engine requirements RUN conda install -y pytorch-cpu -c pytorch -RUN pip3 install transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate +RUN pip3 install transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate WORKDIR /root/