Skip to content

Commit

Permalink
solve security issue; helper: bugfix, cpu platform check (#23)
Browse files Browse the repository at this point in the history
- Check whether the CPU can run bf16 GEMM
- Fix the lost-tokens bug
- Resolve the GitHub security alert by upgrading to protobuf==3.18.3
  • Loading branch information
laiwenzh authored Jun 30, 2024
1 parent e83d670 commit 7b2c119
Show file tree
Hide file tree
Showing 13 changed files with 52 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ This subsection lists the third-party dependencies for the different stages of D

## Model Inference Phase

- [protobuf](https://protobuf.dev/)(3.18): For parsing model files.
- [protobuf](https://protobuf.dev/)(3.18.3): For parsing model files.
- [pybind11](https://github.com/pybind/pybind11)(2.8): For binding python interfaces.
- [onednn](https://github.com/oneapi-src/oneDNN), [mkl](https://www.intel.com/content/www/us/en/docs/onemkl/get-started-guide/2023-0/overview.html): BLAS libraries, for accelerating GEMM calculations.
- [openmp](https://www.openmp.org/): A standard parallel programming library.
Expand Down
2 changes: 1 addition & 1 deletion README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ $$ x_{u8} = x_{fp32} / scale + zeropoint $$

## 模型推理阶段

- [protobuf](https://protobuf.dev/)(3.18): For parsing model files.
- [protobuf](https://protobuf.dev/)(3.18.3): For parsing model files.
- [pybind11](https://github.com/pybind/pybind11)(2.8): For binding python interfaces.
- [onednn](https://github.com/oneapi-src/oneDNN), [mkl](https://www.intel.com/content/www/us/en/docs/onemkl/get-started-guide/2023-0/overview.html): BLAS libraries, for accelerating GEMM calculations.
- [openmp](https://www.openmp.org/): A standard parallel programming library.
Expand Down
2 changes: 1 addition & 1 deletion conan/conanfile.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[requires]
protobuf/3.18.1
protobuf/3.18.3
gtest/1.11.0
glog/0.5.0
pybind11/2.8.1
Expand Down
2 changes: 1 addition & 1 deletion conan/conanfile_arm.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[requires]
protobuf/3.18.1
protobuf/3.18.3
gtest/1.11.0
glog/0.5.0
libunwind/1.7.2
Expand Down
4 changes: 2 additions & 2 deletions examples/python/0_basic/basic_example_qwen_v20.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,15 @@ def done_callback(future):
## download model from huggingface
# original_model = {
# "source": "huggingface",
# "model_id": "Qwen/Qwen2-1.5B",
# "model_id": "Qwen/Qwen2-1.5B-Instruct",
# "revision": "",
# "model_path": ""
# }

## download model from modelscope
original_model = {
"source": "modelscope",
"model_id": "qwen/Qwen2-1.5B",
"model_id": "qwen/Qwen2-1.5B-Instruct",
"revision": "master",
"model_path": ""
}
Expand Down
2 changes: 1 addition & 1 deletion examples/python/model_config/config_chatglm4_9b.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"min_length": 0,
"max_length": 2048,
"no_repeat_ngram_size": 0,
"eos_token_id": 2,
"eos_token_id": 151329,
"seed": 1234,
"stop_words_ids": [
[
Expand Down
1 change: 1 addition & 0 deletions examples/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ huggingface_hub
modelscope
gradio
tabulate
py-cpuinfo

# model requirements
sentencepiece
Expand Down
41 changes: 39 additions & 2 deletions python/dashinfer/helper/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,33 @@ def check_model_exist(self):
return False
return True

def _caniuse_bf16_gemm(self):
    """Verify that the current CPU can execute bf16 GEMM kernels.

    Checks for the instruction set required by the bf16 matmul path:
    ``sve`` on ARM, ``avx512_bf16`` on x86.

    Raises:
        ValueError: if the CPU lacks the required instruction set.
        Exception: if the CPU architecture is neither ARM nor x86.
    """
    # Imported lazily (py-cpuinfo is third-party) so the dependency is
    # only required when this check actually runs.
    import cpuinfo
    import platform

    def get_architecture():
        # Normalize platform.machine() into a coarse architecture family.
        arch = platform.machine().lower()
        if arch.startswith(("arm", "aarch")):
            return "ARM"
        if arch.startswith(("x86", "i686", "i386", "amd64")):
            return "x86"
        return "Unknown"

    def mayiuse_instruction_set(instr_set):
        # py-cpuinfo reports CPU feature flags as a list of strings.
        info = cpuinfo.get_cpu_info()
        flags = info.get('flags', [])
        if instr_set not in flags:
            raise ValueError(
                f"[Error] Current CPU does not support instruction set: {instr_set}\n")

    arch = get_architecture()
    if arch == "ARM":
        mayiuse_instruction_set("sve")
    elif arch == "x86":
        mayiuse_instruction_set("avx512_bf16")
    else:
        raise Exception("[Error] Unknown CPU platform\n")

def init_engine(self):

def get_physical_cores_per_numa_node():
Expand Down Expand Up @@ -274,7 +301,7 @@ def get_physical_cores_per_numa_node():
begin = time.time()

if self.check_model_exist() == False:
exit(-1)
sys.exit(-1)

as_model_config = allspark.AsModelConfig(
model_name=self.model_name,
Expand All @@ -300,6 +327,16 @@ def get_physical_cores_per_numa_node():
else:
as_model_config.num_threads = get_physical_cores_per_numa_node()

if self.engine_config["matmul_precision"] != "highest":
try:
self._caniuse_bf16_gemm()
except ValueError as e:
print(f"{str(e)} You need to set the `matmul_precision` field in the config_xxx.json file to `highest`")
exit(-1)
except Exception as e:
print(f"{str(e)}")
exit(-1)

as_model_config.matmul_precision = self.engine_config[
"matmul_precision"]

Expand Down Expand Up @@ -504,7 +541,7 @@ def process_one_request_impl(self, request, stream_mode=False):
request.out_text = ""

if (len(new_ids) > 0):
output_ids.append(new_ids[0])
output_ids.extend(new_ids)

request.out_tokens = output_ids
request.out_tokens_len = len(output_ids)
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def os_script_exec(cmd: str):
ext_modules=[CMakeExtension("_allspark")],
cmdclass={"build_ext": CMakeBuild},
setup_requires=["jinja2"],
install_requires=["protobuf==3.18"],
install_requires=["protobuf==3.18.3"],
zip_safe=False,
python_requires=">=3.8",
extra_compile_args=["-O3"])
2 changes: 1 addition & 1 deletion scripts/docker/dev_arm_alinux.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,6 @@ RUN conda install -y pybind11
RUN pip3 install --upgrade pip && pip3 install -U setuptools

# engine requirements
RUN pip3 install torch transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate
RUN pip3 install torch transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate

WORKDIR /root/
2 changes: 1 addition & 1 deletion scripts/docker/dev_arm_centos8.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,6 @@ RUN conda install -y pybind11
RUN pip3 install --upgrade pip && pip3 install -U setuptools

# engine requirements
RUN pip3 install torch transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate
RUN pip3 install torch transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate

WORKDIR /root/
2 changes: 1 addition & 1 deletion scripts/docker/dev_x86_centos7.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,6 @@ RUN conda install -y pybind11

# engine requirements
RUN conda install -y pytorch-cpu -c pytorch
RUN pip3 install transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate
RUN pip3 install transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate

WORKDIR /root/
2 changes: 1 addition & 1 deletion scripts/docker/dev_x86_ubuntu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,6 @@ RUN pip3 install --upgrade pip && pip3 install -U setuptools

# engine requirements
RUN conda install -y pytorch-cpu -c pytorch
RUN pip3 install transformers==4.38.0 protobuf==3.18.0 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate
RUN pip3 install transformers==4.38.0 protobuf==3.18.3 conan==1.60.0 pytest tokenizers scons wheel pandas tabulate

WORKDIR /root/

0 comments on commit 7b2c119

Please sign in to comment.