
Commit 459262b

Merge branch 'main' into fix-quantization-lib

IlyasMoutawwakil committed Sep 4, 2024
2 parents 1b66595 + 6dfec54
Showing 22 changed files with 95 additions and 50 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test_api_rocm.yaml
@@ -27,7 +27,7 @@ concurrency:

jobs:
run_api_rocm_tests:
runs-on: [self-hosted, amd-gpu, single-gpu, mi250]
runs-on: [self-hosted, amd-gpu, single-gpu]

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-rocm
@@ -37,6 +37,7 @@ jobs:
--device /dev/kfd
--device /dev/dri
--env ROCR_VISIBLE_DEVICES
--env HIP_VISIBLE_DEVICES=0
--volume /mnt/cache/.cache/huggingface:/mnt/cache/

steps:
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
@@ -37,6 +37,7 @@ jobs:
--device /dev/kfd
--device /dev/dri
--env ROCR_VISIBLE_DEVICES
--env HIP_VISIBLE_DEVICES=0,1
--volume /mnt/cache/.cache/huggingface:/mnt/cache/

steps:
@@ -45,7 +46,7 @@

- name: Install dependencies
run: |
pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq]
pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15"
- name: Run tests
env:
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
@@ -37,6 +37,7 @@ jobs:
--device /dev/kfd
--device /dev/dri
--env ROCR_VISIBLE_DEVICES
--env HIP_VISIBLE_DEVICES=0
--volume /mnt/cache/.cache/huggingface:/mnt/cache/

steps:
@@ -52,4 +53,4 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
PUSH_REPO_ID: optimum-benchmark/rocm
run: |
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)"
2 changes: 1 addition & 1 deletion Makefile
@@ -172,10 +172,10 @@ test_cli_rocm_pytorch_multi_gpu:
test_cli_rocm_pytorch_single_gpu:
pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"

# llm-perf
test_cli_llama_cpp:
pytest -s -k "llama_cpp"

# llm-perf
install_llm_perf_cuda_pytorch:
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
10 changes: 5 additions & 5 deletions docker/rocm/Dockerfile
@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG ROCM_VERSION=5.7.1
ARG ROCM_VERSION=6.1.2
ARG UBUNTU_VERSION=22.04

FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}

# Install necessary packages
ENV PATH="/opt/rocm/bin:${PATH}"
ENV DEBIAN_FRONTEND noninteractive
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
sudo build-essential git bash-completion \
python3.10 python3-pip python3.10-dev && \
@@ -29,13 +29,13 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
cd /opt/rocm/share/amd_smi && pip install .

# Install PyTorch
ARG TORCH_ROCM=rocm5.7
ARG TORCH_ROCM=rocm6.1
ARG TORCH_VERSION=stable

RUN if [ "${TORCH_VERSION}" = "stable" ]; then \
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \
elif [ "${TORCH_VERSION}" = "nighly" ]; then \
pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \
elif [ "${TORCH_VERSION}" = "nightly" ]; then \
pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/${TORCH_ROCM} ; \
else \
pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \
fi
2 changes: 1 addition & 1 deletion docker/unroot/Dockerfile
@@ -1,4 +1,4 @@
ARG IMAGE
ARG IMAGE="optimum-benchmark:latest"

FROM $IMAGE

2 changes: 1 addition & 1 deletion optimum_benchmark/__init__.py
@@ -1,7 +1,7 @@
from .backends import (
BackendConfig,
IPEXConfig,
INCConfig,
IPEXConfig,
LlamaCppConfig,
LLMSwarmConfig,
ORTConfig,
2 changes: 1 addition & 1 deletion optimum_benchmark/backends/__init__.py
@@ -1,7 +1,7 @@
from .config import BackendConfig
from .ipex.config import IPEXConfig
from .llama_cpp.config import LlamaCppConfig
from .llm_swarm.config import LLMSwarmConfig
from .ipex.config import IPEXConfig
from .neural_compressor.config import INCConfig
from .onnxruntime.config import ORTConfig
from .openvino.config import OVConfig
3 changes: 2 additions & 1 deletion optimum_benchmark/backends/config.py
@@ -97,9 +97,10 @@ def __post_init__(self):
if is_nvidia_system():
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = self.device_ids
LOGGER.info(f"CUDA_VISIBLE_DEVICES was set to {os.environ['CUDA_VISIBLE_DEVICES']}.")
elif is_rocm_system():
# https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html
os.environ["ROCR_VISIBLE_DEVICES"] = self.device_ids
LOGGER.info(f"ROCR_VISIBLE_DEVICES was set to {os.environ['ROCR_VISIBLE_DEVICES']}.")
else:
raise RuntimeError("CUDA device is only supported on systems with NVIDIA or ROCm drivers.")

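For context on the hunk above: visibility variables such as `ROCR_VISIBLE_DEVICES` only take effect if they are exported before the GPU runtime enumerates devices, which is why `BackendConfig.__post_init__` sets them (and now logs the value) up front. A minimal standalone sketch of that ordering, with the system check stubbed out as a hypothetical helper:

```python
import os

def is_rocm_system() -> bool:
    # Hypothetical stand-in for the repo's driver detection.
    return os.path.exists("/opt/rocm")

# Restrict the process to GPU 0 *before* importing torch: once the
# ROCr/HIP runtime has enumerated devices, changing the variable no
# longer affects the current process.
if is_rocm_system():
    os.environ["ROCR_VISIBLE_DEVICES"] = "0"

import torch  # noqa: E402  (imported late on purpose)

print(torch.cuda.device_count())  # expected: 1 on a multi-GPU ROCm host
```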
4 changes: 0 additions & 4 deletions optimum_benchmark/backends/ipex/backend.py
@@ -1,14 +1,11 @@
import inspect
from collections import OrderedDict
from tempfile import TemporaryDirectory
from typing import Any, Dict

import torch
from hydra.utils import get_class

from ...generators.dataset_generator import DatasetGenerator
from ...import_utils import is_accelerate_available, is_torch_distributed_available
from ...task_utils import TEXT_GENERATION_TASKS
from ..base import Backend
from ..transformers_utils import fast_weights_init
from .config import IPEXConfig
@@ -33,7 +30,6 @@ def __init__(self, config: IPEXConfig) -> None:
else:
raise NotImplementedError(f"IPEXBackend does not support task {self.config.task}")


def load(self) -> None:
self.logger.info("\t+ Creating backend temporary directory")
self.tmpdir = TemporaryDirectory()
6 changes: 3 additions & 3 deletions optimum_benchmark/backends/ipex/config.py
@@ -1,11 +1,12 @@
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
from dataclasses import dataclass
from typing import Optional

from ...import_utils import ipex_version
from ..config import BackendConfig

TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"]


@dataclass
class IPEXConfig(BackendConfig):
name: str = "ipex"
@@ -34,4 +35,3 @@ def __post_init__(self):

if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES:
raise ValueError(f"`torch_dtype` must be one of {TORCH_DTYPES}. Got {self.torch_dtype} instead.")

1 change: 0 additions & 1 deletion optimum_benchmark/backends/ipex/utils.py
@@ -7,4 +7,3 @@
"image-classification": "optimum.intel.IPEXModelForImageClassification",
"audio-classification": "optimum.intel.IPEXModelForAudioClassification",
}

2 changes: 1 addition & 1 deletion optimum_benchmark/cli.py
@@ -10,10 +10,10 @@
Benchmark,
BenchmarkConfig,
EnergyStarConfig,
IPEXConfig,
INCConfig,
InferenceConfig,
InlineConfig,
IPEXConfig,
LlamaCppConfig,
LLMSwarmConfig,
ORTConfig,
3 changes: 3 additions & 0 deletions optimum_benchmark/import_utils.py
@@ -158,14 +158,17 @@ def onnxruntime_version():
except importlib.metadata.PackageNotFoundError:
return None


def openvino_version():
if _openvino_available:
return importlib.metadata.version("openvino")


def ipex_version():
if _ipex_available:
return importlib.metadata.version("intel_extension_for_pytorch")


def neural_compressor_version():
if _neural_compressor_available:
return importlib.metadata.version("neural_compressor")
6 changes: 3 additions & 3 deletions optimum_benchmark/launchers/base.py
@@ -39,10 +39,10 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
@contextmanager
def device_isolation(self, pid: int, device_ids: Optional[str] = None):
if device_ids is None:
if is_nvidia_system():
device_ids = os.environ.get("CUDA_VISIBLE_DEVICES", None)
elif is_rocm_system():
if is_rocm_system():
device_ids = os.environ.get("ROCR_VISIBLE_DEVICES", None)
elif is_nvidia_system():
device_ids = os.environ.get("CUDA_VISIBLE_DEVICES", None)

self.device_isolation_process = Process(
target=assert_device_isolation,
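The hunk above flips the fallback order so that on a host exposing both driver stacks, the ROCm variable is consulted first. A self-contained rephrasing of that precedence (the helper below is hypothetical, not the repo's API):

```python
import os
from typing import Optional

def resolve_isolated_device_ids(device_ids: Optional[str], rocm: bool, nvidia: bool) -> Optional[str]:
    # Mirrors the fallback order in the hunk: explicit ids win, then
    # ROCR_VISIBLE_DEVICES on ROCm systems, then CUDA_VISIBLE_DEVICES.
    if device_ids is not None:
        return device_ids
    if rocm:
        return os.environ.get("ROCR_VISIBLE_DEVICES")
    if nvidia:
        return os.environ.get("CUDA_VISIBLE_DEVICES")
    return None

os.environ["ROCR_VISIBLE_DEVICES"] = "0,1"
print(resolve_isolated_device_ids(None, rocm=True, nvidia=True))  # -> "0,1"
```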
25 changes: 14 additions & 11 deletions optimum_benchmark/system_utils.py
@@ -173,7 +173,9 @@ def get_gpu_vram_mb() -> List[int]:

def get_gpu_device_ids() -> str:
if is_nvidia_system():
if os.environ.get("CUDA_VISIBLE_DEVICES", None) is not None:
if os.environ.get("NVIDIA_VISIBLE_DEVICES", None) is not None:
device_ids = os.environ["NVIDIA_VISIBLE_DEVICES"]
elif os.environ.get("CUDA_VISIBLE_DEVICES", None) is not None:
device_ids = os.environ["CUDA_VISIBLE_DEVICES"]
else:
if not is_pynvml_available():
@@ -187,12 +189,12 @@ def get_gpu_device_ids() -> str:
device_ids = ",".join(str(i) for i in device_ids)
pynvml.nvmlShutdown()
elif is_rocm_system():
if os.environ.get("GPU_DEVICE_ORDINAL", None) is not None:
device_ids = os.environ["GPU_DEVICE_ORDINAL"]
if os.environ.get("ROCR_VISIBLE_DEVICES", None) is not None:
device_ids = os.environ["ROCR_VISIBLE_DEVICES"]
elif os.environ.get("HIP_VISIBLE_DEVICES", None) is not None:
device_ids = os.environ["HIP_VISIBLE_DEVICES"]
elif os.environ.get("ROCR_VISIBLE_DEVICES", None) is not None:
device_ids = os.environ["ROCR_VISIBLE_DEVICES"]
elif os.environ.get("CUDA_VISIBLE_DEVICES", None) is not None:
device_ids = os.environ["CUDA_VISIBLE_DEVICES"]
else:
if not is_amdsmi_available() or not is_pyrsmi_available():
raise ValueError(
@@ -201,17 +203,18 @@
"or PyRSMI library from https://github.com/ROCm/pyrsmi."
)

if is_amdsmi_available():
if is_pyrsmi_available():
rocml.smi_initialize()
device_ids = list(range(rocml.smi_get_device_count()))
device_ids = ",".join(str(i) for i in device_ids)
rocml.smi_shutdown()

elif is_amdsmi_available():
amdsmi.amdsmi_init()
device_ids = list(range(len(amdsmi.amdsmi_get_processor_handles())))
device_ids = ",".join(str(i) for i in device_ids)
amdsmi.amdsmi_shut_down()

elif is_pyrsmi_available():
rocml.smi_initialize()
device_ids = list(range(rocml.smi_get_device_count()))
device_ids = ",".join(str(i) for i in device_ids)
rocml.smi_shutdown()
else:
raise ValueError("Couldn't infer GPU device ids.")

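The two hunks above change which environment variables `get_gpu_device_ids` trusts, and in what order, before falling back to SMI enumeration. A condensed, self-contained sketch of the new precedence (hypothetical helper, not the repo's function):

```python
import os
from typing import Optional

# New lookup order per the hunks above: the runtime-level variables
# (NVIDIA_/ROCR_) are consulted before the framework-level ones.
NVIDIA_ORDER = ("NVIDIA_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES")
ROCM_ORDER = ("ROCR_VISIBLE_DEVICES", "HIP_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES")

def device_ids_from_env(rocm: bool) -> Optional[str]:
    for var in ROCM_ORDER if rocm else NVIDIA_ORDER:
        value = os.environ.get(var)
        if value is not None:
            return value
    return None  # caller falls through to pyrsmi/amdsmi enumeration

os.environ["HIP_VISIBLE_DEVICES"] = "0"
print(device_ids_from_env(rocm=True))  # -> "0" (no ROCR_VISIBLE_DEVICES set)
```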
2 changes: 1 addition & 1 deletion optimum_benchmark/trackers/energy.py
@@ -105,7 +105,7 @@ def aggregate(efficiencies: List["Efficiency"]) -> "Efficiency":
def from_energy(energy: "Energy", volume: int, unit: str) -> "Efficiency":
return Efficiency(value=volume / energy.total if energy.total > 0 else 0, unit=unit)

def log(self, prefix: str = "method"):
def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} energy efficiency: {self.value:f} ({self.unit})")


2 changes: 1 addition & 1 deletion optimum_benchmark/trackers/latency.py
@@ -76,7 +76,7 @@ def from_values(values: List[float], unit: str) -> "Latency":
values=values,
)

def log(self, prefix: str = "method"):
def log(self, prefix: str = ""):
stdev_percentage = 100 * self.stdev / self.mean if self.mean > 0 else 0
LOGGER.info(f"\t\t+ {prefix} latency:")
LOGGER.info(f"\t\t\t- count: {self.count}")
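With the default prefix changing from "method" to an empty string in both trackers, call sites that want labelled log lines must now pass the prefix explicitly. A minimal usage sketch, assuming the `Latency.from_values` constructor shown in the hunk:

```python
import logging

from optimum_benchmark.trackers.latency import Latency

logging.basicConfig(level=logging.INFO)  # the trackers log via the module LOGGER

latency = Latency.from_values([0.12, 0.11, 0.13], unit="s")
latency.log(prefix="forward")  # labelled: "+ forward latency: ..."
latency.log()                  # unlabelled under the new default
```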
9 changes: 7 additions & 2 deletions optimum_benchmark/trackers/memory.py
@@ -81,7 +81,7 @@ def aggregate(memories: List["Memory"]) -> "Memory":
max_allocated=max_allocated,
)

def log(self, prefix: str = "forward"):
def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} memory:")
if self.max_ram is not None:
LOGGER.info(f"\t\t\t- max RAM: {self.max_ram:f} ({self.unit})")
@@ -303,7 +303,12 @@ def monitor_gpu_vram_memory(monitored_pid: int, device_ids: List[int], connectio
device_handle = devices_handles[device_id]

try:
used_global_memory += rocml.smi_get_device_memory_used(device_id)
if is_amdsmi_available():
used_global_memory += amdsmi.amdsmi_get_gpu_memory_total(
device_handle, mem_type=amdsmi.AmdSmiMemoryType.VRAM
)
elif is_pyrsmi_available():
used_global_memory += rocml.smi_get_device_memory_used(device_id, type="VRAM")
except Exception as e:
LOGGER.warning(f"Could not get memory usage for device {device_id}: {e}")

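The hunk above replaces the unconditional pyrsmi call with an amdsmi-first branch. A self-contained sketch of that fallback, assuming at least one of the two ROCm Python bindings is installed (the function name is hypothetical; both calls are taken verbatim from the hunk):

```python
def rocm_vram_reading(device_id: int, device_handle) -> int:
    # Prefer the amdsmi binding when importable, mirroring the hunk: it
    # queries the handle's VRAM figure via amdsmi_get_gpu_memory_total.
    try:
        import amdsmi
        return amdsmi.amdsmi_get_gpu_memory_total(
            device_handle, mem_type=amdsmi.AmdSmiMemoryType.VRAM
        )
    except ImportError:
        # Fall back to pyrsmi's rocml, which reports per-device usage.
        from pyrsmi import rocml
        return rocml.smi_get_device_memory_used(device_id, type="VRAM")
```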
2 changes: 1 addition & 1 deletion setup.py
@@ -63,7 +63,7 @@
"quality": ["ruff"],
"testing": ["pytest", "hydra-joblib-launcher"],
# optimum backends
"ipex":[f"optimum[ipex]>={MIN_OPTIMUM_VERSION}"],
"ipex": [f"optimum[ipex]>={MIN_OPTIMUM_VERSION}"],
"openvino": [f"optimum[openvino,nncf]>={MIN_OPTIMUM_VERSION}"],
"onnxruntime": [f"optimum[onnxruntime]>={MIN_OPTIMUM_VERSION}"],
"onnxruntime-gpu": [f"optimum[onnxruntime-gpu]>={MIN_OPTIMUM_VERSION}"],