Skip to content

Commit

Permalink
verify flow (#484)
Browse files — browse the repository at this point in the history
* Clean up and link up llm_inputs

Clean up and link up llm_inputs

* Connect metrics module and silence transformer import warning

* Rename test_llm_profile.py to test_metrics.py

* Rename again to prefix files with llm

* Remove ./ from constant path

* Add comment for silencing and handle extra args

* Handle pass through arguments

* Parse extra args correctly and update testing

* Add logging and fix some error checking

* extra commit from rebase

* Remove unused import
  • Loading branch information
debermudez committed Mar 12, 2024
1 parent bd07262 commit fcfc6e0
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 44 deletions.
4 changes: 4 additions & 0 deletions src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@

LOGGER_NAME: str = "genai-pa"

# Default server endpoints for the HTTP and gRPC protocols.
DEFAULT_HTTP_URL = "localhost:8000"
DEFAULT_GRPC_URL = "localhost:8001"


# Identifiers for the supported HuggingFace benchmark datasets.
OPEN_ORCA = "openorca"
CNN_DAILY_MAIL = "cnn_dailymail"
# Filename that generated LLM input data is written to.
DEFAULT_INPUT_DATA_JSON = "llm_inputs.json"
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class LlmInputs:
A library of methods that control the generation of LLM Inputs
"""

OUTPUT_FILENAME = DEFAULT_INPUT_DATA_JSON

OPEN_ORCA_URL = "https://datasets-server.huggingface.co/rows?dataset=Open-Orca%2FOpenOrca&config=default&split=train"
CNN_DAILYMAIL_URL = "https://datasets-server.huggingface.co/rows?dataset=cnn_dailymail&config=1.0.0&split=train"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,19 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import contextlib
import io
from dataclasses import dataclass
from itertools import pairwise

import numpy as np
from genai_pa.utils import load_json
from transformers import AutoTokenizer

# Silence tokenizer warning on import
with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
io.StringIO()
) as stderr:
from transformers import AutoTokenizer


@dataclass
Expand Down
51 changes: 49 additions & 2 deletions src/c++/perf_analyzer/genai-pa/genai_pa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,71 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import contextlib
import io
import logging
import sys

from genai_pa import parser
from genai_pa.constants import LOGGER_NAME
from genai_pa.exceptions import GenAiPAException
from genai_pa.llm_inputs.llm_inputs import LlmInputs

# Silence tokenizer warning on import
with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
io.StringIO()
) as stderr:
from genai_pa.llm_metrics import LLMProfileData
from transformers import AutoTokenizer as tokenizer
from transformers import logging as token_logger

token_logger.set_verbosity_error()


logging.basicConfig(level=logging.INFO, format="%(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(LOGGER_NAME)


def generate_inputs(args):
    """Generate the LLM input data file for the selected dataset.

    Delegates to LlmInputs.create_openai_llm_inputs with the library's
    default starting index and length, plus the dataset, model, and
    streaming settings taken from the parsed CLI args.
    """
    dataset_name = args.dataset
    model_name = args.model
    use_streaming = args.streaming
    LlmInputs.create_openai_llm_inputs(
        dataset_name,
        LlmInputs.DEFAULT_STARTING_INDEX,
        LlmInputs.DEFAULT_LENGTH,
        model_name,
        use_streaming,
    )


def calculate_metrics(file: str, tokenizer_name: str = "gpt2") -> LLMProfileData:
    """Parse a profile export file and compute LLM metrics from it.

    Parameters
    ----------
    file : str
        Path to the profile export file produced by perf_analyzer.
    tokenizer_name : str, optional
        Name of the HuggingFace tokenizer used to count tokens.
        Defaults to "gpt2", preserving the previous hard-coded behavior;
        callers can now pass the tokenizer that matches the profiled model.

    Returns
    -------
    LLMProfileData
        The parsed metrics for the profiling run.
    """
    t = tokenizer.from_pretrained(tokenizer_name)
    return LLMProfileData(file, t)


def report_output(metrics: LLMProfileData, args):
    """Print the statistics for the load mode used in this run.

    Exactly one of concurrency_range / request_rate_range is expected to
    be present on the pruned args namespace; raise if neither survived.
    """
    if "concurrency_range" in args:
        mode, level = "concurrency", args.concurrency_range
    elif "request_rate_range" in args:
        mode, level = "request_rate", args.request_rate_range
    else:
        raise GenAiPAException(
            "Neither concurrency_range nor request_rate_range was found in args when reporting metrics"
        )
    # TODO: metrics reporter class that consumes Stats class for nicer formatting
    print(metrics.get_statistics(mode, int(level)))


# Separate function that can raise exceptions used for testing
# to assert correct errors and messages.
# Optional argv used for testing - will default to sys.argv if None.
def run(argv=None):
    """Run the end-to-end flow: parse args, generate inputs, profile, report.

    Raises
    ------
    GenAiPAException
        Wrapping any exception raised by one of the underlying steps.
    """
    # The span previously contained stale pre-rewrite lines
    # (`args = parser.parse_args(argv)` / `args.func(args)`) alongside the
    # new flow; only the extra_args-aware sequence below is kept.
    try:
        args, extra_args = parser.parse_args(argv)
        generate_inputs(args)
        args.func(args, extra_args)
        metrics = calculate_metrics(args.profile_export_file)
        report_output(metrics, args)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise GenAiPAException(e) from e

Expand Down
73 changes: 42 additions & 31 deletions src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
import logging
from pathlib import Path

from genai_pa.constants import LOGGER_NAME
from genai_pa.constants import CNN_DAILY_MAIL, DEFAULT_HTTP_URL, LOGGER_NAME, OPEN_ORCA

logger = logging.getLogger(LOGGER_NAME)


def prune_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
def _prune_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
"""
Prune the parsed arguments to remove args with None or False values.
"""
Expand All @@ -42,7 +42,7 @@ def prune_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
)


def update_load_manager_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
def _update_load_manager_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
"""
Update GenAI-PA load manager attributes to PA format
"""
Expand All @@ -57,17 +57,16 @@ def update_load_manager_args(args: argparse.ArgumentParser) -> argparse.Argument
### Handlers ###


def handler(args, extra_args):
    """Default dispatch target: run the profiler with parsed and pass-through args."""
    # Local import keeps the parser module importable without loading the
    # wrapper (and its subprocess machinery) at argument-parse time.
    from genai_pa.wrapper import Profiler

    Profiler.run(model=args.model, args=args, extra_args=extra_args)


### Parsers ###


def add_model_args(parser):
def _add_model_args(parser):
model_group = parser.add_argument_group("Model")

model_group.add_argument(
Expand All @@ -79,9 +78,9 @@ def add_model_args(parser):
)


def add_profile_args(parser):
def _add_profile_args(parser):
profile_group = parser.add_argument_group("Profiling")
load_management_group = profile_group.add_mutually_exclusive_group()
load_management_group = profile_group.add_mutually_exclusive_group(required=True)

profile_group.add_argument(
"-b",
Expand Down Expand Up @@ -152,34 +151,42 @@ def add_profile_args(parser):
)


def add_endpoint_args(parser):
def _add_endpoint_args(parser):
endpoint_group = parser.add_argument_group("Endpoint")

endpoint_group.add_argument(
"-i",
type=str.lower,
choices=["http", "grpc"],
default="http",
required=False,
help=f"Sets the protocol used to communicate with inference service",
)

endpoint_group.add_argument(
"-u",
"--url",
type=str,
default="localhost:8001",
default=DEFAULT_HTTP_URL,
required=False,
dest="u",
metavar="URL",
help="URL of the endpoint to target for benchmarking.",
)


def add_dataset_args(parser):
pass

def _add_dataset_args(parser):
dataset_group = parser.add_argument_group("Dataset")
# TODO: Do we want to remove dataset and tokenizer?
# dataset_group.add_argument(
# "--dataset",
# type=str,
# default="OpenOrca",
# choices=["OpenOrca", "cnn_dailymail"],
# required=False,
# help="HuggingFace dataset to use for the benchmark.",
# )

dataset_group.add_argument(
"--dataset",
type=str.lower,
default=OPEN_ORCA,
choices=[OPEN_ORCA, CNN_DAILY_MAIL],
required=False,
help="HuggingFace dataset to use for benchmarking.",
)

# dataset_group.add_argument(
# "--tokenizer",
# type=str,
Expand All @@ -202,14 +209,18 @@ def parse_args(argv=None):
parser.set_defaults(func=handler)

# Conceptually group args for easier visualization
add_model_args(parser)
add_profile_args(parser)
add_endpoint_args(parser)
add_dataset_args(parser)
_add_model_args(parser)
_add_profile_args(parser)
_add_endpoint_args(parser)
_add_dataset_args(parser)

args = parser.parse_args(argv)
args, extra_args = parser.parse_known_args(argv)
if extra_args:
# strip off the "--" demarking the pass through arguments
extra_args = extra_args[1:]
logger.info(f"Additional pass through args: {extra_args}")

args = update_load_manager_args(args)
args = prune_args(args)
args = _update_load_manager_args(args)
args = _prune_args(args)

return args
return args, extra_args
19 changes: 16 additions & 3 deletions src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@

class Profiler:
@staticmethod
def run(model, args=None):
skip_args = ["model", "func"]
def build_cmd(model, args, extra_args):
skip_args = ["model", "func", "dataset"]
if hasattr(args, "version"):
cmd = f"perf_analyzer --version"
else:
Expand All @@ -52,9 +52,22 @@ def run(model, args=None):
cmd += f"-b {value} "
else:
if len(arg) == 1:
cmd += f"-{arg} {value}"
cmd += f"-{arg} {value} "
else:
arg = utils.convert_option_name(arg)
cmd += f"--{arg} {value} "

if extra_args is not None:
for arg in extra_args:
cmd += f"{arg} "
# TODO: Once the OpenAI endpoint support is in place in PA core,
# update the input-data option arg
# cmd += f"--input-data {DEFAULT_INPUT_DATA_JSON} -p 10000 -s 99"
cmd += f"--input-data ./input_data.json -p 10000 -s 99"
return cmd

@staticmethod
def run(model, args=None, extra_args=None):
    """Assemble the perf_analyzer command line for *model* and execute it."""
    command = Profiler.build_cmd(model, args, extra_args)
    logger.info(f"Running Perf Analyzer : '{command}'")
    # check=True surfaces a non-zero perf_analyzer exit as CalledProcessError.
    subprocess.run(command, shell=True, check=True)
3 changes: 2 additions & 1 deletion src/c++/perf_analyzer/genai-pa/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ requires-python = ">=3.8,<4"
dependencies = [
"numpy",
"pytest",
"rich"
"rich",
"transformers"
]

# CLI Entrypoint
Expand Down
15 changes: 10 additions & 5 deletions src/c++/perf_analyzer/genai-pa/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,11 @@ def test_help_arguments_output_and_exit(self, arg, expected_output, capsys):
[
(["-b", "2"], {"batch_size": 2}),
(["--batch-size", "2"], {"batch_size": 2}),
(["--concurrency", "3"], {"concurrency_range": "3"}),
(["--max-threads", "4"], {"max_threads": 4}),
(
["--profile-export-file", "text.txt"],
{"profile_export_file": Path("text.txt")},
),
(["--request-rate", "1.5"], {"request_rate_range": "1.5"}),
(["--service-kind", "triton"], {"service_kind": "triton"}),
(["--service-kind", "openai"], {"service_kind": "openai"}),
# TODO: Remove streaming from implementation. It is invalid with HTTP.
Expand All @@ -76,8 +74,8 @@ def test_help_arguments_output_and_exit(self, arg, expected_output, capsys):
],
)
def test_arguments_output(self, arg, expected_attributes, capsys):
combined_args = ["--model", "test_model"] + arg
args = parser.parse_args(combined_args)
combined_args = ["--model", "test_model", "--concurrency", "2"] + arg
args, _ = parser.parse_args(combined_args)

# Check that the attributes are set correctly
for key, value in expected_attributes.items():
Expand All @@ -96,4 +94,11 @@ def test_arguments_model_not_provided(self):

def test_exception_on_nonzero_exit(self):
    """run() must wrap a failing perf_analyzer invocation in GenAiPAException."""
    # The span previously kept the stale call without --concurrency, which
    # now fails argument parsing since a load argument is required; only the
    # updated invocation is retained.
    with pytest.raises(GenAiPAException) as e:
        run(["-m", "nonexistent_model", "--concurrency", "3"])

def test_pass_through_args(self):
    """Everything after the "--" sentinel is returned as pass-through args."""
    base_args = ["-m", "test_model", "--concurrency", "1"]
    passthrough = ["--", "With", "great", "power"]
    _, parsed_passthrough = parser.parse_args(base_args + passthrough)

    # parse_args strips the leading "--" demarcating the pass-through section.
    assert parsed_passthrough == passthrough[1:]
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

import numpy as np
import pytest
from genai_pa.llm_profile import LLMMetrics, LLMProfileData
from genai_pa.llm_metrics import LLMMetrics, LLMProfileData
from genai_pa.utils import remove_file
from transformers import AutoTokenizer

Expand Down

0 comments on commit fcfc6e0

Please sign in to comment.