diff --git a/ramalama/cli.py b/ramalama/cli.py index ca84a931..68f6dea3 100644 --- a/ramalama/cli.py +++ b/ramalama/cli.py @@ -838,8 +838,17 @@ def run_parser(subparsers): def run_cli(args): - model = New(args.MODEL, args) - model.run(args) + try: + model = New(args.MODEL, args) + model.run(args) + + except KeyError as e: + try: + args.quiet = True + model = OCI(args.MODEL, args.engine, ignore_stderr=True) + model.run(args) + except Exception: + raise e def serve_parser(subparsers): @@ -863,8 +872,17 @@ def serve_parser(subparsers): def serve_cli(args): if not args.container: args.detach = False - model = New(args.MODEL, args) - model.serve(args) + + try: + model = New(args.MODEL, args) + model.serve(args) + except KeyError as e: + try: + args.quiet = True + model = OCI(args.MODEL, args.engine, ignore_stderr=True) + model.serve(args) + except Exception: + raise e def stop_parser(subparsers): @@ -969,8 +987,8 @@ def _rm_model(models, args): raise e try: # attempt to remove as a container image - m = OCI(model, args.engine) - m.remove(args, ignore_stderr=True) + m = OCI(model, args.engine, ignore_stderr=True) + m.remove(args) return except Exception: pass diff --git a/ramalama/kube.py b/ramalama/kube.py index 2a727c55..377c16ef 100644 --- a/ramalama/kube.py +++ b/ramalama/kube.py @@ -5,7 +5,7 @@ class Kube: - def __init__(self, model, args, exec_args): + def __init__(self, model, image, args, exec_args): self.ai_image = model if hasattr(args, "MODEL"): self.ai_image = args.MODEL @@ -18,8 +18,7 @@ def __init__(self, model, args, exec_args): self.model = model.removeprefix("oci://") self.args = args self.exec_args = exec_args - - self.image = args.image + self.image = image def gen_volumes(self): mounts = f"""\ diff --git a/ramalama/model.py b/ramalama/model.py index a8ce7408..ef08e1bb 100644 --- a/ramalama/model.py +++ b/ramalama/model.py @@ -447,6 +447,7 @@ def handle_runtime(self, args, exec_args, exec_model_path): return exec_args def generate_container_config(self, model_path, args, exec_args): + self.image = self._image(args) if args.generate == "quadlet": self.quadlet(model_path, args, exec_args) elif args.generate == "kube": @@ -488,17 +489,17 @@ def serve(self, args): self.execute_command(model_path, exec_args, args) def quadlet(self, model, args, exec_args): - quadlet = Quadlet(model, args, exec_args) + quadlet = Quadlet(model, self.image, args, exec_args) quadlet.generate() def quadlet_kube(self, model, args, exec_args): - kube = Kube(model, args, exec_args) + kube = Kube(model, self.image, args, exec_args) kube.generate() - quadlet = Quadlet(model, args, exec_args) + quadlet = Quadlet(model, self.image, args, exec_args) quadlet.kube() def kube(self, model, args, exec_args): - kube = Kube(model, args, exec_args) + kube = Kube(model, self.image, args, exec_args) kube.generate() def path(self, args): diff --git a/ramalama/oci.py b/ramalama/oci.py index cb69d59c..0f62138b 100644 --- a/ramalama/oci.py +++ b/ramalama/oci.py @@ -96,13 +96,14 @@ def list_models(args): class OCI(Model): - def __init__(self, model, conman): + def __init__(self, model, conman, ignore_stderr=False): super().__init__(model.removeprefix(prefix).removeprefix("docker://")) for t in MODEL_TYPES: if self.model.startswith(t + "://"): raise ValueError(f"{model} invalid: Only OCI Model types supported") self.type = "OCI" self.conman = conman + self.ignore_stderr = ignore_stderr def login(self, args): conman_args = [self.conman, "login"] @@ -278,7 +279,8 @@ def push(self, source, args): raise e def pull(self, args): - print(f"Downloading {self.model}...") + if not args.quiet: + print(f"Downloading {self.model}...") if not args.engine: raise NotImplementedError("OCI images require a container engine like Podman or Docker") @@ -290,7 +292,7 @@ def pull(self, args): if args.authfile: conman_args.extend([f"--authfile={args.authfile}"]) conman_args.extend([self.model]) - run_cmd(conman_args, debug=args.debug) + run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr) return MNT_FILE def _registry_reference(self): @@ -326,10 +328,10 @@ def remove(self, args, ignore_stderr=False): try: conman_args = [self.conman, "manifest", "rm", self.model] - run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr) + run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr) except subprocess.CalledProcessError: conman_args = [self.conman, "rmi", f"--force={args.ignore}", self.model] - run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr) + run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr) def exists(self, args): try: @@ -344,7 +346,7 @@ def exists(self, args): conman_args = [self.conman, "image", "inspect", self.model] try: - run_cmd(conman_args, debug=args.debug) + run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr) return self.model except Exception: return None diff --git a/ramalama/quadlet.py b/ramalama/quadlet.py index 56cb1db5..1eda445f 100644 --- a/ramalama/quadlet.py +++ b/ramalama/quadlet.py @@ -1,10 +1,10 @@ import os -from ramalama.common import MNT_DIR, MNT_FILE, default_image, get_env_vars +from ramalama.common import MNT_DIR, MNT_FILE, get_env_vars class Quadlet: - def __init__(self, model, args, exec_args): + def __init__(self, model, image, args, exec_args): self.ai_image = model if hasattr(args, "MODEL"): self.ai_image = args.MODEL @@ -17,6 +17,7 @@ def __init__(self, model, args, exec_args): self.model = model.removeprefix("oci://") self.args = args self.exec_args = exec_args + self.image = image def kube(self): outfile = self.name + ".kube" @@ -64,7 +65,7 @@ def generate(self): AddDevice=-/dev/dri AddDevice=-/dev/kfd Exec={" ".join(self.exec_args)} -Image={default_image()} +Image={self.image} {env_var_string} {volume} {name_string} diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats index cc9974cc..95f26f6c 100755 --- a/test/system/040-serve.bats +++ b/test/system/040-serve.bats @@ -239,7 +239,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name" is "$output" ".*command: \[\"--port\"\]" "command is correct" is "$output" ".*args: \['1234', '--model', '/mnt/models/model.file', '--max_model_len', '2048'\]" "args are correct" - is "$output" ".*image: quay.io/ramalama/ramalama" "image is correct" is "$output" ".*reference: ${ociimage}" "AI image should be created" is "$output" ".*pullPolicy: IfNotPresent" "pullPolicy should exist" @@ -257,7 +256,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name" is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file" run cat $name.yaml - is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image" is "$output" ".*command: \[\"llama-server\"\]" "Should command" is "$output" ".*containerPort: 1234" "Should container container port" @@ -274,7 +272,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name" is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file" run cat $name.yaml - is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image" is "$output" ".*command: \[\"llama-server\"\]" "Should command" is "$output" ".*containerPort: 1234" "Should container container port" diff --git a/test/system/055-convert.bats b/test/system/055-convert.bats index 7a331b20..b5a057cd 100644 --- a/test/system/055-convert.bats +++ b/test/system/055-convert.bats @@ -37,20 +37,26 @@ load helpers @test "ramalama convert tiny to image" { skip_if_darwin + skip_if_docker run_ramalama pull tiny - run_ramalama convert tiny oci://ramalama/tiny + run_ramalama convert tiny oci://quay.io/ramalama/tiny run_ramalama list is "$output" ".*ramalama/tiny:latest" - run_ramalama rm ramalama/tiny + if is_container; then + cname=c_$(safename) + run_ramalama serve -n ${cname} -d quay.io/ramalama/tiny + run_ramalama stop ${cname} + fi + run_ramalama rm quay.io/ramalama/tiny run_ramalama list - assert "$output" !~ ".*ramalama/tiny" "image was removed" + assert "$output" !~ ".*quay.io/ramalama/tiny" "image was removed" - run_ramalama convert ollama://tinyllama oci://ramalama/tiny + run_ramalama convert ollama://tinyllama oci://quay.io/ramalama/tinyllama run_ramalama list - is "$output" ".*ramalama/tiny:latest" - run_ramalama rm ramalama/tiny + is "$output" ".*quay.io/ramalama/tinyllama:latest" + run_ramalama rm quay.io/ramalama/tinyllama run_ramalama list - assert "$output" !~ ".*ramalama/tiny" "image was removed" + assert "$output" !~ ".*ramalama/tinyllama" "image was removed" podman image prune --force }