Skip to content

Commit

Permalink
Fix up handling of image selection on generate
Browse files — browse the repository at this point in the history
Also fall back to trying OCI images on ramalama run and serve.

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
  • Loading branch information
rhatdan committed Feb 21, 2025
1 parent c5054f1 commit a706567
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 32 deletions.
30 changes: 24 additions & 6 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,8 +838,17 @@ def run_parser(subparsers):


def run_cli(args):
    """Run a chat session against args.MODEL.

    Resolution strategy:
      1. Try the normal model transport factory (``New``).
      2. If that raises ``KeyError`` (model reference not recognized by any
         transport), fall back to treating the reference as an OCI container
         image and run that instead.

    If the OCI fallback also fails, the original ``KeyError`` is re-raised so
    the user sees the primary lookup failure, not the fallback's.
    """
    try:
        model = New(args.MODEL, args)
        model.run(args)
    except KeyError as e:
        try:
            # Suppress "Downloading ..." noise while probing the OCI path.
            args.quiet = True
            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
            model.run(args)
        except Exception:
            # Surface the original transport-lookup error.
            raise e


def serve_parser(subparsers):
Expand All @@ -863,8 +872,17 @@ def serve_parser(subparsers):
def serve_cli(args):
    """Serve args.MODEL over an API endpoint.

    Detached mode only makes sense when running inside a container, so it is
    forced off otherwise. Like ``run_cli``, falls back to interpreting the
    model reference as an OCI container image when the regular transport
    factory raises ``KeyError``; on a failed fallback the original error is
    re-raised.
    """
    if not args.container:
        # --detach is a container-engine concept; meaningless without one.
        args.detach = False

    try:
        model = New(args.MODEL, args)
        model.serve(args)
    except KeyError as e:
        try:
            # Suppress "Downloading ..." noise while probing the OCI path.
            args.quiet = True
            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
            model.serve(args)
        except Exception:
            # Surface the original transport-lookup error.
            raise e


def stop_parser(subparsers):
Expand Down Expand Up @@ -969,8 +987,8 @@ def _rm_model(models, args):
raise e
try:
# attempt to remove as a container image
m = OCI(model, args.engine)
m.remove(args, ignore_stderr=True)
m = OCI(model, args.engine, ignore_stderr=True)
m.remove(args)
return
except Exception:
pass
Expand Down
5 changes: 2 additions & 3 deletions ramalama/kube.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class Kube:
def __init__(self, model, args, exec_args):
def __init__(self, model, image, args, exec_args):
self.ai_image = model
if hasattr(args, "MODEL"):
self.ai_image = args.MODEL
Expand All @@ -18,8 +18,7 @@ def __init__(self, model, args, exec_args):
self.model = model.removeprefix("oci://")
self.args = args
self.exec_args = exec_args

self.image = args.image
self.image = image

def gen_volumes(self):
mounts = f"""\
Expand Down
9 changes: 5 additions & 4 deletions ramalama/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ def handle_runtime(self, args, exec_args, exec_model_path):
return exec_args

def generate_container_config(self, model_path, args, exec_args):
self.image = self._image(args)
if args.generate == "quadlet":
self.quadlet(model_path, args, exec_args)
elif args.generate == "kube":
Expand Down Expand Up @@ -488,17 +489,17 @@ def serve(self, args):
self.execute_command(model_path, exec_args, args)

def quadlet(self, model, args, exec_args):
    """Generate a podman quadlet unit file for this model.

    Passes ``self.image`` (resolved per-invocation container image) so the
    generated unit references the selected image rather than a default.
    """
    quadlet = Quadlet(model, self.image, args, exec_args)
    quadlet.generate()

def quadlet_kube(self, model, args, exec_args):
    """Generate Kubernetes YAML plus a quadlet .kube unit for this model.

    Both generators receive ``self.image`` so the YAML and the quadlet unit
    reference the same, explicitly selected container image.
    """
    kube = Kube(model, self.image, args, exec_args)
    kube.generate()
    quadlet = Quadlet(model, self.image, args, exec_args)
    quadlet.kube()

def kube(self, model, args, exec_args):
    """Generate a Kubernetes YAML file for this model using self.image."""
    kube = Kube(model, self.image, args, exec_args)
    kube.generate()

def path(self, args):
Expand Down
14 changes: 8 additions & 6 deletions ramalama/oci.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ def list_models(args):


class OCI(Model):
    def __init__(self, model, conman, ignore_stderr=False):
        """Model backed by an OCI container image.

        Args:
            model: image reference; ``oci://`` / ``docker://`` prefixes are
                stripped before being stored.
            conman: container engine binary to invoke (podman or docker).
            ignore_stderr: when True, engine commands run by this instance
                suppress stderr — used when OCI is tried as a best-effort
                fallback so probe failures stay quiet.

        Raises:
            ValueError: if the reference carries a non-OCI transport prefix
                (any entry of MODEL_TYPES).
        """
        super().__init__(model.removeprefix(prefix).removeprefix("docker://"))
        for t in MODEL_TYPES:
            if self.model.startswith(t + "://"):
                raise ValueError(f"{model} invalid: Only OCI Model types supported")
        self.type = "OCI"
        self.conman = conman
        self.ignore_stderr = ignore_stderr

def login(self, args):
conman_args = [self.conman, "login"]
Expand Down Expand Up @@ -278,7 +279,8 @@ def push(self, source, args):
raise e

def pull(self, args):
print(f"Downloading {self.model}...")
if not args.quiet:
print(f"Downloading {self.model}...")
if not args.engine:
raise NotImplementedError("OCI images require a container engine like Podman or Docker")

Expand All @@ -290,7 +292,7 @@ def pull(self, args):
if args.authfile:
conman_args.extend([f"--authfile={args.authfile}"])
conman_args.extend([self.model])
run_cmd(conman_args, debug=args.debug)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
return MNT_FILE

def _registry_reference(self):
Expand Down Expand Up @@ -326,10 +328,10 @@ def remove(self, args, ignore_stderr=False):

try:
conman_args = [self.conman, "manifest", "rm", self.model]
run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
except subprocess.CalledProcessError:
conman_args = [self.conman, "rmi", f"--force={args.ignore}", self.model]
run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)

def exists(self, args):
try:
Expand All @@ -344,7 +346,7 @@ def exists(self, args):

conman_args = [self.conman, "image", "inspect", self.model]
try:
run_cmd(conman_args, debug=args.debug)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
return self.model
except Exception:
return None
7 changes: 4 additions & 3 deletions ramalama/quadlet.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os

from ramalama.common import MNT_DIR, MNT_FILE, default_image, get_env_vars
from ramalama.common import MNT_DIR, MNT_FILE, get_env_vars


class Quadlet:
def __init__(self, model, args, exec_args):
def __init__(self, model, image, args, exec_args):
self.ai_image = model
if hasattr(args, "MODEL"):
self.ai_image = args.MODEL
Expand All @@ -17,6 +17,7 @@ def __init__(self, model, args, exec_args):
self.model = model.removeprefix("oci://")
self.args = args
self.exec_args = exec_args
self.image = image

def kube(self):
outfile = self.name + ".kube"
Expand Down Expand Up @@ -64,7 +65,7 @@ def generate(self):
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(self.exec_args)}
Image={default_image()}
Image={self.image}
{env_var_string}
{volume}
{name_string}
Expand Down
3 changes: 0 additions & 3 deletions test/system/040-serve.bats
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*command: \[\"--port\"\]" "command is correct"
is "$output" ".*args: \['1234', '--model', '/mnt/models/model.file', '--max_model_len', '2048'\]" "args are correct"

is "$output" ".*image: quay.io/ramalama/ramalama" "image is correct"
is "$output" ".*reference: ${ociimage}" "AI image should be created"
is "$output" ".*pullPolicy: IfNotPresent" "pullPolicy should exist"

Expand All @@ -257,7 +256,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"

run cat $name.yaml
is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image"
is "$output" ".*command: \[\"llama-server\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"

Expand All @@ -274,7 +272,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file"

run cat $name.yaml
is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image"
is "$output" ".*command: \[\"llama-server\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"

Expand Down
20 changes: 13 additions & 7 deletions test/system/055-convert.bats
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,26 @@ load helpers

@test "ramalama convert tiny to image" {
skip_if_darwin
skip_if_docker
run_ramalama pull tiny
run_ramalama convert tiny oci://ramalama/tiny
run_ramalama convert tiny oci://quay.io/ramalama/tiny
run_ramalama list
is "$output" ".*ramalama/tiny:latest"
run_ramalama rm ramalama/tiny
if is_container; then
cname=c_$(safename)
run_ramalama serve -n ${cname} -d quay.io/ramalama/tiny
run_ramalama stop ${cname}
fi
run_ramalama rm quay.io/ramalama/tiny
run_ramalama list
assert "$output" !~ ".*ramalama/tiny" "image was removed"
assert "$output" !~ ".*quay.io/ramalama/tiny" "image was removed"

run_ramalama convert ollama://tinyllama oci://ramalama/tiny
run_ramalama convert ollama://tinyllama oci://quay.io/ramalama/tinyllama
run_ramalama list
is "$output" ".*ramalama/tiny:latest"
run_ramalama rm ramalama/tiny
is "$output" ".*quay.io/ramalama/tinyllama:latest"
run_ramalama rm quay.io/ramalama/tinyllama
run_ramalama list
assert "$output" !~ ".*ramalama/tiny" "image was removed"
assert "$output" !~ ".*ramalama/tinyllama" "image was removed"

podman image prune --force
}
Expand Down

0 comments on commit a706567

Please sign in to comment.