Skip to content

Commit

Permalink
Fix up handling of image selection on generate
Browse files — browse the repository at this point in the history
Also fall back to trying OCI images on ramalama run and serve.

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
  • Loading branch information
rhatdan committed Feb 21, 2025
1 parent c5054f1 commit a706567
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 32 deletions.
30 changes: 24 additions & 6 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,8 +838,17 @@ def run_parser(subparsers):


def run_cli(args):
    """Run a chat session against args.MODEL.

    Resolution strategy:
      1. Try the normal model transport factory (``New``).
      2. If that raises ``KeyError`` (model reference not recognized by any
         transport), fall back to treating the reference as an OCI container
         image and run that instead.

    If the OCI fallback also fails, the original ``KeyError`` is re-raised so
    the user sees the primary lookup failure, not the fallback's.
    """
    try:
        model = New(args.MODEL, args)
        model.run(args)
    except KeyError as e:
        try:
            # Suppress "Downloading ..." noise while probing the OCI path.
            args.quiet = True
            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
            model.run(args)
        except Exception:
            # Surface the original transport-lookup error.
            raise e


def serve_parser(subparsers):
Expand All @@ -863,8 +872,17 @@ def serve_parser(subparsers):
def serve_cli(args):
    """Serve args.MODEL over an API endpoint.

    Detached mode only makes sense when running inside a container, so it is
    forced off otherwise. Like ``run_cli``, falls back to interpreting the
    model reference as an OCI container image when the regular transport
    factory raises ``KeyError``; on a failed fallback the original error is
    re-raised.
    """
    if not args.container:
        # --detach is a container-engine concept; meaningless without one.
        args.detach = False

    try:
        model = New(args.MODEL, args)
        model.serve(args)
    except KeyError as e:
        try:
            # Suppress "Downloading ..." noise while probing the OCI path.
            args.quiet = True
            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
            model.serve(args)
        except Exception:
            # Surface the original transport-lookup error.
            raise e


def stop_parser(subparsers):
Expand Down Expand Up @@ -969,8 +987,8 @@ def _rm_model(models, args):
raise e
try:
# attempt to remove as a container image
m = OCI(model, args.engine)
m.remove(args, ignore_stderr=True)
m = OCI(model, args.engine, ignore_stderr=True)
m.remove(args)
return
except Exception:
pass
Expand Down
5 changes: 2 additions & 3 deletions ramalama/kube.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class Kube:
def __init__(self, model, args, exec_args):
def __init__(self, model, image, args, exec_args):
self.ai_image = model
if hasattr(args, "MODEL"):
self.ai_image = args.MODEL
Expand All @@ -18,8 +18,7 @@ def __init__(self, model, args, exec_args):
self.model = model.removeprefix("oci://")
self.args = args
self.exec_args = exec_args

self.image = args.image
self.image = image

def gen_volumes(self):
mounts = f"""\
Expand Down
9 changes: 5 additions & 4 deletions ramalama/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ def handle_runtime(self, args, exec_args, exec_model_path):
return exec_args

def generate_container_config(self, model_path, args, exec_args):
self.image = self._image(args)
if args.generate == "quadlet":
self.quadlet(model_path, args, exec_args)
elif args.generate == "kube":
Expand Down Expand Up @@ -488,17 +489,17 @@ def serve(self, args):
self.execute_command(model_path, exec_args, args)

def quadlet(self, model, args, exec_args):
    """Generate a podman quadlet unit file for this model.

    Passes ``self.image`` (resolved per-invocation container image) so the
    generated unit references the selected image rather than a default.
    """
    quadlet = Quadlet(model, self.image, args, exec_args)
    quadlet.generate()

def quadlet_kube(self, model, args, exec_args):
    """Generate Kubernetes YAML plus a quadlet .kube unit for this model.

    Both generators receive ``self.image`` so the YAML and the quadlet unit
    reference the same, explicitly selected container image.
    """
    kube = Kube(model, self.image, args, exec_args)
    kube.generate()
    quadlet = Quadlet(model, self.image, args, exec_args)
    quadlet.kube()

def kube(self, model, args, exec_args):
    """Generate a Kubernetes YAML file for this model using self.image."""
    kube = Kube(model, self.image, args, exec_args)
    kube.generate()

def path(self, args):
Expand Down
14 changes: 8 additions & 6 deletions ramalama/oci.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ def list_models(args):


class OCI(Model):
    def __init__(self, model, conman, ignore_stderr=False):
        """Model backed by an OCI container image.

        Args:
            model: image reference; ``oci://`` / ``docker://`` prefixes are
                stripped before being stored.
            conman: container engine binary to invoke (podman or docker).
            ignore_stderr: when True, engine commands run by this instance
                suppress stderr — used when OCI is tried as a best-effort
                fallback so probe failures stay quiet.

        Raises:
            ValueError: if the reference carries a non-OCI transport prefix
                (any entry of MODEL_TYPES).
        """
        super().__init__(model.removeprefix(prefix).removeprefix("docker://"))
        for t in MODEL_TYPES:
            if self.model.startswith(t + "://"):
                raise ValueError(f"{model} invalid: Only OCI Model types supported")
        self.type = "OCI"
        self.conman = conman
        self.ignore_stderr = ignore_stderr

def login(self, args):
conman_args = [self.conman, "login"]
Expand Down Expand Up @@ -278,7 +279,8 @@ def push(self, source, args):
raise e

def pull(self, args):
print(f"Downloading {self.model}...")
if not args.quiet:
print(f"Downloading {self.model}...")
if not args.engine:
raise NotImplementedError("OCI images require a container engine like Podman or Docker")

Expand All @@ -290,7 +292,7 @@ def pull(self, args):
if args.authfile:
conman_args.extend([f"--authfile={args.authfile}"])
conman_args.extend([self.model])
run_cmd(conman_args, debug=args.debug)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
return MNT_FILE

def _registry_reference(self):
Expand Down Expand Up @@ -326,10 +328,10 @@ def remove(self, args, ignore_stderr=False):

try:
conman_args = [self.conman, "manifest", "rm", self.model]
run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
except subprocess.CalledProcessError:
conman_args = [self.conman, "rmi", f"--force={args.ignore}", self.model]
run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)

def exists(self, args):
try:
Expand All @@ -344,7 +346,7 @@ def exists(self, args):

conman_args = [self.conman, "image", "inspect", self.model]
try:
run_cmd(conman_args, debug=args.debug)
run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
return self.model
except Exception:
return None
7 changes: 4 additions & 3 deletions ramalama/quadlet.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os

from ramalama.common import MNT_DIR, MNT_FILE, default_image, get_env_vars
from ramalama.common import MNT_DIR, MNT_FILE, get_env_vars


class Quadlet:
def __init__(self, model, args, exec_args):
def __init__(self, model, image, args, exec_args):
self.ai_image = model
if hasattr(args, "MODEL"):
self.ai_image = args.MODEL
Expand All @@ -17,6 +17,7 @@ def __init__(self, model, args, exec_args):
self.model = model.removeprefix("oci://")
self.args = args
self.exec_args = exec_args
self.image = image

def kube(self):
outfile = self.name + ".kube"
Expand Down Expand Up @@ -64,7 +65,7 @@ def generate(self):
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(self.exec_args)}
Image={default_image()}
Image={self.image}
{env_var_string}
{volume}
{name_string}
Expand Down
3 changes: 0 additions & 3 deletions test/system/040-serve.bats
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*command: \[\"--port\"\]" "command is correct"
is "$output" ".*args: \['1234', '--model', '/mnt/models/model.file', '--max_model_len', '2048'\]" "args are correct"

is "$output" ".*image: quay.io/ramalama/ramalama" "image is correct"
is "$output" ".*reference: ${ociimage}" "AI image should be created"
is "$output" ".*pullPolicy: IfNotPresent" "pullPolicy should exist"

Expand All @@ -257,7 +256,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"

run cat $name.yaml
is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image"
is "$output" ".*command: \[\"llama-server\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"

Expand All @@ -274,7 +272,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file"

run cat $name.yaml
is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image"
is "$output" ".*command: \[\"llama-server\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"

Expand Down
20 changes: 13 additions & 7 deletions test/system/055-convert.bats
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,26 @@ load helpers

@test "ramalama convert tiny to image" {
skip_if_darwin
skip_if_docker
run_ramalama pull tiny
run_ramalama convert tiny oci://ramalama/tiny
run_ramalama convert tiny oci://quay.io/ramalama/tiny
run_ramalama list
is "$output" ".*ramalama/tiny:latest"
run_ramalama rm ramalama/tiny
if is_container; then
cname=c_$(safename)
run_ramalama serve -n ${cname} -d quay.io/ramalama/tiny
run_ramalama stop ${cname}
fi
run_ramalama rm quay.io/ramalama/tiny
run_ramalama list
assert "$output" !~ ".*ramalama/tiny" "image was removed"
assert "$output" !~ ".*quay.io/ramalama/tiny" "image was removed"

run_ramalama convert ollama://tinyllama oci://ramalama/tiny
run_ramalama convert ollama://tinyllama oci://quay.io/ramalama/tinyllama
run_ramalama list
is "$output" ".*ramalama/tiny:latest"
run_ramalama rm ramalama/tiny
is "$output" ".*quay.io/ramalama/tinyllama:latest"
run_ramalama rm quay.io/ramalama/tinyllama
run_ramalama list
assert "$output" !~ ".*ramalama/tiny" "image was removed"
assert "$output" !~ ".*ramalama/tinyllama" "image was removed"

podman image prune --force
}
Expand Down

0 comments on commit a706567

Please sign in to comment.