From 172b836d2db43821cdf6373d8566793cebc5d522 Mon Sep 17 00:00:00 2001
From: Eric Curtin <ecurtin@redhat.com>
Date: Wed, 26 Feb 2025 12:34:30 +0000
Subject: [PATCH] benchmark failing because of lack of flag

Specifically privileged because it's not present in the args
object.

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
---
 docs/ramalama-bench.1.md   | 54 ++++++++++++++++++++++++++++++++++++++
 ramalama/cli.py            | 14 +++++-----
 test/system/002-bench.bats | 22 ++++++++++++++++
 3 files changed, 82 insertions(+), 8 deletions(-)
 create mode 100755 test/system/002-bench.bats

diff --git a/docs/ramalama-bench.1.md b/docs/ramalama-bench.1.md
index 85dcfe97..2cfa20f8 100644
--- a/docs/ramalama-bench.1.md
+++ b/docs/ramalama-bench.1.md
@@ -25,9 +25,22 @@ URL support means if a model is on a web site or even on your local system, you
 
 ## OPTIONS
 
+#### **--authfile**=*path*
+path of the authentication file for OCI registries
+
+#### **--device**
+Add a host device to the container. Optional permissions parameter can
+be used to specify device permissions by combining r for read, w for
+write, and m for mknod(2).
+
+Example: --device=/dev/dri/renderD128:/dev/xvdc:rwm
+
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--name**, **-n**
+name of the container to run the Model in
+
 #### **--network**=*none*
 set the network mode for the container
 
@@ -35,6 +48,47 @@ set the network mode for the container
 number of gpu layers, 0 means CPU inferencing, 999 means use max layers (default: -1)
 The default -1, means use whatever is automatically deemed appropriate (0 or 999)
 
+#### **--privileged**
+By default, RamaLama containers are unprivileged (=false) and cannot, for
+example, modify parts of the operating system. This is because by default
+a container is only allowed limited access to devices. A "privileged"
+container is given the same access to devices as the user launching
+the container, with the exception of virtual consoles (/dev/tty\d+)
+when running in systemd mode (--systemd=always).
+
+A privileged container turns off the security features that isolate the
+container from the host. Dropped Capabilities, limited devices, read-only
+mount points, AppArmor/SELinux separation, and Seccomp filters are
+all disabled. Due to the disabled security features, the privileged
+field should almost never be set as containers can easily break out of
+confinement.
+
+Containers running in a user namespace (e.g., rootless containers) cannot
+have more privileges than the user that launched them.
+
+#### **--pull**=*policy*
+
+- **always**: Always pull the image and throw an error if the pull fails.
+- **missing**: Only pull the image when it does not exist in the local containers storage.  Throw an error if no image is found and the pull fails.
+- **never**: Never pull the image but use the one from the local containers storage.  Throw an error when no image is found.
+- **newer**: Pull if the image on the registry is newer than the one in the local containers storage.  An image is considered to be newer when the digests are different.  Comparing the time stamps is prone to errors.  Pull errors are suppressed if a local image was found.
+
+#### **--seed**=
+Specify a seed rather than using a random seed for model interaction
+
+#### **--temp**="0.8"
+Temperature of the response from the AI Model
+llama.cpp explains this as:
+
+    The lower the number is, the more deterministic the response.
+
+    The higher the number is the more creative the response is, but more likely to hallucinate when set too high.
+
+        Usage: Lower numbers are good for virtual assistants where we need deterministic responses. Higher numbers are good for roleplay or creative tasks like editing stories
+
+#### **--tls-verify**=*true*
+require HTTPS and verify certificates when contacting OCI registries
+
 ## DESCRIPTION
 Benchmark specified AI Model.
 
diff --git a/ramalama/cli.py b/ramalama/cli.py
index c0d3dc33..f6b17456 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -452,14 +452,8 @@ def add_network_argument(parser, dflt="none"):
 
 def bench_parser(subparsers):
     parser = subparsers.add_parser("bench", aliases=["benchmark"], help="benchmark specified AI Model")
+    bench_run_serve_perplexity_args(parser)
     add_network_argument(parser)
-    parser.add_argument(
-        "--ngl",
-        dest="ngl",
-        type=int,
-        default=config.get("ngl", -1),
-        help="number of layers to offload to the gpu, if available",
-    )
     parser.add_argument("MODEL")  # positional argument
     parser.set_defaults(func=bench_cli)
 
@@ -781,7 +775,7 @@ def push_cli(args):
 
 
 def run_serve_perplexity_args(parser):
-    parser.add_argument("--authfile", help="path of the authentication file")
+    bench_run_serve_perplexity_args(parser)
     parser.add_argument(
         "-c",
         "--ctx-size",
@@ -789,6 +783,10 @@ def run_serve_perplexity_args(parser):
         default=config.get('ctx_size', 2048),
         help="size of the prompt context (0 = loaded from model)",
     )
+
+
+def bench_run_serve_perplexity_args(parser):
+    parser.add_argument("--authfile", help="path of the authentication file")
     parser.add_argument(
         "--device", dest="device", action='append', type=str, help="device to leak in to the running container"
     )
diff --git a/test/system/002-bench.bats b/test/system/002-bench.bats
new file mode 100755
index 00000000..6232de52
--- /dev/null
+++ b/test/system/002-bench.bats
@@ -0,0 +1,22 @@
+#!/usr/bin/env bats
+#
+# Simplest set of ramalama tests. If any of these fail, we have serious problems.
+#
+
+load helpers
+
+# Override standard setup! We don't yet trust ramalama-images or ramalama-rm
+function setup() {
+    # Makes test logs easier to read
+    BATS_TEST_NAME_PREFIX="[002] "
+}
+
+#### DO NOT ADD ANY TESTS HERE! ADD NEW TESTS AT BOTTOM!
+
+# bats test_tags=distro-integration
+@test "ramalama bench" {
+    run_ramalama bench smollm:135m
+    is "$output" ".*model.*size.*" "model and size in output"
+}
+
+# vim: filetype=sh