From d142e38bb45a399dc5a6a9ac76f735f5d7351ea1 Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Wed, 6 Sep 2023 14:16:28 -0500 Subject: [PATCH 01/11] Add AutoEjectFIFO service --- .../org/myrobotlab/service/AutoEjectFIFO.java | 154 ++++++++++++++++++ .../org/myrobotlab/service/TestCatcher.java | 29 +++- .../service/meta/AutoEjectFIFOMeta.java | 10 ++ .../myrobotlab/service/AutoEjectFIFOTest.java | 98 +++++++++++ 4 files changed, 284 insertions(+), 7 deletions(-) create mode 100644 src/main/java/org/myrobotlab/service/AutoEjectFIFO.java create mode 100644 src/main/java/org/myrobotlab/service/meta/AutoEjectFIFOMeta.java create mode 100644 src/test/java/org/myrobotlab/service/AutoEjectFIFOTest.java diff --git a/src/main/java/org/myrobotlab/service/AutoEjectFIFO.java b/src/main/java/org/myrobotlab/service/AutoEjectFIFO.java new file mode 100644 index 0000000000..2c0ffa0861 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/AutoEjectFIFO.java @@ -0,0 +1,154 @@ +package org.myrobotlab.service; + +import org.myrobotlab.framework.Service; +import org.myrobotlab.service.config.ServiceConfig; + +import java.util.List; +import java.util.concurrent.BlockingDeque; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * A simple service that acts as a circular FIFO queue. + * This queue can store a number of items, but once + * its max capacity is reached, any attempt to add more + * items ejects the oldest element, i.e. the head. + *

+ * This queue is not typed, i.e. it can store any type + * of object, with the downside that no type checking is + * performed. This is to allow the fifo to be used + * in any situation, since we don't currently have a way + * to create generic services. + * + * @author AutonomicPerfectionist + */ +public class AutoEjectFIFO extends Service { + public static final int DEFAULT_MAX_SIZE = 50; + + + /** + * Lock used to protect the fifo queue, + * used instead of synchronized block to allow + * multiple simultaneous readers so long as there + * is no writer writing to the queue. + */ + private final ReadWriteLock lock = new ReentrantReadWriteLock(); + + /** + * The actual queue, whose initial maximum size is set to + * {@link #DEFAULT_MAX_SIZE}. + */ + private BlockingDeque fifo = new LinkedBlockingDeque<>(DEFAULT_MAX_SIZE); + + + /** + * Constructor of service, reservedkey typically is a services name and inId + * will be its process id + * + * @param reservedKey the service name + * @param inId process id + */ + public AutoEjectFIFO(String reservedKey, String inId) { + super(reservedKey, inId); + } + + + /** + * Sets the size at which the FIFO will begin evicting + * elements. If smaller than the current number of items, + * then elements will be silently evicted. 
+ * @param size The new max size + */ + public void setMaxSize(int size) { + lock.writeLock().lock(); + BlockingDeque newFifo = new LinkedBlockingDeque<>(size); + newFifo.addAll(fifo); + fifo = newFifo; + lock.writeLock().unlock(); + } + + /** + * Add a new element to the FIFO, if + * it's full then this will trigger an + * eviction + * @param item The new item to be added to the tail + */ + public void add(Object item) { + lock.writeLock().lock(); + try { + if (!fifo.offer(item)) { + Object head = fifo.removeFirst(); + invoke("publishEviction", head); + fifo.add(item); + } + invoke("publishItemAdded", item); + } catch (Exception e) { + error(e); + } finally { + lock.writeLock().unlock(); + } + + + } + + public void clear() { + lock.writeLock().lock(); + fifo.clear(); + lock.writeLock().unlock(); + invoke("publishClear"); + } + + public List getAll() { + lock.readLock().lock(); + List ret = List.copyOf(fifo); + lock.readLock().unlock(); + invoke("publishAll", ret); + + return ret; + } + + public Object getHead() { + lock.readLock().lock(); + Object head = fifo.peek(); + lock.readLock().unlock(); + invoke("publishHead", head); + return head; + + } + + public Object getTail() { + lock.readLock().lock(); + Object tail = fifo.peekLast(); + lock.readLock().unlock(); + invoke("publishTail", tail); + return tail; + } + + public Object publishItemAdded(Object item) { + return item; + } + + public void publishClear() { + // Do nothing + } + + public List publishAll(List items) { + return items; + } + + public Object publishHead(Object head) { + return head; + } + + public Object publishTail(Object tail) { + return tail; + } + + public Object publishEviction(Object evicted) { + return evicted; + } + + + +} diff --git a/src/main/java/org/myrobotlab/service/TestCatcher.java b/src/main/java/org/myrobotlab/service/TestCatcher.java index c5f6f4a3da..bde3d8254b 100644 --- a/src/main/java/org/myrobotlab/service/TestCatcher.java +++ 
b/src/main/java/org/myrobotlab/service/TestCatcher.java @@ -109,6 +109,8 @@ public Ball() { public BlockingQueue strings = new LinkedBlockingDeque<>(); + public BlockingQueue objects = new LinkedBlockingQueue<>(); + /** * awesome override to simulate remote services - e.g. in * Serial.addByteListener @@ -169,6 +171,9 @@ public void clear() { pinSet.clear(); methodsCalled.clear(); longs.clear(); + integers.clear(); + strings.clear(); + objects.clear(); } public Message getMsg(long timeout) throws InterruptedException { @@ -265,21 +270,25 @@ public void checkMsg(long timeout, String method, Object... checkParms) throws I throw new IOException(String.format("expected null parameters - got non-null")); } + // Never reached since msg.data.length is accessed above and would throw NPE + // Probably don't need this if we can assume that msg.data is always non-null + // and may just be empty if (checkParms != null && msg.data == null) { log.error("{}", msg); - throw new IOException(String.format("expected non null parameters - got null")); + throw new IOException("expected non null parameters - got null"); } if (!method.equals(msg.method)) { log.error("{}", msg); throw new IOException(String.format("unlike methods - expected %s got %s", method, msg.method)); } - - for (int i = 0; i < checkParms.length; ++i) { - Object expected = checkParms[i]; - Object got = msg.data[i]; - if (!expected.equals(got)) { - throw new IOException(String.format("unlike methods - expected %s got %s", method, msg.method)); + if (checkParms != null) { + for (int i = 0; i < checkParms.length; ++i) { + Object expected = checkParms[i]; + Object got = msg.data[i]; + if (!expected.equals(got)) { + throw new IOException(String.format("unlike methods - expected %s got %s", method, msg.method)); + } } } @@ -332,6 +341,12 @@ public double onDouble(double data) { return data; } + public Object onObject(Object data) { + log.info("onObject {}", data); + objects.add(data); + return data; + } + public int 
waitForThis(int data, long sleep) { sleep(sleep); log.info("waitForThis {}", data); diff --git a/src/main/java/org/myrobotlab/service/meta/AutoEjectFIFOMeta.java b/src/main/java/org/myrobotlab/service/meta/AutoEjectFIFOMeta.java new file mode 100644 index 0000000000..004e8b275c --- /dev/null +++ b/src/main/java/org/myrobotlab/service/meta/AutoEjectFIFOMeta.java @@ -0,0 +1,10 @@ +package org.myrobotlab.service.meta; + +import org.myrobotlab.service.meta.abstracts.MetaData; + +public class AutoEjectFIFOMeta extends MetaData { + public AutoEjectFIFOMeta() { + addDescription("A simple sized FIFO that will auto-eject the oldest element when it reaches the given max size."); + } + +} diff --git a/src/test/java/org/myrobotlab/service/AutoEjectFIFOTest.java b/src/test/java/org/myrobotlab/service/AutoEjectFIFOTest.java new file mode 100644 index 0000000000..ab033d6da8 --- /dev/null +++ b/src/test/java/org/myrobotlab/service/AutoEjectFIFOTest.java @@ -0,0 +1,98 @@ +package org.myrobotlab.service; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.myrobotlab.test.AbstractTest; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class AutoEjectFIFOTest extends AbstractTest { + + private AutoEjectFIFO fifo; + private TestCatcher catcher; + + + @Before + public void createService() throws Exception { + fifo = (AutoEjectFIFO) Runtime.start("fifo", "AutoEjectFIFO"); + catcher = (TestCatcher) Runtime.start("catcher", "TestCatcher"); + catcher.clear(); + fifo.clear(); + } + + @After + public void releaseService() { + Runtime.release(fifo.getFullName()); + Runtime.release(catcher.getFullName()); + } + + @Test + public void testAdd10() throws IOException, InterruptedException { + catcher.subscribe(fifo.getFullName(), "publishItemAdded", "onInteger"); + sleep(50); + List ints = new ArrayList<>(); + for (int i 
= 0; i < 10; i ++) { + fifo.add(i); + ints.add(i); + } + catcher.waitForMsgs(10, 2000); + assertEquals(10, catcher.integers.size()); + assertEquals(0, fifo.getHead()); + // Last element was 9 since we added 0-9, not 1-10 + assertEquals(9, fifo.getTail()); + assertArrayEquals(ints.toArray(), fifo.getAll().toArray()); + } + + @Test + public void testAddMax() throws IOException, InterruptedException { + catcher.subscribe(fifo.getFullName(), "publishItemAdded", "onInteger"); + sleep(50); + for (int i = 0; i < AutoEjectFIFO.DEFAULT_MAX_SIZE; i ++) { + fifo.add(i); + } + catcher.waitForMsgs(AutoEjectFIFO.DEFAULT_MAX_SIZE, 2000); + assertEquals(AutoEjectFIFO.DEFAULT_MAX_SIZE, catcher.integers.size()); + assertEquals(0, fifo.getHead()); + assertEquals(AutoEjectFIFO.DEFAULT_MAX_SIZE - 1, fifo.getTail()); + } + + @Test + public void testAddMaxPlusOne() throws IOException, InterruptedException { + catcher.subscribe(fifo.getFullName(), "publishItemAdded", "onInteger"); + catcher.subscribe(fifo.getFullName(), "publishEviction", "onObject"); + sleep(50); + for (int i = 0; i < AutoEjectFIFO.DEFAULT_MAX_SIZE + 1; i ++) { + fifo.add(i); + } + catcher.waitForMsgs(AutoEjectFIFO.DEFAULT_MAX_SIZE + 2, 2000); + assertEquals(AutoEjectFIFO.DEFAULT_MAX_SIZE + 1, catcher.integers.size()); + assertEquals(1, catcher.objects.size()); + + assertEquals(1, fifo.getHead()); + assertEquals(AutoEjectFIFO.DEFAULT_MAX_SIZE, fifo.getTail()); + } + + @Test + public void testAddMaxPlusTwo() throws IOException, InterruptedException { + catcher.subscribe(fifo.getFullName(), "publishItemAdded", "onInteger"); + catcher.subscribe(fifo.getFullName(), "publishEviction", "onObject"); + sleep(50); + for (int i = 0; i < AutoEjectFIFO.DEFAULT_MAX_SIZE + 2; i ++) { + fifo.add(i); + } + + // Two more adds plus 2 evictions + catcher.waitForMsgs(AutoEjectFIFO.DEFAULT_MAX_SIZE + 4, 2000); + assertEquals(AutoEjectFIFO.DEFAULT_MAX_SIZE + 2, catcher.integers.size()); + assertEquals(2, catcher.objects.size()); + + 
assertEquals(2, fifo.getHead()); + assertEquals(AutoEjectFIFO.DEFAULT_MAX_SIZE + 1, fifo.getTail()); + } +} From 19e76c0527872092f00040c7362d077f777a34ba Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Wed, 6 Sep 2023 14:17:48 -0500 Subject: [PATCH 02/11] Add Llama service and update pom --- pom.xml | 11 +- .../java/org/myrobotlab/service/Llama.java | 154 ++++++++++++++++++ .../service/config/LlamaConfig.java | 41 +++++ .../myrobotlab/service/meta/LlamaMeta.java | 15 ++ .../WebGui/app/service/js/LlamaGui.js | 57 +++++++ .../WebGui/app/service/views/LlamaGui.html | 25 +++ 6 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/myrobotlab/service/Llama.java create mode 100644 src/main/java/org/myrobotlab/service/config/LlamaConfig.java create mode 100644 src/main/java/org/myrobotlab/service/meta/LlamaMeta.java create mode 100644 src/main/resources/resource/WebGui/app/service/js/LlamaGui.js create mode 100644 src/main/resources/resource/WebGui/app/service/views/LlamaGui.html diff --git a/pom.xml b/pom.xml index a00bd0db4f..bd904e8315 100644 --- a/pom.xml +++ b/pom.xml @@ -80,7 +80,7 @@ ${maven.build.timestamp} yyyyMMddHHmm - ${version} + ${version} ${git.branch} ${NODE_NAME} ${NODE_LABELS} @@ -614,6 +614,15 @@ + + + de.kherud + llama + 1.1.0 + provided + + + org.myrobotlab.audio diff --git a/src/main/java/org/myrobotlab/service/Llama.java b/src/main/java/org/myrobotlab/service/Llama.java new file mode 100644 index 0000000000..9492d0ba04 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/Llama.java @@ -0,0 +1,154 @@ +package org.myrobotlab.service; + +import de.kherud.llama.LlamaModel; +import de.kherud.llama.Parameters; +import org.myrobotlab.framework.Service; +import org.myrobotlab.logging.Level; +import org.myrobotlab.logging.LoggingFactory; +import org.myrobotlab.programab.Response; +import org.myrobotlab.service.config.LlamaConfig; +import org.myrobotlab.service.data.Utterance; +import 
org.myrobotlab.service.interfaces.ResponsePublisher; +import org.myrobotlab.service.interfaces.UtterancePublisher; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.URL; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.util.stream.StreamSupport; + +public class Llama extends Service implements UtterancePublisher, ResponsePublisher { + private transient LlamaModel model; + + /** + * Constructor of service, reservedkey typically is a services name and inId + * will be its process id + * + * @param reservedKey the service name + * @param inId process id + */ + public Llama(String reservedKey, String inId) { + super(reservedKey, inId); + } + + public void loadModel(String modelPath) { + Parameters params = new Parameters.Builder() + .setNGpuLayers(0) + .setTemperature(0.7f) + .setPenalizeNl(true) + .setMirostat(Parameters.MiroStat.V2) + .setAntiPrompt(new String[]{config.userPrompt}) + .build(); + model = new LlamaModel(modelPath, params); + } + + public Response getResponse(String text) { + if (model == null) { + error("Model is not loaded."); + return null; + } + + String prompt = config.systemPrompt + config.systemMessage + "\n" + text + "\n"; + String response = StreamSupport.stream(model.generate(prompt).spliterator(), false) + .map(LlamaModel.Output::toString) + .reduce("", (a, b) -> a + b); + + Utterance utterance = new Utterance(); + utterance.username = getName(); + utterance.text = response; + utterance.isBot = true; + utterance.channel = ""; + utterance.channelType = ""; + utterance.channelBotName = getName(); + utterance.channelName = ""; + invoke("publishUtterance", utterance); + Response res = new Response("friend", getName(), response, null); + invoke("publishResponse", res); + return res; + } + + public String findModelPath(String model) { + // First, we loop over all user-defined + // model directories + for 
(String dir : config.modelPaths) { + File path = new File(dir + fs + model); + if (path.exists()) { + return path.getAbsolutePath(); + } + } + + // Now, we check our data directory for any downloaded models + File path = new File(getDataDir() + fs + model); + if (path.exists()) { + return path.getAbsolutePath(); + } else if (config.modelUrls.containsKey(model)){ + // Model was not in data but we do have a URL for it + try (FileOutputStream fileOutputStream = new FileOutputStream(path)){ + ReadableByteChannel readableByteChannel = Channels.newChannel(new URL(config.modelUrls.get(model)).openStream()); + FileChannel fileChannel = fileOutputStream.getChannel(); + info("Downloading model %s to path %s from URL %s", model, path, config.modelUrls.get(model)); + fileChannel.transferFrom(readableByteChannel, 0, Long.MAX_VALUE); + } catch (IOException e) { + throw new RuntimeException(e); + } + return path.getAbsolutePath(); + + } + + // Cannot find the model anywhere + error("Could not locate model {}, add its URL to download it or add a directory where it is located", model); + return null; + } + + @Override + public LlamaConfig apply(LlamaConfig c) { + super.apply(c); + + if (config.selectedModel != null && !config.selectedModel.isEmpty()) { + String modelPath = findModelPath(config.selectedModel); + if (modelPath != null) { + loadModel(modelPath); + } else { + error("Could not find selected model {}", config.selectedModel); + } + } + + return config; + } + + @Override + public Utterance publishUtterance(Utterance utterance) { + return utterance; + } + + @Override + public Response publishResponse(Response response) { + return response; + } + + public static void main(String[] args) { + try { + + LoggingFactory.init(Level.INFO); + + // Runtime runtime = Runtime.getInstance(); + // Runtime.startConfig("gpt3-01"); + + WebGui webgui = (WebGui) Runtime.create("webgui", "WebGui"); + webgui.autoStartBrowser(false); + webgui.startService(); + + + Llama llama = (Llama) 
Runtime.start("llama", "Llama"); + + System.out.println(llama.getResponse("Hello!").msg); + + + } catch (Exception e) { + log.error("main threw", e); + } + } +} diff --git a/src/main/java/org/myrobotlab/service/config/LlamaConfig.java b/src/main/java/org/myrobotlab/service/config/LlamaConfig.java new file mode 100644 index 0000000000..774d988f02 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/config/LlamaConfig.java @@ -0,0 +1,41 @@ +package org.myrobotlab.service.config; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class LlamaConfig extends ServiceConfig { + + public String systemPrompt = ""; + + public String systemMessage = ""; + + /** + * The prompt that is prefixed to every user request. + * No whitespace is stripped, so ensure that + * the prompt is formatted so that a whitespace-stripped + * user request does not cause tokenizer errors. + */ + public String userPrompt = "### User:\n"; + + /** + * The prompt that the AI should use, should not + * have a trailing space. Any trailing space + * (but not newlines) are stripped to prevent + * tokenizer errors. 
+ */ + public String assistantPrompt = "### Assistant:\n"; + + public String selectedModel = "llama-2-7b-guanaco-qlora.Q4_K_M.gguf"; + + public List modelPaths = new ArrayList<>(List.of( + + )); + + public Map modelUrls = new HashMap<>(Map.of( + "stablebeluga-7b.Q4_K_M.gguf", "https://huggingface.co/TheBloke/StableBeluga-7B-GGUF/resolve/main/stablebeluga-7b.Q4_K_M.gguf", + "llama-2-7b-guanaco-qlora.Q4_K_M.gguf", "https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF/resolve/main/llama-2-7b-guanaco-qlora.Q4_K_M.gguf" + )); + +} diff --git a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java new file mode 100644 index 0000000000..e367cbb652 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java @@ -0,0 +1,15 @@ +package org.myrobotlab.service.meta; + +import org.myrobotlab.service.meta.abstracts.MetaData; + +public class LlamaMeta extends MetaData { + + public LlamaMeta() { + addDescription( + "A large language model inference engine based on the widely used " + + "llama.cpp project. Can run most GGUF models." 
+ ); + + addDependency("de.kherud", "llama", "1.1.0"); + } +} diff --git a/src/main/resources/resource/WebGui/app/service/js/LlamaGui.js b/src/main/resources/resource/WebGui/app/service/js/LlamaGui.js new file mode 100644 index 0000000000..6269341523 --- /dev/null +++ b/src/main/resources/resource/WebGui/app/service/js/LlamaGui.js @@ -0,0 +1,57 @@ +angular.module('mrlapp.service.LlamaGui', []).controller('LlamaGuiCtrl', ['$scope', 'mrl', function($scope, mrl) { + console.info('LlamaGuiCtrl') + var _self = this + var msg = this.msg + $scope.utterances = [] + $scope.maxRecords = 500 + $scope.text = null + + // GOOD TEMPLATE TO FOLLOW + this.updateState = function(service) { + $scope.service = service + } + + + // init scope variables + $scope.onTime = null + $scope.onEpoch = null + + this.onMsg = function(inMsg) { + let data = inMsg.data[0] + switch (inMsg.method) { + case 'onState': + _self.updateState(data) + $scope.$apply() + break + case 'onUtterance': + $scope.utterances.push(data) + // remove the beginning if we are at maxRecords + if ($scope.utterances.length > $scope.maxRecords) { + $scope.utterances.shift() + } + $scope.$apply() + break + case 'onRequest': + request = {"username":"friend", "text":data} + $scope.utterances.push(request) + // remove the beginning if we are at maxRecords + if ($scope.utterances.length > $scope.maxRecords) { + $scope.utterances.shift() + } + $scope.$apply() + break + case 'onEpoch': + $scope.onEpoch = data + $scope.$apply() + break + default: + console.error("ERROR - unhandled method " + $scope.name + " " + inMsg.method) + break + } + } + + msg.subscribe('publishRequest') + msg.subscribe('publishUtterance') + msg.subscribe(this) +} +]) diff --git a/src/main/resources/resource/WebGui/app/service/views/LlamaGui.html b/src/main/resources/resource/WebGui/app/service/views/LlamaGui.html new file mode 100644 index 0000000000..fa528909aa --- /dev/null +++ b/src/main/resources/resource/WebGui/app/service/views/LlamaGui.html @@ -0,0 
+1,25 @@ +
+
+ text
+ +
+
+
+ +
+ + + + + + + + +
+ {{e.username}} + + {{e.channel}} + + {{e.text}} +
+
From efaaa96f2bc30b0ef58b07e301f098f08284d077 Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Thu, 7 Sep 2023 08:54:16 -0500 Subject: [PATCH 03/11] Add reset() to Llama and basic thread detection --- src/main/java/org/myrobotlab/service/Llama.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/org/myrobotlab/service/Llama.java b/src/main/java/org/myrobotlab/service/Llama.java index 9492d0ba04..7a7259a9a6 100644 --- a/src/main/java/org/myrobotlab/service/Llama.java +++ b/src/main/java/org/myrobotlab/service/Llama.java @@ -37,6 +37,7 @@ public Llama(String reservedKey, String inId) { public void loadModel(String modelPath) { Parameters params = new Parameters.Builder() .setNGpuLayers(0) + .setNThreads(java.lang.Runtime.getRuntime().availableProcessors()) .setTemperature(0.7f) .setPenalizeNl(true) .setMirostat(Parameters.MiroStat.V2) @@ -129,6 +130,11 @@ public Response publishResponse(Response response) { return response; } + public void reset() { + model.reset(); + + } + public static void main(String[] args) { try { From b5d78382e2a87cb43b4da1523e7e1d5634584dde Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sat, 9 Sep 2023 08:34:48 -0500 Subject: [PATCH 04/11] Update java-llama-cpp to 1.1.1 --- src/main/java/org/myrobotlab/service/meta/LlamaMeta.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java index e367cbb652..6613cce631 100644 --- a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java +++ b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java @@ -10,6 +10,6 @@ public LlamaMeta() { "llama.cpp project. Can run most GGUF models." 
); - addDependency("de.kherud", "llama", "1.1.0"); + addDependency("de.kherud", "llama", "1.1.1"); } } From 0991c757392dc6a7e3bb97728a974c666054860c Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sat, 9 Sep 2023 08:35:42 -0500 Subject: [PATCH 05/11] Regenerate pom --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bd904e8315..83171ddddc 100644 --- a/pom.xml +++ b/pom.xml @@ -618,7 +618,7 @@ de.kherud llama - 1.1.0 + 1.1.1 provided From 2d22fcd59e32200f2fed1d865f6c640dd1a7deae Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sun, 10 Sep 2023 15:17:03 -0500 Subject: [PATCH 06/11] Add physical and logical cores to the platform description --- pom.xml | 21 +++------- .../org/myrobotlab/framework/Platform.java | 41 +++++++++++++++++-- .../myrobotlab/service/meta/RuntimeMeta.java | 4 ++ 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/pom.xml b/pom.xml index 83171ddddc..0936d5a0df 100644 --- a/pom.xml +++ b/pom.xml @@ -550,22 +550,6 @@
- - - net.java.jinput - jinput - 2.0.9 - provided - - - jinput-natives - jinput-natives - 2.0.7 - provided - zip - - - org.apache.kafka @@ -1391,6 +1375,11 @@ 3.9.0 + + com.github.oshi + oshi-core + 6.4.5 + diff --git a/src/main/java/org/myrobotlab/framework/Platform.java b/src/main/java/org/myrobotlab/framework/Platform.java index 1b1ed4f2d5..d9574a6b21 100644 --- a/src/main/java/org/myrobotlab/framework/Platform.java +++ b/src/main/java/org/myrobotlab/framework/Platform.java @@ -19,6 +19,7 @@ import org.myrobotlab.logging.LoggerFactory; import org.myrobotlab.logging.LoggingFactory; import org.slf4j.Logger; +import oshi.SystemInfo; /** * The purpose of this class is to retrieve all the detailed information @@ -85,6 +86,10 @@ public class Platform implements Serializable { String shortCommit; + int numLogicalProcessors; + + int numPhysicalProcessors; + static Platform localInstance; /** @@ -108,11 +113,11 @@ public static Platform getLocalInstance() { // === OS === platform.os = System.getProperty("os.name").toLowerCase(); - if (platform.os.indexOf("win") >= 0) { + if (platform.os.contains("win")) { platform.os = OS_WINDOWS; - } else if (platform.os.indexOf("mac") >= 0) { + } else if (platform.os.contains("mac")) { platform.os = OS_MAC; - } else if (platform.os.indexOf("linux") >= 0) { + } else if (platform.os.contains("linux")) { platform.os = OS_LINUX; } @@ -248,6 +253,13 @@ public static Platform getLocalInstance() { } catch (Exception e) { } + // Logical and physical processor detection + + // availableProcessors returns the number of logical cores dedicated to the JVM + platform.numLogicalProcessors = java.lang.Runtime.getRuntime().availableProcessors(); + + platform.numPhysicalProcessors = new SystemInfo().getHardware().getProcessor().getPhysicalProcessorCount(); + localInstance = platform; } return localInstance; @@ -497,6 +509,29 @@ public Date getStartTime() { return startTime; } + /** + * Get the number of logical cores + * available to the VM. 
May be different + * from the number of logical cores in the + * system if the user only allocates + * some of them to the VM. + * @return The number of available logical cores + */ + public int getNumLogicalProcessors() { + return numLogicalProcessors; + } + + /** + * Get the number of physical cores in the system. + * This may be different from the number of cores allocated + * to the JVM, and on x86 will usually be different from + * the number of logical cores in the system. + * @return The number of physical cores in the system. + */ + public int getNumPhysicalProcessors() { + return numPhysicalProcessors; + } + /** * @return true if running in virtual mode * diff --git a/src/main/java/org/myrobotlab/service/meta/RuntimeMeta.java b/src/main/java/org/myrobotlab/service/meta/RuntimeMeta.java index f21e872171..2113faa571 100644 --- a/src/main/java/org/myrobotlab/service/meta/RuntimeMeta.java +++ b/src/main/java/org/myrobotlab/service/meta/RuntimeMeta.java @@ -54,6 +54,10 @@ public RuntimeMeta() { // force correct version of netty - needed for Vertx but not for Runtime ? 
addDependency("io.netty", "netty-all", "4.1.82.Final"); + + // Allows us to get much more detailed info about the system hardware + // MIT license + addDependency("com.github.oshi", "oshi-core", "6.4.5"); } } From 969eea77ef36e5977fb9cf8b4803fb5d527deb86 Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sun, 10 Sep 2023 15:17:40 -0500 Subject: [PATCH 07/11] Use only physical core count for number of llama inference threads --- src/main/java/org/myrobotlab/service/Llama.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/myrobotlab/service/Llama.java b/src/main/java/org/myrobotlab/service/Llama.java index 7a7259a9a6..364ff1e75a 100644 --- a/src/main/java/org/myrobotlab/service/Llama.java +++ b/src/main/java/org/myrobotlab/service/Llama.java @@ -2,6 +2,7 @@ import de.kherud.llama.LlamaModel; import de.kherud.llama.Parameters; +import org.myrobotlab.framework.Platform; import org.myrobotlab.framework.Service; import org.myrobotlab.logging.Level; import org.myrobotlab.logging.LoggingFactory; @@ -37,7 +38,7 @@ public Llama(String reservedKey, String inId) { public void loadModel(String modelPath) { Parameters params = new Parameters.Builder() .setNGpuLayers(0) - .setNThreads(java.lang.Runtime.getRuntime().availableProcessors()) + .setNThreads(Platform.getLocalInstance().getNumPhysicalProcessors()) .setTemperature(0.7f) .setPenalizeNl(true) .setMirostat(Parameters.MiroStat.V2) From fafa9811078e4224639ba1736a6db9314c316269 Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sun, 10 Sep 2023 15:30:53 -0500 Subject: [PATCH 08/11] Add Whisper service and its deps, and regen pom --- pom.xml | 25 +++++++++++++++++++ .../java/org/myrobotlab/service/Whisper.java | 17 +++++++++++++ .../myrobotlab/service/meta/WhisperMeta.java | 10 ++++++++ 3 files changed, 52 insertions(+) create mode 100644 src/main/java/org/myrobotlab/service/Whisper.java create mode 100644 src/main/java/org/myrobotlab/service/meta/WhisperMeta.java diff --git a/pom.xml 
b/pom.xml index 0936d5a0df..e899706f5f 100644 --- a/pom.xml +++ b/pom.xml @@ -550,6 +550,22 @@ + + + net.java.jinput + jinput + 2.0.9 + provided + + + jinput-natives + jinput-natives + 2.0.7 + provided + zip + + + org.apache.kafka @@ -1653,6 +1669,15 @@ + + + io.github.givimad + whisper-jni + 1.4.2-6 + provided + + + diff --git a/src/main/java/org/myrobotlab/service/Whisper.java b/src/main/java/org/myrobotlab/service/Whisper.java new file mode 100644 index 0000000000..0dcecdeedf --- /dev/null +++ b/src/main/java/org/myrobotlab/service/Whisper.java @@ -0,0 +1,17 @@ +package org.myrobotlab.service; + +import org.myrobotlab.framework.Service; +import org.myrobotlab.service.config.ServiceConfig; + +public class Whisper extends Service { + /** + * Constructor of service, reservedkey typically is a services name and inId + * will be its process id + * + * @param reservedKey the service name + * @param inId process id + */ + public Whisper(String reservedKey, String inId) { + super(reservedKey, inId); + } +} diff --git a/src/main/java/org/myrobotlab/service/meta/WhisperMeta.java b/src/main/java/org/myrobotlab/service/meta/WhisperMeta.java new file mode 100644 index 0000000000..2b370d21ca --- /dev/null +++ b/src/main/java/org/myrobotlab/service/meta/WhisperMeta.java @@ -0,0 +1,10 @@ +package org.myrobotlab.service.meta; + +import org.myrobotlab.service.meta.abstracts.MetaData; + +public class WhisperMeta extends MetaData { + public WhisperMeta() { + addDescription("A local speech recognition service leveraging the popular whisper.cpp project."); + addDependency("io.github.givimad", "whisper-jni", "1.4.2-6"); + } +} From 11191b863544f76b669a04e0e13a76504c4f83aa Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sun, 10 Sep 2023 18:11:39 -0500 Subject: [PATCH 09/11] Implement basic Whisper transcription --- .../java/org/myrobotlab/service/Whisper.java | 180 +++++++++++++++++- .../abstracts/AbstractSpeechRecognizer.java | 42 ++-- .../service/config/WhisperConfig.java | 23 
+++ 3 files changed, 219 insertions(+), 26 deletions(-) create mode 100644 src/main/java/org/myrobotlab/service/config/WhisperConfig.java diff --git a/src/main/java/org/myrobotlab/service/Whisper.java b/src/main/java/org/myrobotlab/service/Whisper.java index 0dcecdeedf..1e7f7b887f 100644 --- a/src/main/java/org/myrobotlab/service/Whisper.java +++ b/src/main/java/org/myrobotlab/service/Whisper.java @@ -1,9 +1,43 @@ package org.myrobotlab.service; -import org.myrobotlab.framework.Service; -import org.myrobotlab.service.config.ServiceConfig; +import io.github.givimad.whisperjni.WhisperContext; +import io.github.givimad.whisperjni.WhisperFullParams; +import io.github.givimad.whisperjni.WhisperJNI; +import org.myrobotlab.framework.Platform; +import org.myrobotlab.service.abstracts.AbstractSpeechRecognizer; +import org.myrobotlab.service.config.LlamaConfig; +import org.myrobotlab.service.config.WhisperConfig; +import org.myrobotlab.service.data.Locale; + +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.Line; +import javax.sound.sampled.LineUnavailableException; +import javax.sound.sampled.Mixer; +import javax.sound.sampled.TargetDataLine; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.ShortBuffer; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.nio.file.Path; +import java.util.Map; + +/** Speech recognizer service that transcribes microphone audio locally with whisper.cpp through the whisper-jni bindings. */ +public class Whisper extends AbstractSpeechRecognizer<WhisperConfig> { + private transient WhisperJNI whisper; + + private transient WhisperContext ctx; + + private transient WhisperFullParams params; + + private transient Thread listeningThread = new Thread(); + -public class Whisper extends Service { /** * Constructor of service, reservedkey typically is a services name and inId * will be its process id @@ -14,4 +48,144 @@ 
public class Whisper extends Service { public Whisper(String reservedKey, String inId) { super(reservedKey, inId); } + + /** Loads the native whisper library and the model file at {@code modelPath}, then prepares the inference parameters. Throws RuntimeException if loading fails with an IOException. */ + public void loadModel(String modelPath) { + try { + WhisperJNI.loadLibrary(); + whisper = new WhisperJNI(); + ctx = whisper.init(Path.of(modelPath)); + } catch (IOException e) { + throw new RuntimeException(e); + } + + params = new WhisperFullParams(); + params.nThreads = Platform.getLocalInstance().getNumPhysicalProcessors(); + params.printRealtime = true; + params.printProgress = true; + } + + /** Returns the absolute path of {@code modelName}: looks in config.modelPaths, then the data directory, then downloads it from config.modelUrls; returns null if it cannot be located. A failed download is deleted so it is not picked up as a model later. */ + public String findModelPath(String modelName) { + // First, we loop over all user-defined + // model directories + for (String dir : config.modelPaths) { + File path = new File(dir + fs + modelName); + if (path.exists()) { + return path.getAbsolutePath(); + } + } + + // Now, we check our data directory for any downloaded models + File path = new File(getDataDir() + fs + modelName); + if (path.exists()) { + return path.getAbsolutePath(); + } else if (config.modelUrls.containsKey(modelName)) { + // Model was not in data but we do have a URL for it + try (ReadableByteChannel readableByteChannel = Channels.newChannel(new URL(config.modelUrls.get(modelName)).openStream()); + FileOutputStream fileOutputStream = new FileOutputStream(path)) { + FileChannel fileChannel = fileOutputStream.getChannel(); + info("Downloading model %s to path %s from URL %s", modelName, path, config.modelUrls.get(modelName)); + fileChannel.transferFrom(readableByteChannel, 0, Long.MAX_VALUE); + } catch (IOException e) { + path.delete(); + throw new RuntimeException(e); + } + return path.getAbsolutePath(); + } + // Cannot find the model anywhere + error("Could not locate model %s, add its URL to download it or add a directory where it is located", modelName); + return null; + } + + /** Starts a background thread that records 5 second chunks of 16 kHz mono audio from the hard-coded microphone and publishes every transcribed segment while config.listening is true. */ + @Override + public void startListening() { + listeningThread = new Thread(() -> { + AudioFormat format = new AudioFormat(16000.0f, 16, 1, true, false); + TargetDataLine microphone = null; + + Mixer.Info[] mixerInfos = 
AudioSystem.getMixerInfo(); + for (Mixer.Info info: mixerInfos){ + Mixer m = AudioSystem.getMixer(info); + Line.Info[] lineInfos = m.getTargetLineInfo(); + for (Line.Info lineInfo:lineInfos){ + System.out.println (info.getName()+"---"+lineInfo); + // Hard-code for my mic right now + if (info.getName().contains("U0x46d0x825")) { + try { + microphone = (TargetDataLine) m.getLine(lineInfo); + microphone.open(format); + System.out.println("Sample rate: " + format.getSampleRate()); + } catch (LineUnavailableException e) { + throw new RuntimeException(e); + } + } + } + } + + if (microphone == null) { + error("Could not find the hard-coded microphone U0x46d0x825, cannot listen"); + return; + } + int numBytesRead; + microphone.start(); + while(config.listening) { + int CHUNK_SIZE = (int)((format.getFrameSize() * format.getFrameRate())) * 5; + ByteBuffer captureBuffer = ByteBuffer.allocate(CHUNK_SIZE); + captureBuffer.order(ByteOrder.LITTLE_ENDIAN); + numBytesRead = microphone.read(captureBuffer.array(), 0, CHUNK_SIZE); + System.out.println("Num bytes read=" + numBytesRead); + ShortBuffer shortBuffer = captureBuffer.asShortBuffer(); + // transform the samples to f32 samples + float[] samples = new float[captureBuffer.capacity() / 2]; + int index = 0; + shortBuffer.position(0); + while (shortBuffer.hasRemaining()) { + samples[index++] = Float.max(-1f, Float.min(((float) shortBuffer.get()) / (float) Short.MAX_VALUE, 1f)); + } + int result = whisper.full(ctx, params, samples, samples.length); + if(result != 0) { + throw new RuntimeException("Transcription failed with code " + result); + } + int numSegments = whisper.fullNSegments(ctx); + System.out.println("Inference done, numSegments=" + numSegments); + for (int i = 0; i < numSegments; i++) { + System.out.println(whisper.fullGetSegmentText(ctx, i)); + invoke("publishRecognized", whisper.fullGetSegmentText(ctx, i)); + } + } + microphone.close(); + }); + super.startListening(); + listeningThread.start(); + } + + /** Applies the config and loads the selected model, if one is set and can be located. */ + @Override + public WhisperConfig apply(WhisperConfig c) { + super.apply(c); + if (config.selectedModel != null && 
!config.selectedModel.isEmpty()) { + String modelPath = findModelPath(config.selectedModel); + if (modelPath != null) { + loadModel(modelPath); + } else { + error("Could not find selected model %s", config.selectedModel); + } + } + + return config; + } + + /** + * locales this service supports - implementation can simply get + * runtime.getLocales() if acceptable or create their own locales + * + * @return map of string to locale + */ + @Override + public Map<String, Locale> getLocales() { + return null; + } + + } diff --git a/src/main/java/org/myrobotlab/service/abstracts/AbstractSpeechRecognizer.java b/src/main/java/org/myrobotlab/service/abstracts/AbstractSpeechRecognizer.java index fa54857772..dcaf1b5cde 100644 --- a/src/main/java/org/myrobotlab/service/abstracts/AbstractSpeechRecognizer.java +++ b/src/main/java/org/myrobotlab/service/abstracts/AbstractSpeechRecognizer.java @@ -168,7 +168,7 @@ public void clearLock() { */ @Override public String getWakeWord() { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; return c.wakeWord; } @@ -177,17 +177,16 @@ public String getWakeWord() { */ @Override public boolean isListening() { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; return c.listening; } @Override @Deprecated /* use publishListening(boolean event) */ public void listeningEvent(Boolean event) { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; c.listening = event; broadcastState(); - return; } @Override @@ -213,12 +212,12 @@ public void onEndSpeaking(String utterance) { // affect "recognizing" // FIXME - add a deta time after ... 
- SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; if (c.afterSpeakingPauseMs > 0) { // remove previous one shot - because we are "sliding" the window of // stopping the publishing of recognized words - addTaskOneShot(c.afterSpeakingPauseMs, "setSpeaking", new Object[] { false }); + addTaskOneShot(c.afterSpeakingPauseMs, "setSpeaking", false); log.warn("isSpeaking = false will occur in {} ms", c.afterSpeakingPauseMs); } else { setSpeaking(false, null); @@ -233,17 +232,16 @@ public void onAudioStart(AudioData data) { purgeTask("setSpeaking"); // isSpeaking = true; setSpeaking(true, data.getFileName()); - return; } @Override public void onAudioEnd(AudioData data) { log.info("sound stopped {}", data); - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; if (c.afterSpeakingPauseMs > 0) { // remove previous one shot - because we are "sliding" the window of // stopping the publishing of recognized words - addTaskOneShot(c.afterSpeakingPauseMs, "setSpeaking", new Object[] { false }); + addTaskOneShot(c.afterSpeakingPauseMs, "setSpeaking", false); log.warn("isSpeaking = false will occur in {} ms", c.afterSpeakingPauseMs); } else { setSpeaking(false, null); @@ -264,7 +262,7 @@ public boolean setSpeaking(boolean b, String utterance) { ListeningEvent event = new ListeningEvent(); - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; event.isRecording = c.recording; event.isListening = c.listening; event.isAwake = isAwake; @@ -289,7 +287,6 @@ public void onStartSpeaking(String utterance) { purgeTask("setSpeaking"); // isSpeaking = true; setSpeaking(true, utterance); - return; } @Override @@ -304,11 +301,10 @@ public void pauseListening() { public ListeningEvent[] processResults(ListeningEvent[] results) { // at the moment its simply invoking other methods, but if a new speech // recognizer is created - it might need more processing - 
SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; - for (int i = 0; i < results.length; ++i) { - ListeningEvent event = results[i]; + for (ListeningEvent event : results) { event.isRecording = c.recording; event.isListening = c.listening; event.isAwake = isAwake; @@ -366,7 +362,7 @@ public void setAwake(boolean b) { } public void setAwake(boolean b, String text) { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; if (!b && isSpeaking) { log.info("bot is speaking - bot doesn't get tired when talking about self sliding idle timeout"); @@ -463,7 +459,7 @@ public void setLowerCase(boolean b) { */ @Override public void setWakeWord(String word) { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; if (word == null || word.trim().length() == 0) { word = null; @@ -487,7 +483,7 @@ public void setWakeWord(String word) { * */ public void setWakeWordTimeout(Integer wakeWordTimeoutSeconds) { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; c.wakeWordIdleTimeoutSeconds = wakeWordTimeoutSeconds; broadcastState(); } @@ -496,7 +492,7 @@ public void setWakeWordTimeout(Integer wakeWordTimeoutSeconds) { @Override public void startListening() { log.debug("Start listening event seen."); - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; c.listening = true; c.recording = true; broadcastState(); @@ -518,7 +514,7 @@ public void setAutoListen(Boolean value) { */ @Override public void startRecording() { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; c.recording = true; broadcastState(); } @@ -531,7 +527,7 @@ public void startRecording() { @Override public void stopListening() { log.debug("stopListening()"); - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; 
c.listening = false; broadcastState(); } @@ -542,7 +538,7 @@ public void stopListening() { @Override public void stopRecording() { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; c.recording = false; broadcastState(); } @@ -555,13 +551,13 @@ public void stopService() { } public long setAfterSpeakingPause(long ms) { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; c.afterSpeakingPauseMs = ms; return c.afterSpeakingPauseMs; } public long getAfterSpeakingPause() { - SpeechRecognizerConfig c = (SpeechRecognizerConfig)config; + SpeechRecognizerConfig c = config; return c.afterSpeakingPauseMs; } diff --git a/src/main/java/org/myrobotlab/service/config/WhisperConfig.java b/src/main/java/org/myrobotlab/service/config/WhisperConfig.java new file mode 100644 index 0000000000..73b2f69e9d --- /dev/null +++ b/src/main/java/org/myrobotlab/service/config/WhisperConfig.java @@ -0,0 +1,23 @@ +package org.myrobotlab.service.config; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class WhisperConfig extends SpeechRecognizerConfig { + public String selectedModel = "ggml-tiny.en.bin"; + + /** Directories to search for model files; empty by default. */ + public List<String> modelPaths = new ArrayList<>(); + + /** Maps a model file name to the URL it can be downloaded from. */ + public Map<String, String> modelUrls = new HashMap<>(Map.of( + "ggml-tiny.bin", "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", + "ggml-small.bin", "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", + "ggml-tiny.en.bin", "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin", + "ggml-small.en.bin", "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin", + "ggml-medium-q5_0.bin", "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin", + "ggml-medium.en-q5_0.bin", "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin" + )); +} From 
96f39a02e2f03b1f508941b55b1dd55654e00fac Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Tue, 19 Sep 2023 18:04:49 -0500 Subject: [PATCH 10/11] Update java-llama-cpp to 1.1.4 for bundled lib --- pom.xml | 2 +- src/main/java/org/myrobotlab/service/meta/LlamaMeta.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e899706f5f..39e660949f 100644 --- a/pom.xml +++ b/pom.xml @@ -618,7 +618,7 @@ de.kherud llama - 1.1.1 + 1.1.4 provided diff --git a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java index 6613cce631..12d1cef47c 100644 --- a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java +++ b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java @@ -10,6 +10,6 @@ public LlamaMeta() { "llama.cpp project. Can run most GGUF models." ); - addDependency("de.kherud", "llama", "1.1.1"); + addDependency("de.kherud", "llama", "1.1.4"); } } From 1af19221a7466c36ab59b90ef87d73a6f508447e Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sun, 22 Oct 2023 18:40:52 -0500 Subject: [PATCH 11/11] Update java-llama to 2.0 --- pom.xml | 13 ++++++++++-- .../java/org/myrobotlab/service/Llama.java | 21 ++++++++++++------- .../myrobotlab/service/meta/LlamaMeta.java | 2 +- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index 39e660949f..3a9b2a51bd 100644 --- a/pom.xml +++ b/pom.xml @@ -13,6 +13,9 @@ # fast build mvn -DskipTests package -o + # execute + mvn exec:java -Dexec.mainClass=org.myrobotlab.service.Runtime -Dexec.args="-s webgui WebGui intro Intro python Python" + # specific test mvn test -Dtest="org.myrobotlab.service.WebGuiTest#postTest" @@ -80,7 +83,7 @@ ${maven.build.timestamp} yyyyMMddHHmm - ${version} + ${version} ${git.branch} ${NODE_NAME} ${NODE_LABELS} @@ -618,7 +621,7 @@ de.kherud llama - 1.1.4 + 2.2.1 provided @@ -1027,6 +1030,12 @@ jovr 1.8.0.0 provided + + + net.java.dev.jna + jna + + slick-util diff --git 
a/src/main/java/org/myrobotlab/service/Llama.java b/src/main/java/org/myrobotlab/service/Llama.java index 364ff1e75a..324407e6a1 100644 --- a/src/main/java/org/myrobotlab/service/Llama.java +++ b/src/main/java/org/myrobotlab/service/Llama.java @@ -1,7 +1,8 @@ package org.myrobotlab.service; +import de.kherud.llama.InferenceParameters; import de.kherud.llama.LlamaModel; -import de.kherud.llama.Parameters; +import de.kherud.llama.ModelParameters; import org.myrobotlab.framework.Platform; import org.myrobotlab.framework.Service; import org.myrobotlab.logging.Level; @@ -36,25 +37,29 @@ public Llama(String reservedKey, String inId) { } public void loadModel(String modelPath) { - Parameters params = new Parameters.Builder() + ModelParameters params = new ModelParameters.Builder() .setNGpuLayers(0) .setNThreads(Platform.getLocalInstance().getNumPhysicalProcessors()) - .setTemperature(0.7f) - .setPenalizeNl(true) - .setMirostat(Parameters.MiroStat.V2) - .setAntiPrompt(new String[]{config.userPrompt}) .build(); model = new LlamaModel(modelPath, params); } public Response getResponse(String text) { + InferenceParameters inferenceParameters = new InferenceParameters.Builder() + .setTemperature(0.7f) + .setPenalizeNl(true) + .setMirostat(InferenceParameters.MiroStat.V2) + .setAntiPrompt(new String[]{config.userPrompt}) + .build(); + + if (model == null) { error("Model is not loaded."); return null; } String prompt = config.systemPrompt + config.systemMessage + "\n" + text + "\n"; - String response = StreamSupport.stream(model.generate(prompt).spliterator(), false) + String response = StreamSupport.stream(model.generate(prompt, inferenceParameters).spliterator(), false) .map(LlamaModel.Output::toString) .reduce("", (a, b) -> a + b); @@ -132,7 +137,7 @@ public Response publishResponse(Response response) { } public void reset() { - model.reset(); + if (model != null) { model.close(); model = null; } /* drop the closed model so getResponse reports it as not loaded */ } diff --git a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java 
b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java index 12d1cef47c..570f6e9fb6 100644 --- a/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java +++ b/src/main/java/org/myrobotlab/service/meta/LlamaMeta.java @@ -10,6 +10,6 @@ public LlamaMeta() { "llama.cpp project. Can run most GGUF models." ); - addDependency("de.kherud", "llama", "1.1.4"); + addDependency("de.kherud", "llama", "2.2.1"); } }