Skip to content

Commit e66f467

Browse files
authored
feat: Add llama.cpp provider and fim template (#118)
1 parent c9a3cda commit e66f467

13 files changed

+349
-21
lines changed

CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ add_qtc_plugin(QodeAssist
6969
templates/Llama2.hpp
7070
templates/CodeLlamaQMLFim.hpp
7171
templates/GoogleAI.hpp
72+
templates/LlamaCppFim.hpp
7273
providers/Providers.hpp
7374
providers/OllamaProvider.hpp providers/OllamaProvider.cpp
7475
providers/ClaudeProvider.hpp providers/ClaudeProvider.cpp
@@ -78,6 +79,7 @@ add_qtc_plugin(QodeAssist
7879
providers/OpenAICompatProvider.hpp providers/OpenAICompatProvider.cpp
7980
providers/OpenRouterAIProvider.hpp providers/OpenRouterAIProvider.cpp
8081
providers/GoogleAIProvider.hpp providers/GoogleAIProvider.cpp
82+
providers/LlamaCppProvider.hpp providers/LlamaCppProvider.cpp
8183
QodeAssist.qrc
8284
LSPCompletion.hpp
8385
LLMSuggestion.hpp LLMSuggestion.cpp

llmcore/ContextData.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ struct Message
3030
QString content;
3131

3232
// clang-format off
33-
auto operator<=>(const Message&) const = default;
33+
bool operator==(const Message&) const = default;
3434
// clang-format on
3535
};
3636

llmcore/ProviderID.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ enum class ProviderID {
2828
OpenAICompatible,
2929
MistralAI,
3030
OpenRouter,
31-
GoogleAI
31+
GoogleAI,
32+
LlamaCpp
3233
};
3334
}

providers/LlamaCppProvider.cpp

+213
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
/*
2+
* Copyright (C) 2024 Petr Mironychev
3+
*
4+
* This file is part of QodeAssist.
5+
*
6+
* QodeAssist is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* QodeAssist is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
#include "LlamaCppProvider.hpp"
21+
22+
#include <QEventLoop>
23+
#include <QJsonArray>
24+
#include <QJsonDocument>
25+
#include <QJsonObject>
26+
#include <QNetworkReply>
27+
28+
#include "llmcore/OpenAIMessage.hpp"
29+
#include "llmcore/ValidationUtils.hpp"
30+
#include "logger/Logger.hpp"
31+
#include "settings/ChatAssistantSettings.hpp"
32+
#include "settings/CodeCompletionSettings.hpp"
33+
34+
namespace QodeAssist::Providers {
35+
36+
QString LlamaCppProvider::name() const
{
    // Human-readable provider identifier shown in the plugin UI.
    return QStringLiteral("llama.cpp");
}
40+
41+
QString LlamaCppProvider::url() const
{
    // Default base URL of a locally running llama.cpp server.
    return QStringLiteral("http://localhost:8080");
}
45+
46+
QString LlamaCppProvider::completionEndpoint() const
{
    // llama.cpp's fill-in-the-middle endpoint, used for code completion.
    return QStringLiteral("/infill");
}
50+
51+
QString LlamaCppProvider::chatEndpoint() const
{
    // OpenAI-compatible chat endpoint exposed by llama.cpp.
    return QStringLiteral("/v1/chat/completions");
}
55+
56+
// llama.cpp serves a single model chosen at server start-up, so there is
// no model-listing capability to expose to the UI.
bool LlamaCppProvider::supportsModelListing() const
{
    return false;
}
60+
61+
/**
 * Populates @p request with the prompt produced by @p prompt and the
 * user-configured sampling parameters for the given request @p type.
 *
 * A template that does not declare support for this provider is logged as a
 * warning, but the request is still prepared (best-effort behavior).
 */
void LlamaCppProvider::prepareRequest(
    QJsonObject &request,
    LLMCore::PromptTemplate *prompt,
    LLMCore::ContextData context,
    LLMCore::RequestType type)
{
    if (!prompt->isSupportProvider(providerID())) {
        // Bug fix: %1 is the template name and %2 the provider name; the
        // original call passed them in the opposite order, producing
        // "Template llama.cpp doesn't support <template> provider".
        LOG_MESSAGE(QString("Template %1 doesn't support %2 provider").arg(prompt->name(), name()));
    }

    // Let the template fill in the prompt / message fields of the request.
    prompt->prepareRequest(request, context);

    // Copy sampling parameters from the given settings page into the JSON
    // request. Optional parameters are emitted only when explicitly enabled,
    // so the server's own defaults apply otherwise.
    auto applyModelParams = [&request](const auto &settings) {
        request["max_tokens"] = settings.maxTokens();
        request["temperature"] = settings.temperature();

        if (settings.useTopP())
            request["top_p"] = settings.topP();
        if (settings.useTopK())
            request["top_k"] = settings.topK();
        if (settings.useFrequencyPenalty())
            request["frequency_penalty"] = settings.frequencyPenalty();
        if (settings.usePresencePenalty())
            request["presence_penalty"] = settings.presencePenalty();
    };

    // Code completion and chat each have their own settings page.
    if (type == LLMCore::RequestType::CodeCompletion) {
        applyModelParams(Settings::codeCompletionSettings());
    } else {
        applyModelParams(Settings::chatAssistantSettings());
    }
}
93+
94+
/**
 * Incrementally consumes streamed response data from @p reply, appending any
 * generated text to @p accumulatedResponse.
 *
 * Handles both response shapes llama.cpp can emit:
 *  - /infill style objects with a top-level "content" field and "stop" flag;
 *  - OpenAI-compatible SSE lines ("data: {...}" with a "choices" array,
 *    terminated by "data: [DONE]").
 *
 * @return true once the stream signals completion, false while more data is
 *         expected. Returning false on an empty read lets the caller wait for
 *         further network activity.
 */
bool LlamaCppProvider::handleResponse(QNetworkReply *reply, QString &accumulatedResponse)
{
    QByteArray data = reply->readAll();
    if (data.isEmpty()) {
        return false;
    }

    // Cheap raw-byte scan for terminal markers; also re-checked per line
    // below, so a marker split across fields is still caught here.
    bool isDone = data.contains("\"stop\":true") || data.contains("data: [DONE]");

    // A single network read may contain several newline-separated events.
    QByteArrayList lines = data.split('\n');
    for (const QByteArray &line : lines) {
        if (line.trimmed().isEmpty()) {
            continue;
        }

        if (line == "data: [DONE]") {
            isDone = true;
            continue;
        }

        // Strip the SSE "data: " prefix when present; /infill responses
        // arrive as bare JSON lines.
        QByteArray jsonData = line;
        if (line.startsWith("data: ")) {
            jsonData = line.mid(6);
        }

        QJsonParseError error;
        QJsonDocument doc = QJsonDocument::fromJson(jsonData, &error);
        if (doc.isNull()) {
            // Partial or malformed JSON fragment — skip it rather than abort.
            continue;
        }

        QJsonObject obj = doc.object();

        if (obj.contains("content")) {
            // /infill response shape: text lives in a top-level "content".
            QString content = obj["content"].toString();
            if (!content.isEmpty()) {
                accumulatedResponse += content;
            }
        } else if (obj.contains("choices")) {
            // OpenAI-compatible chat response shape.
            auto message = LLMCore::OpenAIMessage::fromJson(obj);
            if (message.hasError()) {
                LOG_MESSAGE("Error in llama.cpp response: " + message.error);
                continue;
            }

            QString content = message.getContent();
            if (!content.isEmpty()) {
                accumulatedResponse += content;
            }

            if (message.isDone()) {
                isDone = true;
            }
        }

        // /infill marks the end of generation with a boolean "stop" field.
        if (obj["stop"].toBool()) {
            isDone = true;
        }
    }

    return isDone;
}
156+
157+
// llama.cpp runs one model per server instance, so there is nothing to
// enumerate; see supportsModelListing().
QList<QString> LlamaCppProvider::getInstalledModels(const QString &url)
{
    return QList<QString>();
}
161+
162+
/**
 * Validates @p request against a reference schema of the fields accepted by
 * llama.cpp, selected by @p type (FIM → /infill schema, otherwise the
 * OpenAI-compatible chat schema).
 *
 * @return the list of offending field names reported by
 *         ValidationUtils::validateRequestFields; presumably empty when the
 *         request is well-formed — confirm against ValidationUtils.
 */
QList<QString> LlamaCppProvider::validateRequest(
    const QJsonObject &request, LLMCore::TemplateType type)
{
    if (type == LLMCore::TemplateType::FIM) {
        // Allowed fields for the /infill endpoint. Values are placeholders;
        // only the keys (and the array shape of "stop") matter here.
        const auto infillReq = QJsonObject{
            {"model", {}},
            {"input_prefix", {}},
            {"input_suffix", {}},
            {"prompt", {}},
            {"temperature", {}},
            {"top_p", {}},
            {"top_k", {}},
            {"max_tokens", {}},
            {"frequency_penalty", {}},
            {"presence_penalty", {}},
            {"stop", QJsonArray{}},
            {"stream", {}}};

        return LLMCore::ValidationUtils::validateRequestFields(request, infillReq);
    } else {
        // Allowed fields for /v1/chat/completions, including the expected
        // shape of each "messages" entry.
        const auto chatReq = QJsonObject{
            {"model", {}},
            {"messages", QJsonArray{{QJsonObject{{"role", {}}, {"content", {}}}}}},
            {"temperature", {}},
            {"max_tokens", {}},
            {"top_p", {}},
            {"top_k", {}},
            {"frequency_penalty", {}},
            {"presence_penalty", {}},
            {"stop", QJsonArray{}},
            {"stream", {}}};

        return LLMCore::ValidationUtils::validateRequestFields(request, chatReq);
    }
}
197+
198+
QString LlamaCppProvider::apiKey() const
{
    // A local llama.cpp server needs no authentication token.
    return QString();
}
202+
203+
// Sets the JSON content type on outgoing requests; no auth headers are
// needed for a local llama.cpp server (see apiKey()).
void LlamaCppProvider::prepareNetworkRequest(QNetworkRequest &networkRequest) const
{
    networkRequest.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
}
207+
208+
// Stable enum identifier used by templates (isSupportProvider) and the
// provider registry to recognize this provider.
LLMCore::ProviderID LlamaCppProvider::providerID() const
{
    return LLMCore::ProviderID::LlamaCpp;
}
212+
213+
} // namespace QodeAssist::Providers

providers/LlamaCppProvider.hpp

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright (C) 2024 Petr Mironychev
3+
*
4+
* This file is part of QodeAssist.
5+
*
6+
* QodeAssist is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* QodeAssist is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with QodeAssist. If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
#pragma once
21+
22+
#include "llmcore/Provider.hpp"
23+
24+
namespace QodeAssist::Providers {
25+
26+
/**
 * Provider backend for a llama.cpp server.
 *
 * Talks to llama.cpp's native /infill endpoint for fill-in-the-middle code
 * completion and to its OpenAI-compatible /v1/chat/completions endpoint for
 * chat. Assumes a local, unauthenticated server (default
 * http://localhost:8080); model listing is not supported because llama.cpp
 * serves a single model per instance.
 */
class LlamaCppProvider : public LLMCore::Provider
{
public:
    QString name() const override;
    QString url() const override;
    QString completionEndpoint() const override;
    QString chatEndpoint() const override;
    bool supportsModelListing() const override;
    // Fills `request` with the template's prompt and the user's sampling
    // settings for the given request type.
    void prepareRequest(
        QJsonObject &request,
        LLMCore::PromptTemplate *prompt,
        LLMCore::ContextData context,
        LLMCore::RequestType type) override;
    // Consumes streamed reply data, appending generated text; returns true
    // once the stream is complete.
    bool handleResponse(QNetworkReply *reply, QString &accumulatedResponse) override;
    // Always returns an empty list — llama.cpp has no listing endpoint.
    QList<QString> getInstalledModels(const QString &url) override;
    // Checks `request` against the field schema of the selected endpoint.
    QList<QString> validateRequest(const QJsonObject &request, LLMCore::TemplateType type) override;
    QString apiKey() const override;
    void prepareNetworkRequest(QNetworkRequest &networkRequest) const override;
    LLMCore::ProviderID providerID() const override;
};
46+
47+
} // namespace QodeAssist::Providers

providers/Providers.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "providers/ClaudeProvider.hpp"
2424
#include "providers/GoogleAIProvider.hpp"
2525
#include "providers/LMStudioProvider.hpp"
26+
#include "providers/LlamaCppProvider.hpp"
2627
#include "providers/MistralAIProvider.hpp"
2728
#include "providers/OllamaProvider.hpp"
2829
#include "providers/OpenAICompatProvider.hpp"
@@ -42,6 +43,7 @@ inline void registerProviders()
4243
providerManager.registerProvider<OpenRouterProvider>();
4344
providerManager.registerProvider<MistralAIProvider>();
4445
providerManager.registerProvider<GoogleAIProvider>();
46+
providerManager.registerProvider<LlamaCppProvider>();
4547
}
4648

4749
} // namespace QodeAssist::Providers

templates/Alpaca.hpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,11 @@ class Alpaca : public LLMCore::PromptTemplate
7575
bool isSupportProvider(LLMCore::ProviderID id) const override
7676
{
7777
switch (id) {
78-
case QodeAssist::LLMCore::ProviderID::Ollama:
79-
case QodeAssist::LLMCore::ProviderID::LMStudio:
80-
case QodeAssist::LLMCore::ProviderID::OpenRouter:
81-
case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
78+
case LLMCore::ProviderID::Ollama:
79+
case LLMCore::ProviderID::LMStudio:
80+
case LLMCore::ProviderID::OpenRouter:
81+
case LLMCore::ProviderID::OpenAICompatible:
82+
case LLMCore::ProviderID::LlamaCpp:
8283
return true;
8384
default:
8485
return false;

templates/ChatML.hpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,11 @@ class ChatML : public LLMCore::PromptTemplate
7676
bool isSupportProvider(LLMCore::ProviderID id) const override
7777
{
7878
switch (id) {
79-
case QodeAssist::LLMCore::ProviderID::Ollama:
80-
case QodeAssist::LLMCore::ProviderID::LMStudio:
81-
case QodeAssist::LLMCore::ProviderID::OpenRouter:
82-
case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
79+
case LLMCore::ProviderID::Ollama:
80+
case LLMCore::ProviderID::LMStudio:
81+
case LLMCore::ProviderID::OpenRouter:
82+
case LLMCore::ProviderID::OpenAICompatible:
83+
case LLMCore::ProviderID::LlamaCpp:
8384
return true;
8485
default:
8586
return false;

templates/Llama2.hpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,11 @@ class Llama2 : public LLMCore::PromptTemplate
7373
bool isSupportProvider(LLMCore::ProviderID id) const override
7474
{
7575
switch (id) {
76-
case QodeAssist::LLMCore::ProviderID::Ollama:
77-
case QodeAssist::LLMCore::ProviderID::LMStudio:
78-
case QodeAssist::LLMCore::ProviderID::OpenRouter:
79-
case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
76+
case LLMCore::ProviderID::Ollama:
77+
case LLMCore::ProviderID::LMStudio:
78+
case LLMCore::ProviderID::OpenRouter:
79+
case LLMCore::ProviderID::OpenAICompatible:
80+
case LLMCore::ProviderID::LlamaCpp:
8081
return true;
8182
default:
8283
return false;

templates/Llama3.hpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,11 @@ class Llama3 : public LLMCore::PromptTemplate
8080
bool isSupportProvider(LLMCore::ProviderID id) const override
8181
{
8282
switch (id) {
83-
case QodeAssist::LLMCore::ProviderID::Ollama:
84-
case QodeAssist::LLMCore::ProviderID::LMStudio:
85-
case QodeAssist::LLMCore::ProviderID::OpenRouter:
86-
case QodeAssist::LLMCore::ProviderID::OpenAICompatible:
83+
case LLMCore::ProviderID::Ollama:
84+
case LLMCore::ProviderID::LMStudio:
85+
case LLMCore::ProviderID::OpenRouter:
86+
case LLMCore::ProviderID::OpenAICompatible:
87+
case LLMCore::ProviderID::LlamaCpp:
8788
return true;
8889
default:
8990
return false;

0 commit comments

Comments
 (0)