Commit 6050d80: manual merge

Signed-off-by: Dave Lee <dave@gray101.com>
dave-gray101 committed Feb 5, 2025
2 parents 6b8d969 + 28a1310

Showing 25 changed files with 429 additions and 32 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/notify-models.yaml
@@ -18,7 +18,7 @@ jobs:
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- - uses: GrantBirki/git-diff-action@v2.7.0
+ - uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
@@ -99,7 +99,7 @@ jobs:
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- - uses: GrantBirki/git-diff-action@v2.7.0
+ - uses: GrantBirki/git-diff-action@v2.8.0
id: git-diff-action
with:
json_diff_file_output: diff.json
2 changes: 1 addition & 1 deletion Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
- CPPLLAMA_VERSION?=53debe6f3c9cca87e9520a83ee8c14d88977afa4
+ CPPLLAMA_VERSION?=3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
7 changes: 7 additions & 0 deletions backend/backend.proto
@@ -163,6 +163,11 @@ message Reply {
double timing_token_generation = 5;
}

+ message GrammarTrigger {
+ string word = 1;
+ bool at_start = 2;
+ }

message ModelOptions {
string Model = 1;
int32 ContextSize = 2;
@@ -247,6 +252,8 @@ message ModelOptions {

string CacheTypeKey = 63;
string CacheTypeValue = 64;

+ repeated GrammarTrigger GrammarTriggers = 65;
}

message Result {
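The new GrammarTrigger message rides along on ModelOptions (field 65). A minimal sketch of populating it from Go through the generated bindings; the import path is an assumption, the field names follow the proto above:

```go
package main

import (
	"fmt"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto" // assumed path of the generated proto bindings
)

func main() {
	// With triggers set, the backend switches to lazy grammar mode:
	// decoding runs unconstrained until a trigger word appears
	// (see the grpc-server.cpp changes below).
	opts := &pb.ModelOptions{
		Model: "hermes-2-theta-llama-3-8b",
		GrammarTriggers: []*pb.GrammarTrigger{
			{Word: "<tool_call>", AtStart: true}, // illustrative trigger word
		},
	}
	fmt.Println(len(opts.GrammarTriggers), opts.GrammarTriggers[0].Word)
}
```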
32 changes: 32 additions & 0 deletions backend/cpp/llama/grpc-server.cpp
@@ -468,6 +468,9 @@ struct llama_server_context
bool add_bos_token = true;
bool has_eos_token = true;

+ bool grammar_lazy = false;
+ std::vector<common_grammar_trigger> grammar_trigger_words;

int32_t n_ctx; // total context for all clients / slots

// system prompt
@@ -706,6 +709,8 @@ struct llama_server_context
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
+ slot->sparams.grammar_trigger_words = grammar_trigger_words;
+ slot->sparams.grammar_lazy = grammar_lazy;

if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
// Might be better to reject the request with a 400 ?
@@ -2374,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request,
if ( request->ropefreqscale() != 0.0f ) {
params.rope_freq_scale = request->ropefreqscale();
}

+ if (request->grammartriggers_size() > 0) {
+ LOG_INFO("configuring grammar triggers", {});
+ llama.grammar_lazy = true;
+ for (int i = 0; i < request->grammartriggers_size(); i++) {
+ common_grammar_trigger trigger;
+ trigger.word = request->grammartriggers(i).word();
+ trigger.at_start = request->grammartriggers(i).at_start();
+ llama.grammar_trigger_words.push_back(trigger);
+ LOG_INFO("grammar trigger", {
+ { "word", trigger.word },
+ { "at_start", trigger.at_start }
+ });
+ }
+ }
}


@@ -2522,6 +2542,18 @@ class BackendServiceImpl final : public backend::Backend::Service {
return grpc::Status::OK;
}

+ grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
+ json data = parse_options(false, request, llama);
+
+ std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
+
+ for (int i=0 ; i< tokens.size(); i++){
+ response->add_tokens(tokens[i]);
+ }
+
+ return grpc::Status::OK;
+ }

grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();

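TokenizeString reuses the server's own tokenizer and returns the raw token ids. A hedged sketch of invoking it from Go; the client constructor, request field, and address are assumptions based on the proto service definition:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto" // assumed path of the generated bindings
)

func main() {
	conn, err := grpc.NewClient("127.0.0.1:50051", // illustrative backend address
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)
	resp, err := client.TokenizeString(context.Background(),
		&pb.PredictOptions{Prompt: "Hello, world"}) // assumed to populate data["prompt"] above
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Tokens) // token ids produced by llama.tokenize
}
```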
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements-cpu.txt
@@ -5,4 +5,4 @@ accelerate
transformers
bitsandbytes
outetts
- sentence-transformers==3.4.0
+ sentence-transformers==3.4.1
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements-cublas11.txt
@@ -6,4 +6,4 @@ accelerate
transformers
bitsandbytes
outetts
- sentence-transformers==3.4.0
+ sentence-transformers==3.4.1
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements-cublas12.txt
@@ -5,4 +5,4 @@ numba==0.60.0
transformers
bitsandbytes
outetts
- sentence-transformers==3.4.0
+ sentence-transformers==3.4.1
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements-hipblas.txt
@@ -7,4 +7,4 @@ numba==0.60.0
bitsandbytes
outetts
bitsandbytes
- sentence-transformers==3.4.0
+ sentence-transformers==3.4.1
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements-intel.txt
@@ -8,4 +8,4 @@ numba==0.60.0
intel-extension-for-transformers
bitsandbytes
outetts
- sentence-transformers==3.4.0
+ sentence-transformers==3.4.1
10 changes: 10 additions & 0 deletions core/backend/options.go
@@ -118,9 +118,19 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
nGPULayers = *c.NGPULayers
}

+ triggers := make([]*pb.GrammarTrigger, 0)
+ for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
+ triggers = append(triggers, &pb.GrammarTrigger{
+ Word: t.Word,
+ AtStart: t.AtStart,
+ })
+
+ }

return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
+ GrammarTriggers: triggers,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.CFGScale,
LoraAdapter: c.LoraAdapter,
4 changes: 4 additions & 0 deletions core/backend/tokenize.go
@@ -34,6 +34,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
return schema.TokenizeResponse{}, err
}

+ if resp.Tokens == nil {
+ resp.Tokens = make([]int32, 0)
+ }

return schema.TokenizeResponse{
Tokens: resp.Tokens,
}, nil
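The nil check exists because encoding/json renders a nil slice as null but an empty slice as [], so the guard keeps /v1/tokenize returning "tokens": [] even when the backend hands back nothing. A self-contained illustration:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Stand-in for schema.TokenizeResponse.
type tokenizeResponse struct {
	Tokens []int32 `json:"tokens"`
}

func main() {
	var nilTokens []int32 // what a backend may return for an empty prompt

	withNil, _ := json.Marshal(tokenizeResponse{Tokens: nilTokens})
	withEmpty, _ := json.Marshal(tokenizeResponse{Tokens: make([]int32, 0)})

	fmt.Println(string(withNil))   // {"tokens":null}
	fmt.Println(string(withEmpty)) // {"tokens":[]}
}
```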
6 changes: 4 additions & 2 deletions core/gallery/models_test.go
@@ -48,8 +48,10 @@ var _ = Describe("Model test", func() {
defer os.RemoveAll(tempdir)

gallery := []GalleryModel{{
Name: "bert",
URL: bertEmbeddingsURL,
Metadata: Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())
12 changes: 8 additions & 4 deletions core/gallery/request.go
@@ -11,17 +11,21 @@ import (
// It is used to install the model by resolving the URL and downloading the files.
// The other fields are used to override the configuration of the model.
type GalleryModel struct {
+ Metadata `json:",inline" yaml:",inline"`
+ // config_file is read in the situation where URL is blank - and therefore this is a base config.
+ ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
+ // Overrides are used to override the configuration of the model located at URL
+ Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
+ }
+
+ type Metadata struct {
URL string `json:"url,omitempty" yaml:"url,omitempty"`
Name string `json:"name,omitempty" yaml:"name,omitempty"`
Description string `json:"description,omitempty" yaml:"description,omitempty"`
License string `json:"license,omitempty" yaml:"license,omitempty"`
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
- // config_file is read in the situation where URL is blank - and therefore this is a base config.
- ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
- // Overrides are used to override the configuration of the model located at URL
- Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
// AdditionalFiles are used to add additional files to the model
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
// Gallery is a reference to the gallery which contains the model
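Because Metadata is embedded with inline JSON/YAML tags, the refactor is wire-compatible and mostly source-compatible: promoted fields still read as m.Name, and serialization keeps them at the top level. A reduced sketch with the two structs trimmed to the fields needed here:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Trimmed-down copies of the structs above.
type Metadata struct {
	URL  string `yaml:"url,omitempty"`
	Name string `yaml:"name,omitempty"`
}

type GalleryModel struct {
	Metadata  `yaml:",inline"`
	Overrides map[string]interface{} `yaml:"overrides,omitempty"`
}

func main() {
	m := GalleryModel{Metadata: Metadata{
		Name: "gpt4all-j",
		URL:  "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
	}}

	fmt.Println(m.Name) // field promotion: reads need no .Metadata prefix

	out, _ := yaml.Marshal(m)
	fmt.Print(string(out)) // url/name stay top-level thanks to ",inline"
}
```

Construction sites do need the extra wrapping, which is exactly what the test updates in this commit do.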
6 changes: 5 additions & 1 deletion core/gallery/request_test.go
@@ -9,7 +9,11 @@ import (
var _ = Describe("Gallery API tests", func() {
Context("requests", func() {
It("parses github with a branch", func() {
- req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
+ req := GalleryModel{
+ Metadata: Metadata{
+ URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
+ },
+ }
e, err := GetGalleryConfigFromURL(req.URL, "")
Expect(err).ToNot(HaveOccurred())
Expect(e.Name).To(Equal("gpt4all-j"))
16 changes: 10 additions & 6 deletions core/http/app_test.go
@@ -299,14 +299,18 @@ var _ = Describe("API test", func() {

g := []gallery.GalleryModel{
{
Name: "bert",
URL: bertEmbeddingsURL,
Metadata: gallery.Metadata{
Name: "bert",
URL: bertEmbeddingsURL,
},
},
{
Name: "bert2",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
Metadata: gallery.Metadata{
Name: "bert2",
URL: bertEmbeddingsURL,
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
Overrides: map[string]interface{}{"foo": "bar"},
},
}
out, err := yaml.Marshal(g)
18 changes: 12 additions & 6 deletions core/http/endpoints/localai/gallery.go
@@ -117,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)

models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
if err != nil {
return err
}
log.Debug().Msgf("Models found from galleries: %+v", models)
for _, m := range models {
log.Debug().Msgf("Model found from galleries: %+v", m)

log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))

m := []gallery.Metadata{}

for _, mm := range models {
m = append(m, mm.Metadata)
}
- dat, err := json.Marshal(models)
+
+ log.Debug().Msgf("Models %#v", m)
+
+ dat, err := json.Marshal(m)
if err != nil {
- return err
+ return fmt.Errorf("could not marshal models: %w", err)
}
return c.Send(dat)
}
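/models/available now serializes only each entry's Metadata instead of the full GalleryModel, dropping config_file and overrides from the listing. A client-side sketch; the struct mirrors a subset of gallery.Metadata and the address is illustrative:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// Subset of gallery.Metadata that this sketch cares about.
type modelMeta struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

func main() {
	resp, err := http.Get("http://localhost:8080/models/available") // illustrative LocalAI address
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var models []modelMeta
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		log.Fatal(err)
	}
	for _, m := range models {
		fmt.Println(m.Name, m.URL)
	}
}
```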
2 changes: 1 addition & 1 deletion core/http/endpoints/localai/tokenize.go
@@ -11,6 +11,7 @@ import (

// TokenizeEndpoint exposes a REST API to tokenize the content
// @Summary Tokenize the input.
+ // @Param request body schema.TokenizeRequest true "Request"
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
@@ -29,7 +30,6 @@ func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, app
if err != nil {
return err
}
-
return ctx.JSON(tokenResponse)
}
}
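The new @Param annotation surfaces the request body in the generated Swagger spec. A sketch of exercising the endpoint; the model and content keys are assumed from schema.TokenizeRequest, not confirmed by this diff:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Assumed shape of schema.TokenizeRequest.
	body, _ := json.Marshal(map[string]string{
		"model":   "hermes-2-theta-llama-3-8b",
		"content": "Hello, world",
	})

	resp, err := http.Post("http://localhost:8080/v1/tokenize", // illustrative LocalAI address
		"application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // schema.TokenizeResponse, e.g. {"tokens":[...]}
}
```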
23 changes: 22 additions & 1 deletion docs/content/docs/overview.md
@@ -40,6 +40,10 @@ icon = "info"
</a>
</p>

+ <p align="center">
+ <a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+ </p>

<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
@@ -118,7 +122,24 @@ To help the project you can:

## 🌟 Star history

- [![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)
+ [![LocalAI Star history Chart](https://api.star-history.com/svg?repos=mudler/LocalAI&type=Date)](https://star-history.com/#mudler/LocalAI&Date)
+
+ ## ❤️ Sponsors
+
+ > Do you find LocalAI useful?
+ Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
+
+ A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
+
+ <p align="center">
+ <a href="https://www.spectrocloud.com/" target="blank">
+ <img width=200 src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
+ </a>
+ <a href="https://www.premai.io/" target="blank">
+ <img width=200 src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
+ </a>
+ </p>

## 📖 License

2 changes: 1 addition & 1 deletion docs/themes/hugo-theme-relearn
(submodule pointer update; diff not rendered)
