From 0e4602aa34f313a1ac215f2a252ca8332297e1c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20L=C3=B3pez=20L=C3=B3pez?= Date: Mon, 13 May 2024 12:42:46 +0200 Subject: [PATCH 1/8] =?UTF-8?q?Return=20quickly=20if=20there=20is=20no=20m?= =?UTF-8?q?odel=20in=20bearer=20token,=20an=20inputModel=20is=20specified?= =?UTF-8?q?=20but=20it=20doen=C2=B4t=20exist=20in=20modelPath?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jose Luis López López --- core/http/ctx/fiber.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index ffb631112abe..6f3d6c06d59f 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -38,6 +38,17 @@ func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput stri if bearerExists { log.Debug().Msgf("Using model from bearer token: %s", bearer) modelInput = bearer + //Takes precedence, then return inmediatly + return modelInput,nil } + + //if a model is specified in modelInput, and there is no bearertoken,but it doesn´t exists in the model path, return QUICKLY. https://github.com/mudler/LocalAI/issues/1076 + modelExists := modelInput != "" && loader.ExistsInModelPath(modelInput) + if !modelExists{ + log.Debug().Msgf("The specified model does not exist in the model PATH, returning error") + return "", fmt.Errorf("no model specified") + } + + //A model is specified, or the first available model is picked return modelInput, nil } From 1766deec45a6cc2b4ef31273ca6a2bf8233f5ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Mon, 13 May 2024 02:37:52 -0700 Subject: [PATCH 2/8] feat: auto select llama-cpp cpu variant (#2305) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * auto select cpu variant Signed-off-by: Sertac Ozercan * remove cuda target for now Signed-off-by: Sertac Ozercan * fix metal Signed-off-by: Sertac Ozercan * fix path Signed-off-by: Sertac Ozercan --------- Signed-off-by: Sertac Ozercan Signed-off-by: Jose Luis López López --- Makefile | 42 +++++++------- go.mod | 77 ++++++++++++------------- go.sum | 18 +----- pkg/model/initializers.go | 114 +++++++++++++++++++++++++++----------- 4 files changed, 140 insertions(+), 111 deletions(-) diff --git a/Makefile b/Makefile index 7f3b9cf84007..9725faf1bdfa 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s) endif ifeq ($(OS),Darwin) - + ifeq ($(OSX_SIGNING_IDENTITY),) OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/') endif @@ -154,8 +154,8 @@ endif ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings -ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp -ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-noavx +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all @@ -652,30 +652,30 @@ else LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server endif -backend-assets/grpc/llama-cpp: backend-assets/grpc - $(info ${GREEN}I llama-cpp build info:standard${RESET}) - cp -rf backend/cpp/llama backend/cpp/llama-default - $(MAKE) -C backend/cpp/llama-default purge - $(MAKE) 
VARIANT="llama-default" build-llama-cpp-grpc-server - cp -rfv backend/cpp/llama-default/grpc-server backend-assets/grpc/llama-cpp -# TODO: every binary should have its own folder instead, so can have different metal implementations -ifeq ($(BUILD_TYPE),metal) - cp backend/cpp/llama-default/llama.cpp/build/bin/default.metallib backend-assets/grpc/ -endif +backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc + cp -rf backend/cpp/llama backend/cpp/llama-avx2 + $(MAKE) -C backend/cpp/llama-avx2 purge + $(info ${GREEN}I llama-cpp build info:avx2${RESET}) + CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server + cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2 -backend-assets/grpc/llama-cpp-noavx: backend-assets/grpc - cp -rf backend/cpp/llama backend/cpp/llama-noavx - $(MAKE) -C backend/cpp/llama-noavx purge - $(info ${GREEN}I llama-cpp build info:noavx${RESET}) - CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF" $(MAKE) VARIANT="llama-noavx" build-llama-cpp-grpc-server - cp -rfv backend/cpp/llama-noavx/grpc-server backend-assets/grpc/llama-cpp-noavx +backend-assets/grpc/llama-cpp-avx: backend-assets/grpc + cp -rf backend/cpp/llama backend/cpp/llama-avx + $(MAKE) -C backend/cpp/llama-avx purge + $(info ${GREEN}I llama-cpp build info:avx${RESET}) + CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server + cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc cp -rf backend/cpp/llama backend/cpp/llama-fallback $(MAKE) -C backend/cpp/llama-fallback purge $(info ${GREEN}I llama-cpp build info:fallback${RESET}) - CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server + CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback +# TODO: every binary should have its own folder instead, so can have different metal implementations +ifeq ($(BUILD_TYPE),metal) + cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/ +endif backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ @@ -719,7 +719,7 @@ docker: --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . 
- + docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" docker build \ diff --git a/go.mod b/go.mod index 75fd5a82afe2..833fea9f8c19 100644 --- a/go.mod +++ b/go.mod @@ -7,8 +7,11 @@ toolchain go1.22.2 require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf github.com/Masterminds/sprig/v3 v3.2.3 + github.com/alecthomas/kong v0.9.0 github.com/charmbracelet/glamour v0.7.0 + github.com/chasefleming/elem-go v0.25.0 github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df + github.com/elliotchance/orderedmap/v2 v2.2.0 github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e github.com/go-audio/wav v1.1.0 @@ -18,10 +21,13 @@ require ( github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 github.com/google/uuid v1.5.0 - github.com/hashicorp/go-multierror v1.1.1 github.com/hpcloud/tail v1.0.0 github.com/imdario/mergo v0.3.16 + github.com/jaypipes/ghw v0.12.0 + github.com/joho/godotenv v1.5.1 + github.com/klauspost/cpuid/v2 v2.2.7 github.com/mholt/archiver/v3 v3.5.1 + github.com/microcosm-cc/bluemonday v1.0.26 github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 @@ -35,6 +41,7 @@ require ( github.com/russross/blackfriday v1.6.0 github.com/sashabaranov/go-openai v1.20.4 github.com/schollz/progressbar/v3 v3.13.1 + github.com/shirou/gopsutil/v3 v3.23.9 github.com/stretchr/testify v1.9.0 github.com/swaggo/swag v1.16.3 github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 @@ -43,23 +50,13 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.42.0 go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0 + golang.org/x/sys v0.19.0 google.golang.org/grpc v1.59.0 google.golang.org/protobuf v1.33.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) -require ( - github.com/go-ole/go-ole v1.2.6 // indirect - github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect - github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect - github.com/shirou/gopsutil/v3 v3.23.9 - github.com/shoenig/go-m1cpu v0.1.6 // indirect - github.com/tklauser/go-sysconf v0.3.12 // indirect - github.com/tklauser/numcpus v0.6.1 // indirect - github.com/yusufpapurcu/wmi v1.2.3 // indirect -) - require ( github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect github.com/KyleBanks/depth v1.2.1 // indirect @@ -69,12 +66,12 @@ require ( github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect github.com/StackExchange/wmi v1.2.1 // indirect github.com/alecthomas/chroma/v2 v2.8.0 // indirect + github.com/andybalholm/brotli v1.0.5 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/chasefleming/elem-go v0.25.0 // indirect github.com/containerd/continuity v0.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.8.1 // indirect @@ -84,34 +81,45 @@ require ( github.com/docker/go-units v0.4.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/ghodss/yaml v1.0.0 // indirect + github.com/go-audio/audio v1.0.0 // indirect + 
github.com/go-audio/riff v1.0.0 // indirect + github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/gofiber/contrib/fiberzerolog v1.0.0 github.com/gofiber/template v1.8.3 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/huandu/xstrings v1.3.3 // indirect - github.com/jaypipes/ghw v0.12.0 // indirect github.com/jaypipes/pcidb v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect - github.com/klauspost/cpuid/v2 v2.2.7 // indirect + github.com/klauspost/compress v1.17.0 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect - github.com/microcosm-cc/bluemonday v1.0.26 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/copystructure v1.0.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/reflectwalk v1.0.0 // indirect github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect + github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 github.com/muesli/reflow v0.3.0 // indirect github.com/muesli/termenv v0.15.2 // indirect github.com/nwaples/rardecode v1.1.0 // indirect @@ -123,53 +131,38 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pkoukk/tiktoken-go v0.1.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cast v1.3.1 // indirect github.com/swaggo/files/v2 v2.0.0 // indirect + github.com/tklauser/go-sysconf v0.3.12 // indirect + github.com/tklauser/numcpus v0.6.1 // indirect github.com/ulikunitz/xz v0.5.9 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/tcplisten v1.0.0 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/xeipuuv/gojsonschema v1.2.0 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect 
github.com/yuin/goldmark v1.5.4 // indirect github.com/yuin/goldmark-emoji v1.0.2 // indirect + github.com/yusufpapurcu/wmi v1.2.3 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/crypto v0.22.0 // indirect golang.org/x/mod v0.16.0 // indirect + golang.org/x/net v0.24.0 // indirect golang.org/x/term v0.19.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/tools v0.19.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect howett.net/plist v1.0.0 // indirect ) - -require ( - github.com/alecthomas/kong v0.9.0 - github.com/andybalholm/brotli v1.0.5 // indirect - github.com/go-audio/audio v1.0.0 // indirect - github.com/go-audio/riff v1.0.0 // indirect - github.com/go-logr/logr v1.2.4 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect - github.com/gofiber/contrib/fiberzerolog v1.0.0 - github.com/google/go-cmp v0.6.0 // indirect - github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect - github.com/hashicorp/errwrap v1.0.0 // indirect - github.com/joho/godotenv v1.5.1 - github.com/klauspost/compress v1.17.0 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect - github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 - github.com/rivo/uniseg v0.2.0 // indirect - github.com/valyala/bytebufferpool v1.0.0 // indirect - github.com/valyala/tcplisten v1.0.0 // indirect - golang.org/x/net v0.24.0 // indirect - golang.org/x/sys v0.19.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.19.0 // indirect -) diff --git a/go.sum b/go.sum index 7f23b64d63ea..da2cef048842 100644 --- a/go.sum +++ b/go.sum @@ -67,6 +67,8 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7 github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= +github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk= +github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7zncAdBIBq6u56Hb1PRU5Q= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= @@ -149,14 +151,8 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= -github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= 
-github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= -github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= @@ -299,8 +295,6 @@ github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= -github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= -github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0= github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= @@ -391,8 +385,6 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -411,8 +403,6 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= -golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= -golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -450,16 +440,12 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.19.0 
h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index d5c77c3f390f..4ccc21145458 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -13,6 +13,9 @@ import ( grpc "github.com/go-skynet/LocalAI/pkg/grpc" "github.com/phayes/freeport" "github.com/rs/zerolog/log" + "golang.org/x/sys/cpu" + + "github.com/elliotchance/orderedmap/v2" ) var Aliases map[string]string = map[string]string{ @@ -24,8 +27,11 @@ var Aliases map[string]string = map[string]string{ const ( LlamaGGML = "llama-ggml" - LLamaCPP = "llama-cpp" + LLamaCPP = "llama-cpp" + LLamaCPPCUDA12 = "llama-cpp-cuda12" + LLamaCPPAVX2 = "llama-cpp-avx2" + LLamaCPPAVX = "llama-cpp-avx" LLamaCPPFallback = "llama-cpp-fallback" Gpt4AllLlamaBackend = "gpt4all-llama" @@ -50,14 +56,14 @@ func backendPath(assetDir, backend string) string { // backendsInAssetDir returns the list of backends in the asset directory // that should be loaded -func backendsInAssetDir(assetDir string) ([]string, error) { +func backendsInAssetDir(assetDir string) (*orderedmap.OrderedMap[string, any], error) { // Exclude backends from automatic loading excludeBackends := []string{LocalStoreBackend} entry, err := os.ReadDir(backendPath(assetDir, "")) if err != nil { return nil, err } - var backends []string + backends := make(map[string][]string) ENTRY: for _, e := range entry { for _, exclude := range excludeBackends { @@ -66,7 +72,28 @@ ENTRY: } } if !e.IsDir() { - backends = append(backends, e.Name()) + //backends = append(backends, e.Name()) + if !strings.Contains(e.Name(), LLamaCPP) { + backends[e.Name()] = []string{} + } + } + } + + foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback := false, false, false + if _, ok := backends[LLamaCPP]; !ok { + for _, e := range entry { + if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2) + foundLCPPAVX2 = true + } + if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX) + foundLCPPAVX = true + } + if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback) + foundLCPPFallback = true + } } } @@ -77,37 +104,40 @@ ENTRY: // First has more priority priorityList := []string{ // First llama.cpp and llama-ggml - LLamaCPP, LLamaCPPFallback, LlamaGGML, Gpt4All, + LLamaCPP, LlamaGGML, Gpt4All, } + toTheEnd := []string{ // last has to be huggingface LCHuggingFaceBackend, // then bert embeddings BertEmbeddingsBackend, } - slices.Reverse(priorityList) - slices.Reverse(toTheEnd) - - // order certain backends 
first - for _, b := range priorityList { - for i, be := range backends { - if be == b { - backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...) - break - } + + // create an ordered map + orderedBackends := orderedmap.NewOrderedMap[string, any]() + // add priorityList first + for _, p := range priorityList { + if _, ok := backends[p]; ok { + orderedBackends.Set(p, backends[p]) } } - // make sure that some others are pushed at the end - for _, b := range toTheEnd { - for i, be := range backends { - if be == b { - backends = append(append(backends[:i], backends[i+1:]...), be) - break + + for k, v := range backends { + if !slices.Contains(toTheEnd, k) { + if _, ok := orderedBackends.Get(k); !ok { + orderedBackends.Set(k, v) } } } - return backends, nil + for _, t := range toTheEnd { + if _, ok := backends[t]; ok { + orderedBackends.Set(t, backends[t]) + } + } + + return orderedBackends, nil } // starts the grpcModelProcess for the backend, and returns a grpc client @@ -159,6 +189,21 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string } } else { grpcProcess := backendPath(o.assetDir, backend) + + // for llama-cpp, check CPU capabilities and load the appropriate variant + if backend == LLamaCPP { + if cpu.X86.HasAVX2 { + log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend) + grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2) + } else if cpu.X86.HasAVX { + log.Info().Msgf("[%s] attempting to load with AVX variant", backend) + grpcProcess = backendPath(o.assetDir, LLamaCPPAVX) + } else { + log.Info().Msgf("[%s] attempting to load with fallback variant", backend) + grpcProcess = backendPath(o.assetDir, LLamaCPPFallback) + } + } + // Check if the file exists if _, err := os.Stat(grpcProcess); os.IsNotExist(err) { return "", fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess) @@ -301,25 +346,30 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { var err error // autoload also external backends - allBackendsToAutoLoad := []string{} + allBackendsToAutoLoad := orderedmap.NewOrderedMap[string, any]() autoLoadBackends, err := backendsInAssetDir(o.assetDir) if err != nil { return nil, err } log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends) - allBackendsToAutoLoad = append(allBackendsToAutoLoad, autoLoadBackends...) + + for _, k := range autoLoadBackends.Keys() { + v, _ := autoLoadBackends.Get(k) + allBackendsToAutoLoad.Set(k, v) + } + for _, b := range o.externalBackends { - allBackendsToAutoLoad = append(allBackendsToAutoLoad, b) + allBackendsToAutoLoad.Set(b, []string{}) } if o.model != "" { - log.Info().Msgf("Trying to load the model '%s' with all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", ")) + log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.model, allBackendsToAutoLoad.Keys()) } - for _, b := range allBackendsToAutoLoad { - log.Info().Msgf("[%s] Attempting to load", b) + for _, key := range allBackendsToAutoLoad.Keys() { + log.Info().Msgf("[%s] Attempting to load", key) options := []Option{ - WithBackendString(b), + WithBackendString(key), WithModel(o.model), WithLoadGRPCLoadModelOpts(o.gRPCOptions), WithThreads(o.threads), @@ -332,14 +382,14 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { model, modelerr := ml.BackendLoader(options...) 
if modelerr == nil && model != nil { - log.Info().Msgf("[%s] Loads OK", b) + log.Info().Msgf("[%s] Loads OK", key) return model, nil } else if modelerr != nil { err = errors.Join(err, modelerr) - log.Info().Msgf("[%s] Fails: %s", b, modelerr.Error()) + log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error()) } else if model == nil { err = errors.Join(err, fmt.Errorf("backend returned no usable model")) - log.Info().Msgf("[%s] Fails: %s", b, "backend returned no usable model") + log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model") } } From 2e38c4ef12dcf5918cf24dfbef0a2e2250bbabe4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 May 2024 18:44:10 +0200 Subject: [PATCH 3/8] models(gallery): add aura-llama-Abliterated (#2309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ettore Di Giacinto Signed-off-by: Jose Luis López López --- gallery/index.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 66faea5f6a10..d50885d758ba 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -370,6 +370,27 @@ - filename: L3-Solana-8B-v1.q5_K_M.gguf sha256: 9b8cd2c3beaab5e4f82efd10e7d44f099ad40a4e0ee286ca9fce02c8eec26d2f uri: huggingface://Sao10K/L3-Solana-8B-v1-GGUF/L3-Solana-8B-v1.q5_K_M.gguf +- !!merge <<: *llama3 + name: "aura-llama-abliterated" + icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/AwLNDVB-GIY7k0wnVV_TX.png + license: apache-2.0 + urls: + - https://huggingface.co/TheSkullery/Aura-Llama-Abliterated + - https://huggingface.co/mudler/Aura-Llama-Abliterated-Q4_K_M-GGUF + description: | + Aura-llama is using the methodology presented by SOLAR for scaling LLMs called depth up-scaling (DUS), which encompasses architectural modifications with continued pretraining. Using the solar paper as a base, I integrated Llama-3 weights into the upscaled layers, and In the future plan to continue training the model. 
+ + Aura-llama is a merge of the following models to create a base model to work from: + + meta-llama/Meta-Llama-3-8B-Instruct + meta-llama/Meta-Llama-3-8B-Instruct + overrides: + parameters: + model: aura-llama-abliterated.Q4_K_M.gguf + files: + - filename: aura-llama-abliterated.Q4_K_M.gguf + sha256: ad4a16b90f1ffb5b49185b3fd00ed7adb1cda69c4fad0a1d987bd344ce601dcd + uri: huggingface://mudler/Aura-Llama-Abliterated-Q4_K_M-GGUF/aura-llama-abliterated.Q4_K_M.gguf - !!merge <<: *llama3 name: "average_normie_l3_v1_8b-gguf-iq-imatrix" urls: From 2f01e6f41adb19c98853eba44770088b6accf7eb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 May 2024 18:44:25 +0200 Subject: [PATCH 4/8] models(gallery): add Bunny-llama (#2311) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ettore Di Giacinto Signed-off-by: Jose Luis López López --- gallery/index.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d50885d758ba..93cb489b62d6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -852,6 +852,33 @@ - filename: Llava_1.5_Llama3_mmproj_updated.gguf sha256: 4f2bb77ca60f2c932d1c6647d334f5d2cd71966c19e850081030c9883ef1906c uri: https://huggingface.co/ChaoticNeutrals/LLaVA-Llama-3-8B-mmproj-Updated/resolve/main/llava-v1.5-8B-Updated-Stop-Token/mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "bunny-llama-3-8b-v" + urls: + - https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf + description: | + Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, MiniCPM and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source. + + We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub. 
+ icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + overrides: + mmproj: Bunny-Llama-3-8B-Q4_K_M-mmproj.gguf + parameters: + model: Bunny-Llama-3-8B-Q4_K_M.gguf + files: + - filename: Bunny-Llama-3-8B-Q4_K_M-mmproj.gguf + sha256: 96d033387a91e56cf97fa5d60e02c0128ce07c8fa83aaaefb74ec40541615ea5 + uri: huggingace://BAAI/Bunny-Llama-3-8B-V-gguf/mmproj-model-f16.gguf + - filename: Bunny-Llama-3-8B-Q4_K_M.gguf + sha256: 88f0a61f947dbf129943328be7262ae82e3a582a0c75e53544b07f70355a7c30 + uri: huggingace://BAAI/Bunny-Llama-3-8B-V-gguf/ggml-model-Q4_K_M.gguf - !!merge <<: *llama3 name: "llava-llama-3-8b-v1_1" description: | From 78eba71c0f521117956ed279e179460965e485e5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 May 2024 18:44:44 +0200 Subject: [PATCH 5/8] models(gallery): add lumimaidv2 (#2312) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ettore Di Giacinto Signed-off-by: Jose Luis López López --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 93cb489b62d6..f7dbf50fa753 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -572,6 +572,29 @@ - filename: Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf sha256: 1199440aa13c55f5f2cad1cb215535306f21e52a81de23f80a9e3586c8ac1c50 uri: huggingface://Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf +- !!merge <<: *llama3 + name: "llama-3-lumimaid-v2-8b-v0.1-oas-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/JUxfdTot7v7LTdIGYyzYM.png + license: cc-by-nc-4.0 + description: | + This model uses the Llama3 prompting format. + + Llama3 trained on our RP datasets, we tried to have a balance between the ERP and the RP, not too horny, but just enough. + + We also added some non-RP dataset, making the model less dumb overall. It should look like a 40%/60% ratio for Non-RP/RP+ERP data. + + "This model received the Orthogonal Activation Steering treatment, meaning it will rarely refuse any request." + + This is v2! 
+ overrides: + parameters: + model: v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf + files: + - filename: v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf + sha256: b00b4cc2ea4e06db592e5f581171758387106626bcbf445c03a1cb7b424be881 + uri: huggingface://Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf - !!merge <<: *llama3 name: "suzume-llama-3-8B-multilingual" urls: From 118ff66930727d762e55243de373fbaa4be807b5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 May 2024 18:45:58 +0200 Subject: [PATCH 6/8] models(gallery): add orthocopter (#2313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ettore Di Giacinto Signed-off-by: Jose Luis López López --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f7dbf50fa753..88431cdd1f45 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -485,6 +485,24 @@ - filename: Llama-3-Unholy-8B.q8_0.gguf uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf sha256: 419dd76f61afe586076323c17c3a1c983e591472717f1ea178167ede4dc864df +- !!merge <<: *llama3 + name: "orthocopter_8b-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Orthocopter_8B-GGUF-Imatrix + icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/cxM5EaC6ilXnSo_10stA8.png + description: | + This model is thanks to the hard work of lucyknada with the Edgerunners. Her work produced the following model, which I used as the base: + + https://huggingface.co/Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-10fail-1000total + + I then applied two handwritten datasets over top of this and the results are pretty nice, with no refusals and plenty of personality. 
+ overrides: + parameters: + model: Orthocopter_8B-Q4_K_M-imat.gguf + files: + - filename: Orthocopter_8B-Q4_K_M-imat.gguf + uri: huggingface://Lewdiculous/Orthocopter_8B-GGUF-Imatrix/Orthocopter_8B-Q4_K_M-imat.gguf + sha256: ce93366c9eb20329530b19b9d6841a973d458bcdcfa8a521e9f9d0660cc94578 - !!merge <<: *llama3 name: "therapyllama-8b-v1" urls: From 52bd3dc2ef41b2b7f77f5febc61eda324729ee66 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 13 May 2024 19:07:51 +0200 Subject: [PATCH 7/8] feat(llama.cpp): add `flash_attention` and `no_kv_offloading` (#2310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feat(llama.cpp): add flash_attn and no_kv_offload Signed-off-by: Ettore Di Giacinto Signed-off-by: Jose Luis López López --- backend/backend.proto | 3 +++ backend/cpp/llama/grpc-server.cpp | 3 +++ core/backend/options.go | 2 ++ core/config/backend_config.go | 3 +++ 4 files changed, 11 insertions(+) diff --git a/backend/backend.proto b/backend/backend.proto index 778a96ffac97..cb87fe02d46f 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -212,6 +212,9 @@ message ModelOptions { float YarnBetaSlow = 47; string Type = 49; + + bool FlashAttention = 56; + bool NoKVOffload = 57; } message Result { diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 6fb086585f4e..f9673b33ccfa 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2254,6 +2254,9 @@ static void params_parse(const backend::ModelOptions* request, } params.use_mlock = request->mlock(); params.use_mmap = request->mmap(); + params.flash_attn = request->flashattention(); + params.no_kv_offload = request->nokvoffload(); + params.embedding = request->embeddings(); if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } diff --git a/core/backend/options.go b/core/backend/options.go index 4a7435e68586..c638ebd5ed83 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -77,6 +77,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { MaxModelLen: int32(c.MaxModelLen), TensorParallelSize: int32(c.TensorParallelSize), MMProj: c.MMProj, + FlashAttention: c.FlashAttention, + NoKVOffload: c.NoKVOffloading, YarnExtFactor: c.YarnExtFactor, YarnAttnFactor: c.YarnAttnFactor, YarnBetaFast: c.YarnBetaFast, diff --git a/core/config/backend_config.go b/core/config/backend_config.go index cb1b7c2a360b..41c792fb1da3 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -132,6 +132,9 @@ type LLMConfig struct { TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM MMProj string `yaml:"mmproj"` + FlashAttention bool `yaml:"flash_attention"` + NoKVOffloading bool `yaml:"no_kv_offloading"` + RopeScaling string `yaml:"rope_scaling"` ModelType string `yaml:"type"` From fa02b567cff3e84fa24a362a38d3d4ca548f108d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20L=C3=B3pez=20L=C3=B3pez?= Date: Mon, 13 May 2024 20:39:53 +0200 Subject: [PATCH 8/8] better message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modified the message to report that the specified model doesn´t exist in the model Path. 
Signed-off-by: Jose Luis López López --- core/http/ctx/fiber.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index 6f3d6c06d59f..5b2e1e0702e2 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -46,7 +46,7 @@ func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput stri modelExists := modelInput != "" && loader.ExistsInModelPath(modelInput) if !modelExists{ log.Debug().Msgf("The specified model does not exist in the model PATH, returning error") - return "", fmt.Errorf("no model specified") + return "", fmt.Errorf("invalid model filename") } //A model is specified, or the first available model is picked
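
---

Editor's note on patch 2 (auto-selecting the llama.cpp CPU variant): the patch builds three gRPC server binaries (llama-cpp-avx2, llama-cpp-avx, llama-cpp-fallback) and, at load time, uses golang.org/x/sys/cpu to pick the one the host CPU can actually run. Below is a minimal, self-contained Go sketch of that selection logic, hedged as an illustration: the helper name selectLlamaCPPVariant, the main function, and the backend-assets/grpc path layout are assumptions made for the example; in the patch itself this logic lives inside (*ModelLoader).grpcModel in pkg/model/initializers.go and resolves paths through backendPath.

    package main

    import (
        "fmt"
        "path/filepath"

        "golang.org/x/sys/cpu" // CPU feature detection, as used by the patch
    )

    // Variant names mirror the constants added in pkg/model/initializers.go.
    const (
        llamaCPPAVX2     = "llama-cpp-avx2"
        llamaCPPAVX      = "llama-cpp-avx"
        llamaCPPFallback = "llama-cpp-fallback"
    )

    // selectLlamaCPPVariant picks the llama.cpp gRPC binary matching the host
    // CPU, with the same precedence as the patch: AVX2 first, then AVX, then
    // the no-SIMD fallback build. The directory layout is an assumption here.
    func selectLlamaCPPVariant(assetDir string) string {
        grpcDir := filepath.Join(assetDir, "backend-assets", "grpc")
        switch {
        case cpu.X86.HasAVX2:
            return filepath.Join(grpcDir, llamaCPPAVX2)
        case cpu.X86.HasAVX:
            return filepath.Join(grpcDir, llamaCPPAVX)
        default:
            return filepath.Join(grpcDir, llamaCPPFallback)
        }
    }

    func main() {
        // Example: print which variant would be launched from ./assets.
        fmt.Println(selectLlamaCPPVariant("./assets"))
    }

On non-x86 hosts both feature flags report false, so the fallback binary is chosen; this also matches the Makefile change, which now ships the metal library alongside the fallback build.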