Skip to content

Commit 0cd8d48

Browse files
committed
Merge branch 'concedo_experimental' into croco_nex_0
2 parents 1820d97 + f4e2f4b commit 0cd8d48

File tree

7 files changed

+149
-134
lines changed

7 files changed

+149
-134
lines changed

Diff for: Makefile

+118-118
Original file line number | Diff line number | Diff line change
@@ -30,14 +30,14 @@ endif
3030
# Mac OS + Arm can report x86_64
3131
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
3232
ifeq ($(UNAME_S),Darwin)
33-
ifneq ($(UNAME_P),arm)
34-
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
35-
ifeq ($(SYSCTL_M),1)
36-
# UNAME_P := arm
37-
# UNAME_M := arm64
38-
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
39-
endif
40-
endif
33+
ifneq ($(UNAME_P),arm)
34+
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
35+
ifeq ($(SYSCTL_M),1)
36+
# UNAME_P := arm
37+
# UNAME_M := arm64
38+
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
39+
endif
40+
endif
4141
endif
4242

4343
#
@@ -112,9 +112,9 @@ ifeq ($(UNAME_S),Darwin)
112112
CFLAGS += -pthread
113113
CXXFLAGS += -pthread
114114
CLANG_VER = $(shell clang -v 2>&1 | head -n 1 | awk 'BEGIN {FS="[. ]"};{print $$1 $$2 $$4}')
115-
ifeq ($(CLANG_VER),Appleclang15)
116-
LDFLAGS += -ld_classic
117-
endif
115+
ifeq ($(CLANG_VER),Appleclang15)
116+
LDFLAGS += -ld_classic
117+
endif
118118
endif
119119
ifeq ($(UNAME_S),FreeBSD)
120120
CFLAGS += -pthread
@@ -149,48 +149,48 @@ CXXV := $(shell $(CXX) --version | head -n 1)
149149
# TODO: probably these flags need to be tweaked on some architectures
150150
# feel free to update the Makefile for your architecture and send a pull request or issue
151151
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
152-
# Use all CPU extensions that are available:
152+
# Use all CPU extensions that are available:
153153
# The old library NEEDS -mf16c to work, so we must build with it; the new one doesn't.
154-
ifeq ($(OS),Windows_NT)
155-
ifdef LLAMA_PORTABLE
156-
CFLAGS +=
157-
NONECFLAGS +=
158-
SIMPLECFLAGS += -mavx -msse3 -mssse3
159-
SIMPLERCFLAGS += -msse3 -mssse3
160-
ifdef LLAMA_NOAVX2
161-
FULLCFLAGS += -msse3 -mssse3 -mavx
162-
else
163-
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
164-
endif
165-
else
166-
CFLAGS += -march=native -mtune=native
167-
endif
168-
else
169-
ifdef LLAMA_PORTABLE
170-
CFLAGS +=
171-
NONECFLAGS +=
172-
SIMPLECFLAGS += -mavx -msse3 -mssse3
173-
SIMPLERCFLAGS += -msse3 -mssse3
174-
ifdef LLAMA_NOAVX2
175-
FULLCFLAGS += -msse3 -mssse3 -mavx
176-
else
177-
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
178-
endif
179-
else
180-
CFLAGS += -march=native -mtune=native
181-
endif
182-
endif
154+
ifeq ($(OS),Windows_NT)
155+
ifdef LLAMA_PORTABLE
156+
CFLAGS +=
157+
NONECFLAGS +=
158+
SIMPLECFLAGS += -mavx -msse3 -mssse3
159+
SIMPLERCFLAGS += -msse3 -mssse3
160+
ifdef LLAMA_NOAVX2
161+
FULLCFLAGS += -msse3 -mssse3 -mavx
162+
else
163+
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
164+
endif
165+
else
166+
CFLAGS += -march=native -mtune=native
167+
endif
168+
else
169+
ifdef LLAMA_PORTABLE
170+
CFLAGS +=
171+
NONECFLAGS +=
172+
SIMPLECFLAGS += -mavx -msse3 -mssse3
173+
SIMPLERCFLAGS += -msse3 -mssse3
174+
ifdef LLAMA_NOAVX2
175+
FULLCFLAGS += -msse3 -mssse3 -mavx
176+
else
177+
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
178+
endif
179+
else
180+
CFLAGS += -march=native -mtune=native
181+
endif
182+
endif
183183
endif
184184

185185
ifndef LLAMA_NO_ACCELERATE
186-
# Mac M1 - include Accelerate framework.
187-
# `-framework Accelerate` works on Mac Intel as well, with a negligible performance boost (as of the predict time).
188-
ifeq ($(UNAME_S),Darwin)
189-
CFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
190-
CXXFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
191-
LDFLAGS += -framework Accelerate
192-
OBJS += ggml-blas.o
193-
endif
186+
# Mac M1 - include Accelerate framework.
187+
# `-framework Accelerate` works on Mac Intel as well, with a negligible performance boost (as of the predict time).
188+
ifeq ($(UNAME_S),Darwin)
189+
CFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
190+
CXXFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
191+
LDFLAGS += -framework Accelerate
192+
OBJS += ggml-blas.o
193+
endif
194194
endif
195195

196196
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
@@ -315,17 +315,17 @@ ggml_v3-cuda.o: otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h
315315
endif # LLAMA_CUBLAS
316316

317317
ifdef LLAMA_HIPBLAS
318-
ifeq ($(wildcard /opt/rocm),)
319-
ROCM_PATH ?= /usr
320-
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
321-
HCC := $(ROCM_PATH)/bin/hipcc
322-
HCXX := $(ROCM_PATH)/bin/hipcc
323-
else
324-
ROCM_PATH ?= /opt/rocm
325-
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
326-
HCC := $(ROCM_PATH)/llvm/bin/clang
327-
HCXX := $(ROCM_PATH)/llvm/bin/clang++
328-
endif
318+
ifeq ($(wildcard /opt/rocm),)
319+
ROCM_PATH ?= /usr
320+
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
321+
HCC := $(ROCM_PATH)/bin/hipcc
322+
HCXX := $(ROCM_PATH)/bin/hipcc
323+
else
324+
ROCM_PATH ?= /opt/rocm
325+
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
326+
HCC := $(ROCM_PATH)/llvm/bin/clang
327+
HCXX := $(ROCM_PATH)/llvm/bin/clang++
328+
endif
329329
LLAMA_CUDA_DMMV_X ?= 32
330330
LLAMA_CUDA_MMV_Y ?= 1
331331
LLAMA_CUDA_KQUANTS_ITER ?= 2
@@ -370,26 +370,26 @@ ggml-metal.o: ggml/src/ggml-metal/ggml-metal.m ggml/src/ggml-metal/ggml-metal-im
370370
endif # LLAMA_METAL
371371

372372
ifneq ($(filter aarch64%,$(UNAME_M)),)
373-
# Apple M1, M2, etc.
374-
# Raspberry Pi 3, 4, Zero 2 (64-bit)
375-
ifdef LLAMA_PORTABLE
376-
CFLAGS +=
377-
CXXFLAGS +=
378-
else
379-
# sve is cooked on termux so we are disabling it
380-
ifeq ($(UNAME_O), Android)
381-
ifneq ($(findstring clang, $(CCV)), )
382-
CFLAGS += -mcpu=native+nosve
383-
CXXFLAGS += -mcpu=native+nosve
384-
else
385-
CFLAGS += -mcpu=native
386-
CXXFLAGS += -mcpu=native
387-
endif
388-
else
389-
CFLAGS += -mcpu=native
390-
CXXFLAGS += -mcpu=native
391-
endif
392-
endif
373+
# Apple M1, M2, etc.
374+
# Raspberry Pi 3, 4, Zero 2 (64-bit)
375+
ifdef LLAMA_PORTABLE
376+
CFLAGS +=
377+
CXXFLAGS +=
378+
else
379+
# sve is cooked on termux so we are disabling it
380+
ifeq ($(UNAME_O), Android)
381+
ifneq ($(findstring clang, $(CCV)), )
382+
CFLAGS += -mcpu=native+nosve
383+
CXXFLAGS += -mcpu=native+nosve
384+
else
385+
CFLAGS += -mcpu=native
386+
CXXFLAGS += -mcpu=native
387+
endif
388+
else
389+
CFLAGS += -mcpu=native
390+
CXXFLAGS += -mcpu=native
391+
endif
392+
endif
393393
endif
394394

395395
ifneq ($(filter armv6%,$(UNAME_M)),)
@@ -409,10 +409,10 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
409409
endif
410410
ifneq ($(filter ppc64%,$(UNAME_M)),)
411411
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
412-
ifneq (,$(findstring POWER9,$(POWER9_M)))
413-
CFLAGS += -mcpu=power9
414-
CXXFLAGS += -mcpu=power9
415-
endif
412+
ifneq (,$(findstring POWER9,$(POWER9_M)))
413+
CFLAGS += -mcpu=power9
414+
CXXFLAGS += -mcpu=power9
415+
endif
416416
endif
417417

418418

@@ -427,49 +427,49 @@ NOTIFY_MSG =
427427

428428
ifeq ($(OS),Windows_NT)
429429
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
430-
ifdef LLAMA_PORTABLE
430+
ifdef LLAMA_PORTABLE
431431
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
432432
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
433-
endif
433+
endif
434434

435-
ifdef LLAMA_CLBLAST
435+
ifdef LLAMA_CLBLAST
436436
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
437-
endif
438-
ifdef LLAMA_VULKAN
437+
endif
438+
ifdef LLAMA_VULKAN
439439
VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ lib/vulkan-1.lib -shared -o $@.dll $(LDFLAGS)
440-
endif
441-
442-
ifdef LLAMA_CUBLAS
443-
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
444-
endif
445-
ifdef LLAMA_HIPBLAS
446-
HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS)
447-
endif
440+
endif
441+
442+
ifdef LLAMA_CUBLAS
443+
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
444+
endif
445+
ifdef LLAMA_HIPBLAS
446+
HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS)
447+
endif
448448
else
449449
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
450-
ifdef LLAMA_PORTABLE
451-
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
450+
ifdef LLAMA_PORTABLE
451+
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
452452
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
453453
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
454-
endif
455-
endif
456-
457-
ifdef LLAMA_CLBLAST
458-
ifeq ($(UNAME_S),Darwin)
459-
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
460-
else
461-
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
462-
endif
463-
endif
464-
ifdef LLAMA_CUBLAS
465-
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
466-
endif
467-
ifdef LLAMA_HIPBLAS
468-
HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS)
469-
endif
470-
ifdef LLAMA_VULKAN
471-
VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
472-
endif
454+
endif
455+
endif
456+
457+
ifdef LLAMA_CLBLAST
458+
ifeq ($(UNAME_S),Darwin)
459+
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
460+
else
461+
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
462+
endif
463+
endif
464+
ifdef LLAMA_CUBLAS
465+
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
466+
endif
467+
ifdef LLAMA_HIPBLAS
468+
HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS)
469+
endif
470+
ifdef LLAMA_VULKAN
471+
VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
472+
endif
473473
endif
474474

475475
ifndef LLAMA_CLBLAST

Diff for: examples/llava/clip.cpp

+1-2
Original file line number | Diff line number | Diff line change
@@ -1744,7 +1744,7 @@ uint8_t* scale_down_image(uint8_t* input_image, int& nx, int& ny, int nc, int ma
17441744
return resized_image;
17451745
}
17461746

1747-
bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img) {
1747+
bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img, const int maxdims) {
17481748
int nx, ny, nc;
17491749
auto * data = stbi_load_from_memory(bytes, bytes_length, &nx, &ny, &nc, 3);
17501750
if (!data) {
@@ -1753,7 +1753,6 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length
17531753
}
17541754

17551755
float maxaspect = 4.0f;
1756-
int maxdims = 2048;
17571756

17581757
//check if image needs downscaling
17591758
if (nx > maxdims || ny > maxdims) {

Diff for: examples/llava/clip.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
7676
CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
7777

7878
/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
79-
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
79+
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img, const int maxdims);
8080

8181
/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
8282
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );

Diff for: examples/llava/llava.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -473,7 +473,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
473473

474474
struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length) {
475475
clip_image_u8 * img = clip_image_u8_init();
476-
if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) {
476+
if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img, 2048)) {
477477
clip_image_u8_free(img);
478478
LOG_ERR("%s: can't load image from bytes, is it a valid image?", __func__);
479479
return NULL;

Diff for: expose.h

+1
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ struct load_model_inputs
4545
const int draft_gpulayers = 999;
4646
const float draft_gpusplit[tensor_split_max] = {};
4747
const char * mmproj_filename = nullptr;
48+
const int visionmaxres = 2048;
4849
const bool use_mmap = false;
4950
const bool use_mlock = false;
5051
const bool use_smartcontext = false;

Diff for: gpttype_adapter.cpp

+6-2
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ static clip_image_u8 * clp_img_data = nullptr; //most recent image
103103
static std::vector<llava_image> llava_images;
104104
static std::string llava_composite_image_signature = ""; //for identifying when the llava images change, we need to invalidate the cache
105105
static int current_llava_identifier = LLAVA_TOKEN_IDENTIFIER_A;
106+
static int vision_max_res = 2048;
106107

107108
static kcpp_params * kcpp_data = nullptr;
108109
static int max_context_limit_at_load = 0;
@@ -1951,6 +1952,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
19511952
= gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx = gpt2_ctx_v3.hparams.n_ctx
19521953
= mpt_ctx_v3.hparams.n_ctx = kcpp_data->n_ctx;
19531954

1955+
vision_max_res = inputs.visionmaxres;
1956+
19541957
//determine rope scaling params
19551958
float rope_freq_scale = 1.0f;
19561959
float rope_freq_base = 10000.0f;
@@ -2198,7 +2201,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
21982201
}
21992202
if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL)
22002203
{
2201-
printf("Qwen2VL detected! Mrope will be used!\n");
2204+
printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n");
2205+
kcpp_data->use_contextshift = false;
22022206
}
22032207
model_params.main_gpu = cu_parseinfo_maindevice;
22042208

@@ -3432,7 +3436,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
34323436
{
34333437
std::string llava_image = llava_images[i].b64data;
34343438
const std::vector<uint8_t> image_buffer = kcpp_base64_decode(llava_image);
3435-
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), clp_img_data))
3439+
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), clp_img_data, vision_max_res))
34363440
{
34373441
//failed to load image
34383442
printf("\nError: Clip image %d failed to load!",i);

0 commit comments

Comments
 (0)