@@ -30,14 +30,14 @@ endif
30
30
# Mac OS + Arm can report x86_64
31
31
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
32
32
ifeq ($(UNAME_S ) ,Darwin)
33
- ifneq ($(UNAME_P),arm)
34
- SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
35
- ifeq ($(SYSCTL_M),1)
36
- # UNAME_P := arm
37
- # UNAME_M := arm64
38
- warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\# issuecomment-1282546789)
39
- endif
40
- endif
33
+ ifneq ($(UNAME_P ) ,arm)
34
+ SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
35
+ ifeq ($(SYSCTL_M ) ,1)
36
+ # UNAME_P := arm
37
+ # UNAME_M := arm64
38
+ warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\# issuecomment-1282546789)
39
+ endif
40
+ endif
41
41
endif
42
42
43
43
#
@@ -112,9 +112,9 @@ ifeq ($(UNAME_S),Darwin)
112
112
CFLAGS += -pthread
113
113
CXXFLAGS += -pthread
114
114
CLANG_VER = $(shell clang -v 2>&1 | head -n 1 | awk 'BEGIN {FS="[. ]"};{print $$1 $$2 $$4}')
115
- ifeq ($(CLANG_VER),Appleclang15)
116
- LDFLAGS += -ld_classic
117
- endif
115
+ ifeq ($(CLANG_VER ) ,Appleclang15)
116
+ LDFLAGS += -ld_classic
117
+ endif
118
118
endif
119
119
ifeq ($(UNAME_S ) ,FreeBSD)
120
120
CFLAGS += -pthread
@@ -149,48 +149,48 @@ CXXV := $(shell $(CXX) --version | head -n 1)
149
149
# TODO: probably these flags need to be tweaked on some architectures
150
150
# feel free to update the Makefile for your architecture and send a pull request or issue
151
151
ifeq ($(UNAME_M ) ,$(filter $(UNAME_M ) ,x86_64 i686 amd64) )
152
- # Use all CPU extensions that are available:
152
+ # Use all CPU extensions that are available:
153
153
# old library NEEDS -mf16c to work, so we must build with it; the new one doesn't
154
- ifeq ($(OS),Windows_NT)
155
- ifdef LLAMA_PORTABLE
156
- CFLAGS +=
157
- NONECFLAGS +=
158
- SIMPLECFLAGS += -mavx -msse3 -mssse3
159
- SIMPLERCFLAGS += -msse3 -mssse3
160
- ifdef LLAMA_NOAVX2
161
- FULLCFLAGS += -msse3 -mssse3 -mavx
162
- else
163
- FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
164
- endif
165
- else
166
- CFLAGS += -march=native -mtune=native
167
- endif
168
- else
169
- ifdef LLAMA_PORTABLE
170
- CFLAGS +=
171
- NONECFLAGS +=
172
- SIMPLECFLAGS += -mavx -msse3 -mssse3
173
- SIMPLERCFLAGS += -msse3 -mssse3
174
- ifdef LLAMA_NOAVX2
175
- FULLCFLAGS += -msse3 -mssse3 -mavx
176
- else
177
- FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
178
- endif
179
- else
180
- CFLAGS += -march=native -mtune=native
181
- endif
182
- endif
154
+ ifeq ($(OS ) ,Windows_NT)
155
+ ifdef LLAMA_PORTABLE
156
+ CFLAGS +=
157
+ NONECFLAGS +=
158
+ SIMPLECFLAGS += -mavx -msse3 -mssse3
159
+ SIMPLERCFLAGS += -msse3 -mssse3
160
+ ifdef LLAMA_NOAVX2
161
+ FULLCFLAGS += -msse3 -mssse3 -mavx
162
+ else
163
+ FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
164
+ endif
165
+ else
166
+ CFLAGS += -march=native -mtune=native
167
+ endif
168
+ else
169
+ ifdef LLAMA_PORTABLE
170
+ CFLAGS +=
171
+ NONECFLAGS +=
172
+ SIMPLECFLAGS += -mavx -msse3 -mssse3
173
+ SIMPLERCFLAGS += -msse3 -mssse3
174
+ ifdef LLAMA_NOAVX2
175
+ FULLCFLAGS += -msse3 -mssse3 -mavx
176
+ else
177
+ FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
178
+ endif
179
+ else
180
+ CFLAGS += -march=native -mtune=native
181
+ endif
182
+ endif
183
183
endif
184
184
185
185
ifndef LLAMA_NO_ACCELERATE
186
- # Mac M1 - include Accelerate framework.
187
- # `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
188
- ifeq ($(UNAME_S),Darwin)
189
- CFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
190
- CXXFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
191
- LDFLAGS += -framework Accelerate
192
- OBJS += ggml-blas.o
193
- endif
186
+ # Mac M1 - include Accelerate framework.
187
+ # `-framework Accelerate` works on Mac Intel as well, with a negligible performance boost (as of the predict time).
188
+ ifeq ($(UNAME_S ) ,Darwin)
189
+ CFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
190
+ CXXFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
191
+ LDFLAGS += -framework Accelerate
192
+ OBJS += ggml-blas.o
193
+ endif
194
194
endif
195
195
196
196
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
@@ -315,17 +315,17 @@ ggml_v3-cuda.o: otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h
315
315
endif # LLAMA_CUBLAS
316
316
317
317
ifdef LLAMA_HIPBLAS
318
- ifeq ($(wildcard /opt/rocm),)
319
- ROCM_PATH ?= /usr
320
- GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
321
- HCC := $(ROCM_PATH)/bin/hipcc
322
- HCXX := $(ROCM_PATH)/bin/hipcc
323
- else
324
- ROCM_PATH ?= /opt/rocm
325
- GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
326
- HCC := $(ROCM_PATH)/llvm/bin/clang
327
- HCXX := $(ROCM_PATH)/llvm/bin/clang++
328
- endif
318
+ ifeq ($(wildcard /opt/rocm) ,)
319
+ ROCM_PATH ?= /usr
320
+ GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
321
+ HCC := $(ROCM_PATH)/bin/hipcc
322
+ HCXX := $(ROCM_PATH)/bin/hipcc
323
+ else
324
+ ROCM_PATH ?= /opt/rocm
325
+ GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
326
+ HCC := $(ROCM_PATH)/llvm/bin/clang
327
+ HCXX := $(ROCM_PATH)/llvm/bin/clang++
328
+ endif
329
329
LLAMA_CUDA_DMMV_X ?= 32
330
330
LLAMA_CUDA_MMV_Y ?= 1
331
331
LLAMA_CUDA_KQUANTS_ITER ?= 2
@@ -370,26 +370,26 @@ ggml-metal.o: ggml/src/ggml-metal/ggml-metal.m ggml/src/ggml-metal/ggml-metal-im
370
370
endif # LLAMA_METAL
371
371
372
372
ifneq ($(filter aarch64% ,$(UNAME_M ) ) ,)
373
- # Apple M1, M2, etc.
374
- # Raspberry Pi 3, 4, Zero 2 (64-bit)
375
- ifdef LLAMA_PORTABLE
376
- CFLAGS +=
377
- CXXFLAGS +=
378
- else
379
- # sve is cooked on termux so we are disabling it
380
- ifeq ($(UNAME_O), Android)
381
- ifneq ($(findstring clang, $(CCV)), )
382
- CFLAGS += -mcpu=native+nosve
383
- CXXFLAGS += -mcpu=native+nosve
384
- else
385
- CFLAGS += -mcpu=native
386
- CXXFLAGS += -mcpu=native
387
- endif
388
- else
389
- CFLAGS += -mcpu=native
390
- CXXFLAGS += -mcpu=native
391
- endif
392
- endif
373
+ # Apple M1, M2, etc.
374
+ # Raspberry Pi 3, 4, Zero 2 (64-bit)
375
+ ifdef LLAMA_PORTABLE
376
+ CFLAGS +=
377
+ CXXFLAGS +=
378
+ else
379
+ # SVE is broken on Termux, so we are disabling it
380
+ ifeq ($(UNAME_O ) , Android)
381
+ ifneq ($(findstring clang, $(CCV ) ) , )
382
+ CFLAGS += -mcpu=native+nosve
383
+ CXXFLAGS += -mcpu=native+nosve
384
+ else
385
+ CFLAGS += -mcpu=native
386
+ CXXFLAGS += -mcpu=native
387
+ endif
388
+ else
389
+ CFLAGS += -mcpu=native
390
+ CXXFLAGS += -mcpu=native
391
+ endif
392
+ endif
393
393
endif
394
394
395
395
ifneq ($(filter armv6% ,$(UNAME_M ) ) ,)
@@ -409,10 +409,10 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
409
409
endif
410
410
ifneq ($(filter ppc64% ,$(UNAME_M ) ) ,)
411
411
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
412
- ifneq (,$(findstring POWER9,$(POWER9_M)))
413
- CFLAGS += -mcpu=power9
414
- CXXFLAGS += -mcpu=power9
415
- endif
412
+ ifneq (,$(findstring POWER9,$(POWER9_M ) ) )
413
+ CFLAGS += -mcpu=power9
414
+ CXXFLAGS += -mcpu=power9
415
+ endif
416
416
endif
417
417
418
418
@@ -427,49 +427,49 @@ NOTIFY_MSG =
427
427
428
428
ifeq ($(OS ) ,Windows_NT)
429
429
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
430
- ifdef LLAMA_PORTABLE
430
+ ifdef LLAMA_PORTABLE
431
431
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
432
432
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
433
- endif
433
+ endif
434
434
435
- ifdef LLAMA_CLBLAST
435
+ ifdef LLAMA_CLBLAST
436
436
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
437
- endif
438
- ifdef LLAMA_VULKAN
437
+ endif
438
+ ifdef LLAMA_VULKAN
439
439
VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ lib/vulkan-1.lib -shared -o $@.dll $(LDFLAGS)
440
- endif
441
-
442
- ifdef LLAMA_CUBLAS
443
- CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
444
- endif
445
- ifdef LLAMA_HIPBLAS
446
- HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS)
447
- endif
440
+ endif
441
+
442
+ ifdef LLAMA_CUBLAS
443
+ CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
444
+ endif
445
+ ifdef LLAMA_HIPBLAS
446
+ HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS)
447
+ endif
448
448
else
449
449
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
450
- ifdef LLAMA_PORTABLE
451
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
450
+ ifdef LLAMA_PORTABLE
451
+ ifeq ($(UNAME_M ) ,$(filter $(UNAME_M ) ,x86_64 i686 amd64) )
452
452
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
453
453
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
454
- endif
455
- endif
456
-
457
- ifdef LLAMA_CLBLAST
458
- ifeq ($(UNAME_S),Darwin)
459
- CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
460
- else
461
- CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
462
- endif
463
- endif
464
- ifdef LLAMA_CUBLAS
465
- CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
466
- endif
467
- ifdef LLAMA_HIPBLAS
468
- HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS)
469
- endif
470
- ifdef LLAMA_VULKAN
471
- VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
472
- endif
454
+ endif
455
+ endif
456
+
457
+ ifdef LLAMA_CLBLAST
458
+ ifeq ($(UNAME_S ) ,Darwin)
459
+ CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
460
+ else
461
+ CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
462
+ endif
463
+ endif
464
+ ifdef LLAMA_CUBLAS
465
+ CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
466
+ endif
467
+ ifdef LLAMA_HIPBLAS
468
+ HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS)
469
+ endif
470
+ ifdef LLAMA_VULKAN
471
+ VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
472
+ endif
473
473
endif
474
474
475
475
ifndef LLAMA_CLBLAST
0 commit comments