Skip to content

Commit 3faf670

Browse files
committed
try to add ios-xcode-build
1 parent 05c55e5 commit 3faf670

File tree

5 files changed

+49
-28
lines changed

5 files changed

+49
-28
lines changed

Package.swift

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ var sources = [
1414
"ggml/src/ggml-backend.cpp",
1515
"ggml/src/ggml-quants.c",
1616
"ggml/src/ggml-aarch64.c",
17+
"ggml/src/ggml-fp8.cpp",
1718
]
1819

1920
var resources: [Resource] = []
@@ -75,5 +76,5 @@ let package = Package(
7576
linkerSettings: linkerSettings
7677
)
7778
],
78-
cxxLanguageStandard: .cxx11
79+
cxxLanguageStandard: .cxx17
7980
)

ggml/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -183,11 +183,11 @@ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
183183
set(CMAKE_C_STANDARD 11)
184184
set(CMAKE_C_STANDARD_REQUIRED true)
185185

186-
#if (GGML_SYCL)
186+
if (GGML_SYCL)
187187
set(CMAKE_CXX_STANDARD 17)
188-
#else()
189-
# set(CMAKE_CXX_STANDARD 11)
190-
#endif()
188+
else()
189+
set(CMAKE_CXX_STANDARD 11)
190+
endif()
191191
set(CMAKE_CXX_STANDARD_REQUIRED true)
192192

193193
set(THREADS_PREFER_PTHREAD_FLAG ON)

ggml/src/CMakeLists.txt

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,9 +178,10 @@ if (GGML_OPENMP_SIMD)
178178
# OpenMP_RUNTIME_MSVC=experimental / if (MSVC)
179179
message(STATUS "Using openmp_simd.")
180180
add_compile_definitions(GGML_USE_OPENMP_SIMD)
181-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
181+
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
182+
set(OPENMP_SIMD_FLAGS -fopenmp-simd)
182183
else()
183-
message(FATAL_ERROR, "C++ compiler lacks OPENMP_SIMD support.")
184+
message(WARNING "C++ compiler lacks OPENMP_SIMD support.")
184185
endif()
185186
endif()
186187

@@ -1377,6 +1378,13 @@ endif()
13771378
file(GLOB GGML_HEADERS_FP8 "ggml-fp8.h")
13781379
file(GLOB GGML_SOURCES_FP8 "ggml-fp8.cpp")
13791380

1381+
#set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES CXX_STANDARD 17)
1382+
#set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES COMPILE_FLAGS "-std=c++17")
1383+
if (OPENMP_SIMD_FLAGS)
1384+
set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES COMPILE_FLAGS ${OPENMP_SIMD_FLAGS})
1385+
#set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES CMAKE_CXX_FLAGS ${OPENMP_SIMD_FLAGS})
1386+
endif()
1387+
13801388
# ggml
13811389

13821390
add_library(ggml
@@ -1413,6 +1421,8 @@ target_include_directories(ggml PUBLIC ../include)
14131421
target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
14141422
target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
14151423
target_compile_features (ggml PRIVATE c_std_11) # don't bump
1424+
target_compile_features (ggml PRIVATE cxx_std_17)
1425+
#target_compile_features (ggml PRIVATE cxx_constexpr)
14161426

14171427
list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads)
14181428

ggml/src/ggml-common.h

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@
66
typedef uint16_t ggml_half;
77
typedef uint32_t ggml_half2;
88

9-
#define GGML_COMMON_AGGR
9+
#define GGML_COMMON_AGGR_U
10+
#define GGML_COMMON_AGGR_S
1011

1112
#define GGML_COMMON_DECL
1213
#elif defined(GGML_COMMON_DECL_CPP)
@@ -15,7 +16,8 @@ typedef uint32_t ggml_half2;
1516
typedef uint16_t ggml_half;
1617
typedef uint32_t ggml_half2;
1718

18-
#define GGML_COMMON_AGGR data
19+
#define GGML_COMMON_AGGR_U data
20+
#define GGML_COMMON_AGGR_S data
1921

2022
#define GGML_COMMON_DECL
2123
#elif defined(GGML_COMMON_DECL_METAL)
@@ -24,7 +26,8 @@ typedef uint32_t ggml_half2;
2426
typedef half ggml_half;
2527
typedef half2 ggml_half2;
2628

27-
#define GGML_COMMON_AGGR
29+
#define GGML_COMMON_AGGR_U
30+
#define GGML_COMMON_AGGR_S
2831

2932
#define GGML_COMMON_DECL
3033
#elif defined(GGML_COMMON_DECL_CUDA)
@@ -38,7 +41,8 @@ typedef half2 ggml_half2;
3841
typedef half ggml_half;
3942
typedef half2 ggml_half2;
4043

41-
#define GGML_COMMON_AGGR data
44+
#define GGML_COMMON_AGGR_U
45+
#define GGML_COMMON_AGGR_S data
4246

4347
#define GGML_COMMON_DECL
4448
#elif defined(GGML_COMMON_DECL_HIP)
@@ -48,7 +52,8 @@ typedef half2 ggml_half2;
4852
typedef half ggml_half;
4953
typedef half2 ggml_half2;
5054

51-
#define GGML_COMMON_AGGR data
55+
#define GGML_COMMON_AGGR_U
56+
#define GGML_COMMON_AGGR_S data
5257

5358
#define GGML_COMMON_DECL
5459
#elif defined(GGML_COMMON_DECL_SYCL)
@@ -58,7 +63,8 @@ typedef half2 ggml_half2;
5863
typedef sycl::half ggml_half;
5964
typedef sycl::half2 ggml_half2;
6065

61-
#define GGML_COMMON_AGGR data
66+
#define GGML_COMMON_AGGR_U
67+
#define GGML_COMMON_AGGR_S data
6268

6369
#define GGML_COMMON_DECL
6470
#endif
@@ -163,9 +169,9 @@ typedef struct {
163169
struct {
164170
ggml_half d; // delta
165171
ggml_half m; // min
166-
} GGML_COMMON_AGGR;
172+
} GGML_COMMON_AGGR_S;
167173
ggml_half2 dm;
168-
};
174+
} GGML_COMMON_AGGR_U;
169175
uint8_t qs[QK4_1 / 2]; // nibbles / quants
170176
} block_q4_1;
171177
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -184,9 +190,9 @@ typedef struct {
184190
struct {
185191
ggml_half d; // delta
186192
ggml_half m; // min
187-
} GGML_COMMON_AGGR;
193+
} GGML_COMMON_AGGR_S;
188194
ggml_half2 dm;
189-
};
195+
} GGML_COMMON_AGGR_U;
190196
uint8_t qh[4]; // 5-th bit of quants
191197
uint8_t qs[QK5_1 / 2]; // nibbles / quants
192198
} block_q5_1;
@@ -205,9 +211,9 @@ typedef struct {
205211
struct {
206212
ggml_half d; // delta
207213
ggml_half s; // d * sum(qs[i])
208-
} GGML_COMMON_AGGR;
214+
} GGML_COMMON_AGGR_S;
209215
ggml_half2 ds;
210-
};
216+
} GGML_COMMON_AGGR_U;
211217
int8_t qs[QK8_1]; // quants
212218
} block_q8_1;
213219
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
@@ -270,9 +276,9 @@ typedef struct {
270276
struct {
271277
ggml_half d; // super-block scale for quantized scales
272278
ggml_half dmin; // super-block scale for quantized mins
273-
} GGML_COMMON_AGGR;
279+
} GGML_COMMON_AGGR_S;
274280
ggml_half2 dm;
275-
};
281+
} GGML_COMMON_AGGR_U;
276282
} block_q2_K;
277283
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
278284

@@ -297,9 +303,9 @@ typedef struct {
297303
struct {
298304
ggml_half d; // super-block scale for quantized scales
299305
ggml_half dmin; // super-block scale for quantized mins
300-
} GGML_COMMON_AGGR;
306+
} GGML_COMMON_AGGR_S;
301307
ggml_half2 dm;
302-
};
308+
} GGML_COMMON_AGGR_U;
303309
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
304310
uint8_t qs[QK_K/2]; // 4--bit quants
305311
} block_q4_K;
@@ -314,9 +320,9 @@ typedef struct {
314320
struct {
315321
ggml_half d; // super-block scale for quantized scales
316322
ggml_half dmin; // super-block scale for quantized mins
317-
} GGML_COMMON_AGGR;
323+
} GGML_COMMON_AGGR_S;
318324
ggml_half2 dm;
319-
};
325+
} GGML_COMMON_AGGR_U;
320326
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
321327
uint8_t qh[QK_K/8]; // quants, high bit
322328
uint8_t qs[QK_K/2]; // quants, low 4 bits

ggml/src/ggml-fp8.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ make -j8
1717
1818
rm -rf build
1919
cmake -B build
20-
cmake --build build --config Release
20+
cmake --build build --config Release -j $(nproc)
2121
./build/bin/llama-quantize ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
2222
./build/bin/llama-cli -c 1024 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf -p "[INST]bonjour a tu un nom. je ne sais pas comment t'appeler. Si tu n'en as pas je peux t'appeler TINTIN[/INST]" -s 42
2323
./build/bin/llama-perplexity --kl-divergence-base ~/LLM/Mistral-Nemo-Instruct-2407.BF16.kld --kl-divergence -s 31337 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf
@@ -27,6 +27,10 @@ rm -rf tmp
2727
mkdir tmp
2828
bash ./ci/run.sh ./tmp/results ./tmp/mnt
2929
30+
# HIP legacy target?
31+
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
32+
cmake --build build2 --config Release -j $(nproc) -v
33+
3034
*/
3135

3236
template<int N> constexpr float EXP2() {
@@ -151,9 +155,9 @@ static inline void conv(const float* x, bloc_fp8<E, QK>* y, int64_t size) {
151155
const auto qk_size = size / QK;
152156
for (int64_t q=0; q<qk_size; ++q) {
153157
float m = 0;
154-
// @ voir si c'est lui qui pose probleme et si c'est sur toutes les target
155158
#ifdef GGML_USE_OPENMP_SIMD
156-
#pragma omp simd reduction(max:m)
159+
// not work on macos and warn.
160+
// #pragma omp simd reduction(max:m)
157161
#endif
158162
for (int64_t i=0; i<QK; i++) {
159163
m = std::max(std::abs(x[q*QK+i]),m);

0 commit comments

Comments
 (0)