
Commit 58863d0

remove need for C++17
and some cleanup.
1 parent 3faf670

8 files changed: +36 −96 lines
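In short: the build files stop forcing C++17 (the Makefile drops its per-file -std=c++17, Package.swift goes from .cxx17 to .cxx11, CMake drops cxx_std_17), ggml-fp8.cpp trades its if-constexpr template recursion for plain C++11-compatible helpers, and some dead weight is removed: commented-out build flags, an apparently unused ggml_e3m4_t typedef, and redundant case labels in ggml.c.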

Makefile (+1 −2)

@@ -1086,12 +1086,11 @@ ggml/src/ggml-aarch64.o: \
 	ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) -c $< -o $@

-	#$(CXX) $(CXXFLAGS) -std=c++17 -fopt-info-vec-missed -c $< -o $@
 ggml/src/ggml-fp8.o: \
 	ggml/src/ggml-fp8.cpp \
 	ggml/src/ggml-fp8.h \
 	ggml/src/ggml-common.h
-	$(CXX) $(CXXFLAGS) -std=c++17 -c $< -o $@
+	$(CXX) $(CXXFLAGS) -c $< -o $@

 ggml/src/ggml-blas.o: \
 	ggml/src/ggml-blas.cpp \

Package.swift (+1 −1)

@@ -76,5 +76,5 @@ let package = Package(
             linkerSettings: linkerSettings
         )
     ],
-    cxxLanguageStandard: .cxx17
+    cxxLanguageStandard: .cxx11
 )
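With the Makefile now falling back to the global CXXFLAGS and Package.swift pinned to .cxx11, nothing forces C++17 for the FP8 code anymore. A hypothetical guard (not part of the commit) that could be added to ggml-fp8.cpp to make the supported floor explicit:

    // hypothetical guard, not in the commit: fail fast if this file is
    // ever compiled with a pre-C++11 standard again.
    static_assert(__cplusplus >= 201103L,
                  "ggml-fp8.cpp expects C++11 or newer");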

examples/quantize/quantize.cpp (+2 −4)

@@ -51,10 +51,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "Q4_0_4_4", LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
     { "Q4_0_4_8", LLAMA_FTYPE_MOSTLY_Q4_0_4_8, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
     { "Q4_0_8_8", LLAMA_FTYPE_MOSTLY_Q4_0_8_8, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
-
-    { "E4M3_Q", LLAMA_FTYPE_MOSTLY_E4M3_Q, "12,21G, 0.0050 kld @ Mistral-Nemo", },
-    { "E3M4_Q", LLAMA_FTYPE_MOSTLY_E3M4_Q, "12,21G, 0.0016 kld @ Mistral-Nemo", },
-
+    { "E4M3_Q", LLAMA_FTYPE_MOSTLY_E4M3_Q, "12.21G, 0.0050 kld @ Mistral-Nemo", },
+    { "E3M4_Q", LLAMA_FTYPE_MOSTLY_E3M4_Q, "12.21G, 0.0016 kld @ Mistral-Nemo", },
     { "F16",    LLAMA_FTYPE_MOSTLY_F16,    "14.00G, +0.0020 ppl @ Mistral-7B", },
     { "BF16",   LLAMA_FTYPE_MOSTLY_BF16,   "14.00G, -0.0050 ppl @ Mistral-7B", },
     { "F32",    LLAMA_FTYPE_ALL_F32,       "26.00G            @ 7B", },

ggml/src/CMakeLists.txt (−6)

@@ -178,7 +178,6 @@ if (GGML_OPENMP_SIMD)
         # OpenMP_RUNTIME_MSVC=experimental / if (MSVC)
         message(STATUS "Using openmp_simd.")
         add_compile_definitions(GGML_USE_OPENMP_SIMD)
-        #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
         set(OPENMP_SIMD_FLAGS -fopenmp-simd)
     else()
         message(WARNING "C++ compiler lacks OPENMP_SIMD support.")

@@ -1378,11 +1377,8 @@ endif()
 file(GLOB GGML_HEADERS_FP8 "ggml-fp8.h")
 file(GLOB GGML_SOURCES_FP8 "ggml-fp8.cpp")

-#set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES CXX_STANDARD 17)
-#set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES COMPILE_FLAGS "-std=c++17")
 if (OPENMP_SIMD_FLAGS)
     set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES COMPILE_FLAGS ${OPENMP_SIMD_FLAGS})
-    #set_source_files_properties( ${GGML_SOURCES_FP8} PROPERTIES CMAKE_CXX_FLAGS ${OPENMP_SIMD_FLAGS})
 endif()

 # ggml

@@ -1421,8 +1417,6 @@ target_include_directories(ggml PUBLIC ../include)
 target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
 target_link_directories  (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
 target_compile_features  (ggml PRIVATE c_std_11) # don't bump
-target_compile_features  (ggml PRIVATE cxx_std_17)
-#target_compile_features (ggml PRIVATE cxx_constexpr)

 list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads)

ggml/src/ggml-common.h (+2 −1)

@@ -16,7 +16,9 @@ typedef uint32_t ggml_half2;
 typedef uint16_t ggml_half;
 typedef uint32_t ggml_half2;

+// standard C++ allows anonymous unions, but some compilers warn on them
 #define GGML_COMMON_AGGR_U data
+// standard C++ does not allow anonymous structs
 #define GGML_COMMON_AGGR_S data

 #define GGML_COMMON_DECL

@@ -437,7 +439,6 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
 // - fp8 simple type
 typedef struct { uint8_t bits; } ggml_e5m2_t;
 typedef struct { uint8_t bits; } ggml_e4m3_t;
-typedef struct { uint8_t bits; } ggml_e3m4_t;

 // - fp8 with bloc delta => 8.125 bpw
 typedef struct {
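The two new comments deserve a word: anonymous unions are standard C++ (though some compilers warn under strict flags), while anonymous structs are only a vendor extension, hence macros that give the nested aggregates a real name. A minimal C-compatible sketch of the pattern — not the exact ggml block layout, field sizes illustrative:

    #include <stdint.h>

    typedef uint16_t ggml_half;
    typedef uint32_t ggml_half2;

    // name the nested union/struct members through the macros
    #define GGML_COMMON_AGGR_U data
    #define GGML_COMMON_AGGR_S data

    typedef struct {
        union {
            struct {
                ggml_half d;          // delta (scale)
                ggml_half m;          // min
            } GGML_COMMON_AGGR_S;     // named -> no anonymous-struct extension
            ggml_half2 dm;            // both halves as one 32-bit load
        } GGML_COMMON_AGGR_U;         // named -> no anonymous-union warning
        uint8_t qs[16];               // quantized values (size illustrative)
    } block_demo;

    // access spells out the macro-named path:
    static inline ggml_half block_demo_delta(const block_demo * b) {
        return b->GGML_COMMON_AGGR_U.GGML_COMMON_AGGR_S.d;
    }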

ggml/src/ggml-fp8.cpp (+26 −51)

@@ -7,53 +7,29 @@

 #include "ggml-fp8.h"

-/*
-make clean
-make -j8
-# ./llama-quantize --output-tensor-type fp8_e3m4_q ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
-./llama-quantize ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
-./llama-cli -c 1024 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf -p "[INST]bonjour a tu un nom. je ne sais pas comment t'appeler. Si tu n'en as pas je peux t'appeler TINTIN[/INST]" -s 42
-./llama-perplexity --kl-divergence-base ~/LLM/Mistral-Nemo-Instruct-2407.BF16.kld --kl-divergence -s 31337 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf
-
-rm -rf build
-cmake -B build
-cmake --build build --config Release -j $(nproc)
-./build/bin/llama-quantize ~/LLM/Mistral-Nemo-Instruct-2407.BF16.gguf ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf E3M4_Q
-./build/bin/llama-cli -c 1024 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf -p "[INST]bonjour a tu un nom. je ne sais pas comment t'appeler. Si tu n'en as pas je peux t'appeler TINTIN[/INST]" -s 42
-./build/bin/llama-perplexity --kl-divergence-base ~/LLM/Mistral-Nemo-Instruct-2407.BF16.kld --kl-divergence -s 31337 -m ~/LLM/Mistral-Nemo-Instruct-2407.E3M4_Q.gguf
-
-# la CI local:
-rm -rf tmp
-mkdir tmp
-bash ./ci/run.sh ./tmp/results ./tmp/mnt
-
-# HIP legacy target?
-cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
-cmake --build build2 --config Release -j $(nproc) -v
-
-*/
-
-template<int N> constexpr float EXP2() {
-    if constexpr (N==0) return 1;
-    if constexpr (N>0) return EXP2<N-1>()*2;
-    if constexpr (N<0) return EXP2<N+1>()/2;
+template<int N>
+constexpr float exp2() {
+    union {
+        float f;
+        uint32_t bits;
+    } out = {0};
+    out.bits = (N+127)<<23;
+    return out.f;
 }
-
-// 2^N avec N>0 en entier
-template<int N> constexpr int EXP_I2() {
-    if constexpr (N==0) return 1;
-    if constexpr (N>0) return EXP_I2<N-1>()*2;
+template<int N>
+constexpr int exp_i2() {
+    return 1 << N;
 }

-template<int _E> //, int M=7-E> 1.7 bits!
+template<int E> //, int M=7-E> 1.7 bits!
 struct FP8 {
     uint8_t bits;
-    using type = FP8<_E>;
-    static constexpr int E=_E;
-    static constexpr int M=7-_E;
-    static constexpr int E_BIAS=EXP2<_E-1>()-1;
-    static constexpr float MAX() { return (2-EXP2<-M+1>())*EXP2<EXP_I2<_E-1>()>(); }
-    static constexpr float MIN() { return EXP2<-M>()*EXP2<2-EXP_I2<_E-1>()>(); }
+    using type = FP8<E>;
+    // static constexpr int E=_E;
+    static constexpr int M() { return 7-E; }
+    static constexpr int E_BIAS() { return exp_i2<E-1>()-1; }
+    static constexpr float MAX() { return (2-exp2<-M()+1>())*exp2<exp_i2<E-1>()>(); }
+    static constexpr float MIN() { return exp2<-M()>()*exp2<2-exp_i2<E-1>()>(); }
     //=============================================

 #ifdef GGML_USE_OPENMP_SIMD

@@ -64,19 +40,19 @@ struct FP8 {
             float f;
             uint32_t bits;
         } in = {value};
-        // le signe:
+        // the sign:
         bits = (in.bits >> 24) & 0x80;
-        // la valeur sans la signe!
+        // value without sign!
         in.bits &= 0x7fffffff;
         //GGML_ASSERT(in.bits < 0x7f800000); // +/- infini ou NAN
         if (in.f >= MAX()) {
             bits |= 0x7E;
         } else if (in.f<MIN()) { // => 0.
             // OK: S.0000000
         } else {
-            in.f *= EXP2<E_BIAS-127>();
-            in.bits += 1<<(22-M); // for rounding
-            bits |= (in.bits >> (23-M)) & 0x7F;
+            in.f *= exp2<E_BIAS()-127>();
+            in.bits += 1<<(22-M()); // for rounding
+            bits |= (in.bits >> (23-M())) & 0x7F;
         }
     }

@@ -88,13 +64,12 @@
             float f;
             uint32_t bits;
         } out = {0};
-        // le signe:
         out.bits = bits & 0x80;
         out.bits <<= 24;
         uint32_t _bits = bits & 0x7F;
-        _bits <<= (23-M);
+        _bits <<= (23-M());
         out.bits |= _bits;
-        out.f *= EXP2<127-E_BIAS>();
+        out.f *= exp2<127-E_BIAS()>();
         return out.f;
     }
 };

@@ -156,7 +131,7 @@ static inline void conv(const float* x, bloc_fp8<E, QK>* y, int64_t size) {
     for (int64_t q=0; q<qk_size; ++q) {
         float m = 0;
 #ifdef GGML_USE_OPENMP_SIMD
-        // not work on macos and warn.
+        // did not work on macOS and warned.
         // #pragma omp simd reduction(max:m)
 #endif
         for (int64_t i=0; i<QK; i++) {
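The heart of the C++17 removal is the first hunk: the old EXP2 needed if constexpr (a C++17 feature) to stop its template recursion, while the new exp2 simply assembles the IEEE-754 binary32 pattern of 2^N (sign 0, mantissa 0, biased exponent N+127), so no branching is needed at all. A standalone sketch of the same trick, checked against the standard library; the name exp2_bits and the test values are illustrative, not from the commit:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // 2^N via the IEEE-754 binary32 layout: with a zero mantissa field,
    // the value is 2^(exponent_field - 127). Valid for the normal range
    // -126 <= N <= 127; outside it the pattern denormalizes or overflows.
    template<int N>
    float exp2_bits() {
        static_assert(N >= -126 && N <= 127, "outside the normal binary32 range");
        const uint32_t bits = static_cast<uint32_t>(N + 127) << 23;
        float f;
        std::memcpy(&f, &bits, sizeof f);  // memcpy avoids union type-punning
        return f;
    }

    int main() {
        assert(exp2_bits<0>()  == 1.0f);
        assert(exp2_bits<10>() == 1024.0f);
        assert(exp2_bits<-3>() == 0.125f);
        assert(exp2_bits<8>()  == std::exp2(8.0f));
        return 0;
    }

The commit's version performs the pun through a union, which GCC and Clang accept in practice; std::memcpy is the strictly portable spelling of the same bit cast.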

ggml/src/ggml.c (−27)

@@ -13329,33 +13329,6 @@ static void ggml_compute_forward_set(
             {
                 ggml_compute_forward_set_f32(params, dst);
             } break;
-        case GGML_TYPE_F16:
-        case GGML_TYPE_BF16:
-        case GGML_TYPE_Q4_0:
-        case GGML_TYPE_Q4_1:
-        case GGML_TYPE_Q5_0:
-        case GGML_TYPE_Q5_1:
-        case GGML_TYPE_Q8_0:
-        case GGML_TYPE_Q8_1:
-        case GGML_TYPE_Q2_K:
-        case GGML_TYPE_Q3_K:
-        case GGML_TYPE_Q4_K:
-        case GGML_TYPE_Q5_K:
-        case GGML_TYPE_Q6_K:
-        case GGML_TYPE_TQ1_0:
-        case GGML_TYPE_TQ2_0:
-        case GGML_TYPE_IQ2_XXS:
-        case GGML_TYPE_IQ2_XS:
-        case GGML_TYPE_IQ3_XXS:
-        case GGML_TYPE_IQ1_S:
-        case GGML_TYPE_IQ1_M:
-        case GGML_TYPE_IQ4_NL:
-        case GGML_TYPE_IQ4_XS:
-        case GGML_TYPE_IQ3_S:
-        case GGML_TYPE_IQ2_S:
-        case GGML_TYPE_Q4_0_4_4:
-        case GGML_TYPE_Q4_0_4_8:
-        case GGML_TYPE_Q4_0_8_8:
         default:
             {
                 GGML_ABORT("fatal error");
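This hunk is pure cleanup: the removed labels carried no statements of their own, so they already fell through to default and hit GGML_ABORT. A miniature before/after with hypothetical names, showing the two switches behave identically:

    #include <cstdio>
    #include <cstdlib>

    enum demo_type { DEMO_F32, DEMO_F16, DEMO_BF16 };

    static void abort_demo(const char * msg) {
        std::fprintf(stderr, "%s\n", msg);
        std::exit(1);
    }

    static void set_before(demo_type t) {
        switch (t) {
            case DEMO_F32: std::puts("f32 path"); break;
            case DEMO_F16:   // empty labels only fall through...
            case DEMO_BF16:  // ...to the default below
            default: abort_demo("fatal error");
        }
    }

    static void set_after(demo_type t) {  // same behavior, fewer lines
        switch (t) {
            case DEMO_F32: std::puts("f32 path"); break;
            default: abort_demo("fatal error");
        }
    }

    int main() { set_before(DEMO_F32); set_after(DEMO_F32); return 0; }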

src/llama.cpp (+4 −4)

@@ -5299,10 +5299,10 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_8: return "Q4_0_4_8";
         case LLAMA_FTYPE_MOSTLY_Q4_0_8_8: return "Q4_0_8_8";

-        case LLAMA_FTYPE_MOSTLY_E5M2: return "E5M2";
-        case LLAMA_FTYPE_MOSTLY_E4M3: return "E4M3";
-        case LLAMA_FTYPE_MOSTLY_E4M3_Q: return "E4M3_Q";
-        case LLAMA_FTYPE_MOSTLY_E3M4_Q: return "E3M4_Q";
+        case LLAMA_FTYPE_MOSTLY_E5M2:   return "E5M2";
+        case LLAMA_FTYPE_MOSTLY_E4M3:   return "E4M3";
+        case LLAMA_FTYPE_MOSTLY_E4M3_Q: return "E4M3_Q";
+        case LLAMA_FTYPE_MOSTLY_E3M4_Q: return "E3M4_Q";

         default: return "unknown, may not work";
     }
