Skip to content

Commit 9858f7b

Browse files
committed
Merge branch 'concedo_experimental' into croco_nex_0
2 parents 182cc2e + 6fa50f7 commit 9858f7b

File tree

3 files changed

+87
-36
lines changed

3 files changed

+87
-36
lines changed

Diff for: ggml/src/ggml-metal/ggml-metal.metal

+21-12
Original file line numberDiff line numberDiff line change
@@ -373,24 +373,33 @@ void dequantize_q5_K(device const block_q5_K *xb, short il, thread type4x4 & reg
373373
template <typename type4x4>
374374
void dequantize_q6_K(device const block_q6_K *xb, short il, thread type4x4 & reg) {
375375
const half d_all = xb->d;
376-
device const uint8_t * ql = (device const uint8_t *)xb->ql;
377-
device const uint8_t * qh = (device const uint8_t *)xb->qh;
376+
device const uint16_t * ql = (device const uint16_t *)xb->ql;
377+
device const uint16_t * qh = (device const uint16_t *)xb->qh;
378378
device const int8_t * scales = (device const int8_t *)xb->scales;
379379

380-
ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1);
381-
qh = qh + 32*(il/8) + 16*(il&1);
380+
ql = ql + 32*(il/8) + 16*((il/2)&1) + 8*(il&1);
381+
qh = qh + 16*(il/8) + 8*(il&1);
382382
float sc = scales[(il%2) + 2 * ((il/2))];
383383
il = (il/2) & 3;
384384

385-
const uint16_t kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3);
386-
const uint16_t kmask2 = il>1 ? 0xF0 : 0x0F;
387-
const float coef = il>1 ? 1.f/16.f : 1.f;
385+
const uint32_t kmask1 = il>1 ? (il>2 ? 0xC0C0C0C0 : 0x30303030) : (il>0 ? 0x0C0C0C0C : 0x03030303);
386+
const uint32_t kmask2 = il>1 ? 0xF0F0F0F0 : 0x0F0F0F0F;
388387
const float ml = d_all * sc * 32.f;
389-
const float dl = d_all * sc * coef;
390-
for (int i = 0; i < 16; ++i) {
391-
const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2))
392-
: ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4));
393-
reg[i/4][i%4] = dl * q - ml;
388+
const float dl0 = d_all * sc;
389+
const float dl1 = dl0 / 256.f;
390+
const float dl2 = dl0 / (256.f * 256.f);
391+
const float dl3 = dl0 / (256.f * 256.f * 256.f);
392+
const uint8_t shr_h = il>2 ? 2 : 0;
393+
const uint8_t shl_h = il>1 ? 0 : (il>0 ? 2 : 4);
394+
const uint8_t shr_l = il>1 ? 4 : 0;
395+
for (int i = 0; i < 4; ++i) {
396+
const uint32_t low = (ql[2*i] | (uint32_t)(ql[2*i+1] << 16)) & kmask2;
397+
const uint32_t high = (qh[2*i] | (uint32_t)(qh[2*i+1] << 16)) & kmask1;
398+
const uint32_t q = ((high << shl_h) >> shr_h) | (low >> shr_l);
399+
reg[i][0] = dl0 * ((half)(q & 0xFF)) - ml;
400+
reg[i][1] = dl1 * ((float)(q & 0xFF00)) - ml;
401+
reg[i][2] = dl2 * ((float)(q & 0xFF0000)) - ml;
402+
reg[i][3] = dl3 * ((float)(q & 0xFF000000)) - ml;
394403
}
395404
}
396405

Diff for: klite.embd

+13-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
1212
-->
1313

1414
<script>
15-
const LITEVER = 214;
15+
const LITEVER = 215;
1616
const urlParams = new URLSearchParams(window.location.search);
1717
var localflag = true;
1818
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -11913,7 +11913,7 @@ Current version indicated by LITEVER below.
1191311913
});
1191411914
}
1191511915

11916-
function clear_poll_flags()
11916+
function restore_retried_text()
1191711917
{
1191811918
if(retry_in_progress)
1191911919
{
@@ -11928,6 +11928,10 @@ Current version indicated by LITEVER below.
1192811928
}
1192911929
}
1193011930
}
11931+
}
11932+
function clear_poll_flags()
11933+
{
11934+
restore_retried_text();
1193111935
pending_response_id = "";
1193211936
poll_in_progress = false;
1193311937
synchro_polled_response = null;
@@ -16484,6 +16488,7 @@ Current version indicated by LITEVER below.
1648416488
handle_incoming_text(gentxt, genworker, genmdl, genkudos);
1648516489
}
1648616490
}else{
16491+
restore_retried_text();
1648716492
retry_preserve_last = false;
1648816493
}
1648916494
synchro_polled_response = null;
@@ -16544,7 +16549,13 @@ Current version indicated by LITEVER below.
1654416549
}
1654516550
else {
1654616551
last_reply_was_empty = (gentxt=="" || gentxt.trim()=="");
16552+
let was_retry_in_progress = retry_in_progress;
1654716553
handle_incoming_text(gentxt, genworker, genmdl, genkudos);
16554+
if (gentxt=="" && was_retry_in_progress)
16555+
{
16556+
retry_in_progress = was_retry_in_progress;
16557+
restore_retried_text(); //horde only: when the retry returned empty text, restore the old text
16558+
}
1654816559
}
1654916560
}
1655016561
render_gametext();

Diff for: koboldcpp.py

+53-22
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
# dry_seq_break_max = 128
5656

5757
# global vars
58-
KcppVersion = "1.84000"
58+
KcppVersion = "1.84200"
5959
LcppVersion = "b4722"
6060
CudaSpecifics = "Cu128_Ar86_SMC2_DmmvX32Y1"
6161
ReleaseDate = "2025/02/15"
@@ -106,6 +106,7 @@
106106
websearch_lastresponse = []
107107
preloaded_story = None
108108
chatcompl_adapter = None
109+
chatcompl_adapter_list = None #if using autoguess, this will be populated with potential adapters
109110
embedded_kailite = None
110111
embedded_kcpp_docs = None
111112
embedded_kcpp_sdui = None
@@ -3093,7 +3094,7 @@ def do_GET(self):
30933094
opts = []
30943095
if args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
30953096
dirpath = os.path.abspath(args.admindir)
3096-
opts = [f for f in sorted(os.listdir(dirpath)) if f.endswith(".kcpps") and os.path.isfile(os.path.join(dirpath, f))]
3097+
opts = [f for f in sorted(os.listdir(dirpath)) if (f.endswith(".kcpps") or f.endswith(".kcppt")) and os.path.isfile(os.path.join(dirpath, f))]
30973098
response_body = (json.dumps(opts).encode())
30983099

30993100
elif self.path.endswith(('/api/extra/perf')):
@@ -3465,7 +3466,7 @@ def do_POST(self):
34653466
if targetfile and targetfile!="":
34663467
dirpath = os.path.abspath(args.admindir)
34673468
targetfilepath = os.path.join(dirpath, targetfile)
3468-
opts = [f for f in os.listdir(dirpath) if f.endswith(".kcpps") and os.path.isfile(os.path.join(dirpath, f))]
3469+
opts = [f for f in os.listdir(dirpath) if (f.endswith(".kcpps") or f.endswith(".kcppt")) and os.path.isfile(os.path.join(dirpath, f))]
34693470
if targetfile in opts and os.path.exists(targetfilepath):
34703471
print(f"Admin: Received request to reload config to {targetfile}")
34713472
global_memory["restart_target"] = targetfile
@@ -5384,7 +5385,7 @@ def make_url_request_horde(url, data, method='POST',addmykey=False):
53845385
"name": worker_name,
53855386
"models": [friendlymodelname],
53865387
"max_length": maxhordelen,
5387-
"max_context_length": maxhordectx,
5388+
"max_context_length": min(maxctx,maxhordectx),
53885389
"priority_usernames": [],
53895390
"softprompts": [],
53905391
"bridge_agent": BRIDGE_AGENT,
@@ -5697,7 +5698,7 @@ def sanitize_string(input_string):
56975698
sanitized_string = re.sub( r'[^\w\d\.\-_]', '', input_string)
56985699
return sanitized_string
56995700

5700-
def downloader_internal(input_url, output_filename, capture_output, min_file_size=64): #64 bytes required by default
5701+
def downloader_internal(input_url, output_filename, capture_output, min_file_size=64): # 64 bytes required by default
57015702
import shutil
57025703
import subprocess
57035704
import os
@@ -5711,20 +5712,44 @@ def downloader_internal(input_url, output_filename, capture_output, min_file_siz
57115712
return output_filename
57125713
print(f"Downloading {input_url}", flush=True)
57135714
dl_success = False
5714-
if shutil.which("aria2c") is not None:
5715-
rc = subprocess.run(f"aria2c -x 16 -s 16 --summary-interval=30 --console-log-level=error --log-level=error --download-result=default --allow-overwrite=true --file-allocation=none -o {output_filename} {input_url}", shell=True, capture_output=capture_output, text=True, check=True, encoding='utf-8')
5716-
dl_success = (rc.returncode==0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
5717-
if not dl_success and shutil.which("curl") is not None:
5718-
rc = subprocess.run(f"curl -fLo {output_filename} {input_url}", shell=True, capture_output=capture_output, text=True, check=True, encoding='utf-8')
5719-
dl_success = (rc.returncode==0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
5720-
if not dl_success and shutil.which("wget") is None:
5721-
rc = subprocess.run(f"wget -O {output_filename} {input_url}", shell=True, capture_output=capture_output, text=True, check=True, encoding='utf-8')
5722-
dl_success = (rc.returncode==0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
5715+
5716+
try:
5717+
if shutil.which("aria2c") is not None:
5718+
rc = subprocess.run(
5719+
f"aria2c -x 16 -s 16 --summary-interval=30 --console-log-level=error --log-level=error --download-result=default --allow-overwrite=true --file-allocation=none -o {output_filename} {input_url}",
5720+
shell=True, capture_output=capture_output, text=True, check=True, encoding='utf-8'
5721+
)
5722+
dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
5723+
except subprocess.CalledProcessError as e:
5724+
print(f"aria2c failed: {e}")
5725+
5726+
try:
5727+
if not dl_success and shutil.which("curl") is not None:
5728+
rc = subprocess.run(
5729+
f"curl -fLo {output_filename} {input_url}",
5730+
shell=True, capture_output=capture_output, text=True, check=True, encoding='utf-8'
5731+
)
5732+
dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
5733+
except subprocess.CalledProcessError as e:
5734+
print(f"curl failed: {e}")
5735+
5736+
try:
5737+
if not dl_success and shutil.which("wget") is not None:
5738+
rc = subprocess.run(
5739+
f"wget -O {output_filename} {input_url}",
5740+
shell=True, capture_output=capture_output, text=True, check=True, encoding='utf-8'
5741+
)
5742+
dl_success = (rc.returncode == 0 and os.path.exists(output_filename) and os.path.getsize(output_filename) > min_file_size)
5743+
except subprocess.CalledProcessError as e:
5744+
print(f"wget failed: {e}")
5745+
57235746
if not dl_success:
5724-
print("Could not find suitable download software, please install aria2 or curl.")
5747+
print("Could not find suitable download software, or all download methods failed. Please install aria2, curl, or wget.")
57255748
return None
5749+
57265750
return output_filename
57275751

5752+
57285753
def download_model_from_url(url, permitted_types=[".gguf",".safetensors", ".ggml", ".bin"], min_file_size=64):
57295754
if url and url!="":
57305755
if url.endswith("?download=true"):
@@ -5789,6 +5814,9 @@ def main(launch_args):
57895814
# print("Python version: " + sys.version)
57905815
if args.debugmode != 1:
57915816
showdebug = False #not shared with child process!
5817+
if args.debugmode >= 1:
5818+
print("Debug Mode is Enabled!")
5819+
args.quiet = False # verbose outputs
57925820

57935821
try:
57945822
delete_old_pyinstaller() #perform some basic cleanup of old temporary directories
@@ -5991,7 +6019,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
59916019

59926020
# try to read chat completions adapter
59936021
if args.chatcompletionsadapter:
5994-
global chatcompl_adapter
6022+
global chatcompl_adapter, chatcompl_adapter_list
59956023
ccadapter_path = None
59966024
canload = False
59976025
adapt_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'kcpp_adapters')
@@ -6031,6 +6059,9 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
60316059
print("Chat Completions Adapter Loaded")
60326060
else:
60336061
print("Warning: Chat Completions Adapter invalid or not found.")
6062+
if (chatcompl_adapter is not None and isinstance(chatcompl_adapter, list)):
6063+
chatcompl_adapter_list = chatcompl_adapter
6064+
chatcompl_adapter = None
60346065

60356066
# handle model downloads if needed
60366067
if args.model_param and args.model_param!="":
@@ -6238,23 +6269,20 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
62386269
exitcounter = 999
62396270
exit_with_error(3,"Could not load text model: " + modelname)
62406271

6241-
if (chatcompl_adapter is not None and isinstance(chatcompl_adapter, list)):
6272+
if (chatcompl_adapter_list is not None and isinstance(chatcompl_adapter_list, list)):
62426273
# The chat completions adapter is a list that needs derivation from chat templates
62436274
# Try to derive chat completions adapter from chat template, now that we have the model loaded
62446275
if not args.nomodel and args.model_param:
62456276
ctbytes = handle.get_chat_template()
62466277
chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")
6247-
candidates = chatcompl_adapter
62486278
if chat_template != "":
6249-
for entry in candidates:
6279+
for entry in chatcompl_adapter_list:
62506280
if all(s in chat_template for s in entry['search']):
62516281
print(f"Chat completion heuristic: {entry['name']}")
62526282
chatcompl_adapter = entry['adapter']
62536283
break
62546284
if chatcompl_adapter is None:
62556285
print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.")
6256-
else:
6257-
chatcompl_adapter = None #if no text model loaded, erase the list.
62586286

62596287
#handle loading image model
62606288
if args.sdmodel and args.sdmodel!="":
@@ -6445,7 +6473,10 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
64456473
print(f"StableUI is available at {endpoint_url}/sdui/")
64466474
global_memory["load_complete"] = True
64476475
if args.launch:
6448-
LaunchWebbrowser(endpoint_url,"--launch was set, but could not launch web browser automatically.")
6476+
def launch_browser_thread():
6477+
LaunchWebbrowser(endpoint_url,"--launch was set, but could not launch web browser automatically.")
6478+
browser_thread = threading.Timer(2, launch_browser_thread) #2 second delay
6479+
browser_thread.start()
64496480

64506481
if args.hordekey and args.hordekey!="":
64516482
if args.hordeworkername and args.hordeworkername!="":

0 commit comments

Comments
 (0)