Fix Q4 cache in chat example

turboderp · turboderp · commit 082a9fe9df9d · 2024-03-06T19:13:21.000+01:00
diff --git a/examples/chat.py b/examples/chat.py
@@ -132,6 +132,8 @@
 
 if args.cache_8bit:
     cache = ExLlamaV2Cache_8bit(model, lazy = not model.loaded)
+elif args.cache_q4:
+    cache = ExLlamaV2Cache_Q4(model, lazy = not model.loaded)
 else:
     cache = ExLlamaV2Cache(model, lazy = not model.loaded)