#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 4 16:26:27 2023
@author: Chuplares
"""
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
import argparse
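# Command-line options: model path, CPU/GPU placement, thread/GPU-layer counts,
# instructor embedding model, batch size, and sampling temperature.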
parser = argparse.ArgumentParser()
gcgroup = parser.add_mutually_exclusive_group()
ltgroup = parser.add_mutually_exclusive_group()
gcgroup.add_argument("-g", "--gpu", action="store_true", help="Load model on GPU.")
gcgroup.add_argument("-c", "--cpu", action="store_true", help="Load model on CPU.")
ltgroup.add_argument("-t", "--threads", type=int, default=4, help="Set number of threads to use on CPU. Default: 4")
ltgroup.add_argument("-l", "--gpulayers", type=int, default=32, help="Set number of layers to load on GPU. Default: 32")
parser.add_argument("-m","--model", type=str, required=True, help="Set the path for the aurora model.")
parser.add_argument("-d","--deviceinstruct", type=str, default='cpu', help="Define where the instruct model will be loaded. Default: cpu")
parser.add_argument("-i","--instrmodel", type=str, default='hkunlp/instructor-large', help="Set the name of the instruct model. Default: hkunlp/instructor-xl")
#parser.add_argument("-k", "--max_tokens", type=int, default=512, help="Define the max number of tokens to be generated by the model. Default: 512")
parser.add_argument("-p", "--batch_process", type=int, default=1024, help="Define the batch size of the input tokens to be processed at once. Default: 1024")
parser.add_argument("--temperature", type=float, default=1.0, help="Define the temperature for the model's predictions. Default: 1.0")
args = parser.parse_args()
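# Baize-style prompt: turns are delimited with [|Human|] and [|AI|] markers, and
# retrieved context is prepended to the first question of each conversation.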
template = """The following is a conversation between a human and a marxist leninist, and a anti-imperialist activist AI assistant named Aurora.
Aurora is an open-source AI assistant developed by marxists independent researches.
The human and the AI assistant take turns chatting.
Human statements start with [|Human|] and AI assistant statements start with [|AI|].
The AI assistant always provides responses in as much detail as possible and aways from a marxist lennist anti-imperialist perspective, and in Markdown format.
Complete the transcript in exactly that format, using the context in the beggining of the conversation when possible.
{question}[|AI|]"""
prompt = PromptTemplate(template=template, input_variables=["question"])
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
n_batch=args.batch_process
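# Load the GGML model through llama.cpp, offloading layers to the GPU when --gpu is given.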
if args.gpu:
    llm = LlamaCpp(model_path=str(args.model),
                   n_ctx=2048,
                   f16_kv=True,
                   stop=["[|Human|]", "[|AI|]", "[|HUMAN|]"],
                   n_gpu_layers=args.gpulayers,
                   n_batch=n_batch,
                   #callback_manager=callback_manager,
                   #max_tokens=args.max_tokens,
                   temperature=args.temperature)
else:
    # Default to CPU when --cpu is given or neither placement flag is set.
    llm = LlamaCpp(model_path=str(args.model),
                   n_ctx=2048,
                   f16_kv=True,
                   stop=["[|Human|]", "[|AI|]", "[|HUMAN|]"],
                   n_batch=n_batch,
                   n_threads=args.threads,
                   #callback_manager=callback_manager,
                   #max_tokens=args.max_tokens,
                   temperature=args.temperature)
llm_chain = LLMChain(prompt=prompt, llm=llm)
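# Instructor embeddings plus a persisted Chroma store provide the retrieval context
# injected at the start of each new conversation.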
instructor_embeddings = HuggingFaceInstructEmbeddings(model_name=args.instrmodel,model_kwargs={"device": args.deviceinstruct})
persist_directory = 'db'
embedding = instructor_embeddings
vectordb = Chroma(embedding_function=embedding, persist_directory=persist_directory)
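# Outer loop starts a fresh conversation; the inner loop alternates Human/AI turns
# until the user types "+clear".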
while True:
    # Retrieve the most relevant stored documents for the opening question.
    retriever = vectordb.as_retriever()
    query = input("The first question: ")
    context = retriever.get_relevant_documents(query)[:3]
    ctxstr = ""
    for item in context:
        # Keep only the document text; metadata is not part of the prompt.
        ctxstr = ctxstr + item.page_content + "\n"
    questionf = "[|Human|] " + query + "\n"
    hist = []
    hist.append(ctxstr + "\n")
    hist.append(questionf)
    i = 0
    histstr = ""
    question = ""
    questionh = ""
    while True:
        try:
            if i > 0:
                question = input("Human: ")
                if "+clear" in question:
                    break
                questionh = "[|Human|] " + question + "\n"
                hist.append(questionh)
                # Rebuild the running transcript from the full history.
                for item in hist:
                    histstr = histstr + item
            if i == 0:
                ans = llm_chain.run(questionf)
                #print("AI: "+ans)
                ansh = "[|AI|] " + ans
                hist.append(ansh)
            else:
                question = histstr
                ans = llm_chain.run(question)
                #print("AI: "+ans)
                ansh = "[|AI|] " + ans
                hist.append(ansh)
            histstr = ""
            for item in hist:
                histstr = histstr + item
            print("History: " + histstr)
            histstr = ""
            i = i + 1
        except (ValueError, EOFError):
            break