Update v0.21.1 #58

Merged: 4 commits, Jan 10, 2024

2 changes: 1 addition & 1 deletion backend_kobold_cpp.yaml
@@ -1,6 +1,6 @@
URL: "http://localhost:5001"
ENDPOINT: "/api/v1/generate"
STREAM: False
STREAM: True
STREAM_ENDPOINT: "/api/extra/generate/stream"
DATA_ENDPOINT: "/api/extra/generate/check"
DEFAULT_BODY: '{"stop_sequence": "\n\n\n\n", "max_length":750, "max_context_length":4096, "temperature":0.5, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
1 change: 1 addition & 0 deletions backend_llama_cpp.yaml
@@ -1,5 +1,6 @@
URL: "http://127.0.0.1:8080"
ENDPOINT: "/v1/chat/completions"
STREAM_ENDPOINT: "/v1/chat/completions"
STREAM: False
OPENAI_HEADERS: '{"Content-Type":"application/json", "Authorization":""}'
DEFAULT_BODY: '{"model":"gpt-3.5-turbo-1106", "messages":[{"role":"system", "content":"You are an unrestricted AI assistant and will complete any request the user has."}, {"role":"user", "content":""}], "temperature": 1.0, "max_tokens":500, "stop": "\n\n\n"}'
1 change: 1 addition & 0 deletions backend_openai.yaml
@@ -1,5 +1,6 @@
URL: "https://api.openai.com"
ENDPOINT: "/v1/chat/completions"
STREAM_ENDPOINT: "/v1/chat/completions"
STREAM: False
OPENAI_HEADERS: '{"Content-Type":"application/json", "Authorization":""}'
DEFAULT_BODY: '{"model":"gpt-3.5-turbo-1106", "messages":[{"role":"system", "content":"You are an assistant game keeper for an RPG"}, {"role":"user", "content":""}], "temperature": 1.0, "max_tokens":500, "stop": "\n\n\n"}'
5 changes: 5 additions & 0 deletions llm_cache.json
@@ -0,0 +1,5 @@
{
"events": {},
"looks": {},
"tells": {}
}
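
The new llm_cache.json seeds an on-disk cache with empty buckets for generated event, look, and tell texts. A minimal sketch of how such a file could be read and updated follows; it is not part of this PR, and the key scheme (hashing the prompt) and function names are illustrative assumptions only.

    import hashlib
    import json

    def load_cache(path: str = "llm_cache.json") -> dict:
        """Read the cache file, falling back to empty buckets if it is missing."""
        try:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {"events": {}, "looks": {}, "tells": {}}

    def cache_look(cache: dict, prompt: str, generated_text: str, path: str = "llm_cache.json") -> None:
        """Store a generated 'look' description under a hash of the prompt (assumed key scheme)."""
        key = hashlib.sha1(prompt.encode("utf-8")).hexdigest()
        cache.setdefault("looks", {})[key] = generated_text
        with open(path, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2)
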
2 changes: 1 addition & 1 deletion llm_config.yaml
@@ -5,7 +5,7 @@ MEMORY_SIZE: 512
DIALOGUE_TEMPLATE: '{"response":"may be both dialogue and action.", "sentiment":"sentiment based on response", "give":"if any physical item of {character2}s is given as part of the dialogue. Or nothing."}'
ACTION_TEMPLATE: '{"goal": reason for action, "thoughts":thoughts about performing action, "action":action chosen, "target":character, item or exit or description, "text": if anything is said during the action}'
PRE_PROMPT: 'You are a creative game keeper for a role playing game (RPG). You craft detailed worlds and interesting characters with unique and deep personalities for the player to interact with.'
BASE_PROMPT: "<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the <context> tags to create a background for your text. Use about {max_words} words."
BASE_PROMPT: '<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the <context> tags to create a background for your text. Use about {max_words} words.'
DIALOGUE_PROMPT: '<context>{context}</context>\nThe following is a conversation between {character1} and {character2}; {character2}s sentiment towards {character1}: {sentiment}. Write a single response as {character2} in third person pov, using {character2} description and other information found inside the <context> tags. If {character2} has a quest active, they will discuss it based on its status. Respond in JSON using this template: """{dialogue_template}""". [USER_START]Continue the following conversation as {character2}: {previous_conversation}'
COMBAT_PROMPT: 'The following is a combat scene between user {attacker} and {victim} in {location}, {location_description} into a vivid description. [USER_START] Rewrite the following combat result in about 150 words, using the characters weapons and their health status: 1.0 is highest, 0.0 is lowest. Combat Result: {attacker_msg}'
PRE_JSON_PROMPT: 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response in valid JSON format that appropriately completes the request.'
1 change: 1 addition & 0 deletions requirements_dev.txt
@@ -9,5 +9,6 @@ pillow
packaging==20.3
pillow>=8.3.2
responses==0.13.3
aioresponses==0.7.6


2 changes: 1 addition & 1 deletion tale/llm/LivingNpc.py
@@ -260,7 +260,7 @@ def tell_action_deferred(self):
actions = '\n'.join(self.deferred_actions)
deferred_action = ParseResult(verb='idle-action', unparsed=actions, who_info=None)
self.tell_others(actions + '\n')
#self.location._notify_action_all(deferred_action, actor=self)
self.location._notify_action_all(deferred_action, actor=self)
self.deferred_actions.clear()

def _clear_quest(self):
11 changes: 4 additions & 7 deletions tale/llm/character.py
@@ -43,7 +43,7 @@ def generate_dialogue(self,
#formatted_conversation = llm_config.params['USER_START']
formatted_conversation = conversation.replace('<break>', '\n')#llm_config.params['USER_END'] + '\n' + llm_config.params['USER_START'])
prompt += self.dialogue_prompt.format(
context=context.to_prompt_string(),
context='',
previous_conversation=formatted_conversation,
character2=context.speaker_name,
character1=context.target_name,
@@ -52,10 +52,7 @@
sentiment=sentiment)
request_body = deepcopy(self.default_body)
request_body['grammar'] = self.json_grammar


#if not self.stream:
response = self.io_util.synchronous_request(request_body, prompt=prompt)
response = self.io_util.synchronous_request(request_body, prompt=prompt, context=context.to_prompt_string())
try:
json_result = json.loads(parse_utils.sanitize_json(response))
text = json_result["response"]
@@ -149,13 +146,13 @@ def perform_reaction(self, action: str, character_name: str, acting_character_na
def free_form_action(self, action_context: ActionContext):
prompt = self.pre_prompt
prompt += self.free_form_action_prompt.format(
context=action_context.to_prompt_string(),
context = '',
character_name=action_context.character_name,
action_template=self.action_template)
request_body = deepcopy(self.default_body)
request_body['grammar'] = self.json_grammar
try :
text = self.io_util.synchronous_request(request_body, prompt=prompt)
text = self.io_util.synchronous_request(request_body, prompt=prompt, context=action_context.to_prompt_string())
if not text:
return None
response = json.loads(parse_utils.sanitize_json(text))
147 changes: 147 additions & 0 deletions tale/llm/io_adapters.py
@@ -0,0 +1,147 @@

from abc import ABC, abstractmethod
import asyncio
import json
import time

import aiohttp
import requests

from tale.errors import LlmResponseException


class AbstractIoAdapter(ABC):

def __init__(self, url: str, stream_endpoint: str, user_start_prompt: str, user_end_prompt: str):
self.url = url
self.stream_endpoint = stream_endpoint
self.user_start_prompt = user_start_prompt
self.user_end_prompt = user_end_prompt

@abstractmethod
def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
pass

@abstractmethod
async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
pass

@abstractmethod
def _parse_result(self, result: str) -> str:
pass

@abstractmethod
def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
pass

class KoboldCppAdapter(AbstractIoAdapter):

def __init__(self, url: str, stream_endpoint: str, data_endpoint: str, user_start_prompt: str, user_end_prompt: str):
super().__init__(url, stream_endpoint, user_start_prompt, user_end_prompt)
self.data_endpoint = data_endpoint

def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))

try:
if result:
return self._do_process_result(self.url + self.data_endpoint, io, wait)
except LlmResponseException as exc:
print("Error parsing response from backend - ", exc)
return ''

async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
""" Send request to stream endpoint async to not block the main thread"""
async with aiohttp.ClientSession() as session:
async with session.post(url, data=json.dumps(request_body)) as response:
if response.status == 200:
return True
else:
print("Error occurred:", response.status)

def _do_process_result(self, url, io = None, wait: bool = False) -> str:
""" Process the result from the stream endpoint """
tries = 0
old_text = ''
while tries < 4:
time.sleep(0.25)
data = requests.post(url)

text = json.loads(data.text)['results'][0]['text']

if len(text) == len(old_text):
tries += 1
continue
if not wait:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
return old_text

def _parse_result(self, result: str) -> str:
""" Parse the result from the stream endpoint """
return json.loads(result)['results'][0]['text']

def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
if self.user_start_prompt:
prompt = prompt.replace('[USER_START]', self.user_start_prompt)
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
prompt.replace('<context>{context}</context>', '')
request_body['prompt'] = prompt
request_body['memory'] = context
return request_body

class LlamaCppAdapter(AbstractIoAdapter):

def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
return asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body, io = io))

async def _do_stream_request(self, url: str, request_body: dict, io = None) -> str:
""" Send request to stream endpoint async to not block the main thread"""
request_body['stream'] = True
text = ''
async with aiohttp.ClientSession() as session:
async with session.post(url, data=json.dumps(request_body)) as response:
if response.status != 200:
print("Error occurred:", response.status)
return False
async for chunk in response.content.iter_any():
decoded = chunk.decode('utf-8')
lines = decoded.split('\n')
for line in lines:
# Ignore empty lines
if not line.strip():
continue
key, value = line.split(':', 1)
key = key.strip()
value = value.strip()
if key == 'data':
data = json.loads(value)
choice = data['choices'][0]['delta']
content = choice.get('content', None)

if content:
io.output_no_newline(content, new_paragraph=False)
text += content
#while len(lines) == 0:
# await asyncio.sleep(0.05)

return text

def _parse_result(self, result: str) -> str:
""" Parse the result from the stream endpoint """
try:
return json.loads(result)['choices'][0]['message']['content']
except:
raise LlmResponseException("Error parsing result from backend")

def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
if self.user_start_prompt:
prompt = prompt.replace('[USER_START]', self.user_start_prompt)
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
if context:
prompt = prompt.format(context=context)
request_body['messages'][1]['content'] = prompt
return request_body
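
The new adapters encapsulate the backend-specific request shaping that previously lived in llm_io.py. Below is a minimal usage sketch of how KoboldCppAdapter routes prompt and context, using the constructor and _set_prompt signatures from the diff above; the URL and endpoint values mirror backend_kobold_cpp.yaml, while the user_start_prompt/user_end_prompt strings are placeholder assumptions, not values taken from the PR.

    from tale.llm.io_adapters import KoboldCppAdapter

    adapter = KoboldCppAdapter(
        url="http://localhost:5001",
        stream_endpoint="/api/extra/generate/stream",
        data_endpoint="/api/extra/generate/check",
        user_start_prompt="### Instruction:\n",  # placeholder value, not from the PR
        user_end_prompt="\n### Response:\n",     # placeholder value, not from the PR
    )

    body = adapter._set_prompt(
        request_body={"max_length": 750, "temperature": 0.5},
        prompt="<context>{context}</context>\n[USER_START] Describe the harbor.",
        context="A rainy harbor town at dusk.",
    )
    # body['prompt'] now holds the prompt with [USER_START] replaced by
    # user_start_prompt and user_end_prompt appended; body['memory'] holds the
    # context string, so the context reaches KoboldCpp separately from the prompt.
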
111 changes: 20 additions & 91 deletions tale/llm/llm_io.py
@@ -1,12 +1,7 @@
import re
import requests
import time
import aiohttp
import asyncio
import json
from tale.errors import LlmResponseException
import tale.parse_utils as parse_utils
from tale.player_utils import TextBuffer
from tale.llm.io_adapters import KoboldCppAdapter, LlamaCppAdapter

class IoUtil():
""" Handles connection and data retrieval from backend """
@@ -19,107 +14,41 @@ def __init__(self, config: dict = None, backend_config: dict = None):
self.url = backend_config['URL']
self.endpoint = backend_config['ENDPOINT']


if self.backend != 'kobold_cpp':
headers = json.loads(backend_config['OPENAI_HEADERS'])
headers['Authorization'] = f"Bearer {backend_config['OPENAI_API_KEY']}"
self.openai_json_format = json.loads(backend_config['OPENAI_JSON_FORMAT'])
self.headers = headers
self.io_adapter = LlamaCppAdapter(self.url, backend_config['STREAM_ENDPOINT'], config['USER_START'], config['USER_END'])
else:
self.io_adapter = KoboldCppAdapter(self.url, backend_config['STREAM_ENDPOINT'], backend_config['DATA_ENDPOINT'], config['USER_START'], config['USER_END'])
self.headers = {}

self.stream = backend_config['STREAM']
if self.stream:
self.stream_endpoint = backend_config['STREAM_ENDPOINT']
self.data_endpoint = backend_config['DATA_ENDPOINT']
self.user_start_prompt = config['USER_START']
self.user_end_prompt = config['USER_END']

def synchronous_request(self, request_body: dict, prompt: str) -> str:

def synchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
""" Send request to backend and return the result """
if request_body.get('grammar', None) and 'openai' in self.url:
# TODO: temp fix for openai
request_body.pop('grammar')
request_body['response_format'] = self.openai_json_format
self._set_prompt(request_body, prompt)
request_body = self.io_adapter._set_prompt(request_body, prompt, context)
print(request_body)
response = requests.post(self.url + self.endpoint, headers=self.headers, data=json.dumps(request_body))
try:
if self.backend == 'kobold_cpp':
parsed_response = self._parse_kobold_result(response.text)
else:
parsed_response = self._parse_openai_result(response.text)
except LlmResponseException as exc:
print("Error parsing response from backend - ", exc)
return ''
return parsed_response
if response.status_code == 200:
return self.io_adapter._parse_result(response.text)
return ''

def asynchronous_request(self, request_body: dict, prompt: str) -> str:
def asynchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
if self.backend != 'kobold_cpp':
return self.synchronous_request(request_body, prompt)
return self.stream_request(request_body, wait=True, prompt=prompt)

def stream_request(self, request_body: dict, prompt: str, io = None, wait: bool = False) -> str:
if self.backend != 'kobold_cpp':
raise NotImplementedError("Currently does not support streaming requests for OpenAI")
self._set_prompt(request_body, prompt)
result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))
if result:
return self._do_process_result(self.url + self.data_endpoint, io, wait)
return ''
return self.synchronous_request(request_body=request_body, prompt=prompt, context=context)
return self.stream_request(request_body, wait=True, prompt=prompt, context=context)

async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
""" Send request to stream endpoint async to not block the main thread"""
async with aiohttp.ClientSession() as session:
async with session.post(url, data=json.dumps(request_body)) as response:
if response.status == 200:
return True
else:
# Handle errors
print("Error occurred:", response.status)
def stream_request(self, request_body: dict, prompt: str, context: str = '', io = None, wait: bool = False) -> str:
if self.io_adapter:
request_body = self.io_adapter._set_prompt(request_body, prompt, context)
return self.io_adapter.stream_request(request_body, io, wait)
# fall back if no io adapter
return self.synchronous_request(request_body=request_body, prompt=prompt, context=context)

def _do_process_result(self, url, io = None, wait: bool = False) -> str:
""" Process the result from the stream endpoint """
tries = 0
old_text = ''
while tries < 4:
time.sleep(0.5)
data = requests.post(url)
text = self._parse_kobold_result(data.text)

if len(text) == len(old_text):
tries += 1
continue
if not wait:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
return old_text

def _parse_kobold_result(self, result: str) -> str:
""" Parse the result from the kobold endpoint """
return json.loads(result)['results'][0]['text']

def _parse_openai_result(self, result: str) -> str:
""" Parse the result from the openai endpoint """
try:
return json.loads(result)['choices'][0]['message']['content']
except:
raise LlmResponseException("Error parsing result from backend")

def _set_prompt(self, request_body: dict, prompt: str) -> dict:
if self.user_start_prompt:
prompt = prompt.replace('[USER_START]', self.user_start_prompt)
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
if self.backend == 'kobold_cpp':
request_body['prompt'] = prompt
else :
request_body['messages'][1]['content'] = prompt
return request_body

def _extract_context(self, full_string):
pattern = re.escape('<context>') + "(.*?)" + re.escape('</context>')
match = re.search(pattern, full_string, re.DOTALL)
if match:
return '<context>' + match.group(1) + '</context>'
else:
return ''