Skip to content

Commit ae9d7bd

Browse files
authored
Merge pull request #5 from Zhou-Shilin/main
feat!: Add advanced mode support
2 parents 9b1973c + 21f9c13 commit ae9d7bd

File tree

5 files changed

+205
-17
lines changed

5 files changed

+205
-17
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
__pycache__
12
generated/*
23

34
logs/*
45
test.py
5-
__pycache__
6+
_config.yaml

config.yaml

+84-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,34 @@
1+
########## EDIT REQUIRED ##########
2+
13
# GPT SETTINGS #
2-
# EDIT REQUIRED
34
# Get your api key from openai. Remember google/bing is always your best friend.
45
# Model names: gpt-4-turbo-preview, gpt-3.5-turbo, etc.
56
# Recommend -> gpt-4-turbo-preview, which codes more accurately and is less likely to write bugs, but is more expensive.
67

7-
API_KEY: ""
8+
API_KEY: "" # Free API Key with GPT-4 access: https://github.com/CubeGPT/.github/discussions/1
89
BASE_URL: "https://api.openai.com/v1/chat/completions"
9-
GENERATE_MODEL: "gpt-4-turbo-2024-04-09" # Don't use gpt-4, because this model is longer supports json modes.
10+
11+
GENERATE_MODEL: "gpt-4-turbo-preview" # Don't use gpt-4, because that model no longer supports JSON mode.
12+
13+
14+
# ADVANCED MODE #
15+
# This mode is experimental, but we highly recommend enabling it for better results.
16+
ADVANCED_MODE: True
17+
IMAGE_GENERATION_MODEL: "dall-e-3"
18+
IMAGE_SIZE: "1024x1024"
19+
VISION_MODEL: "gpt-4-vision-preview"
20+
21+
# Note: If you are using the free API key above, you can't use the advanced mode since it doesn't support dall-e-3 and gpt-4-vision-preview models.
22+
USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL: False
23+
DALLE_API_KEY: ""
24+
DALLE_BASE_URL: "https://api.openai.com/v1/chat/completions"
25+
26+
USE_DIFFERENT_APIKEY_FOR_VISION_MODEL: False
27+
VISION_API_KEY: ""
28+
VISION_BASE_URL: "https://api.openai.com/v1/chat/completions"
29+
30+
31+
########## EDIT OPTIONAL ##########
1032

1133
# PROMPT SETTINGS #
1234
# If you don't know what it is, please don't touch it. Be sure to backup before editing.
@@ -48,6 +70,65 @@ SYS_GEN: |
4870
USR_GEN: |
4971
%DESCRIPTION%
5072
73+
## Advanced Mode ##
74+
75+
### Programme ###
76+
BTR_DESC_SYS_GEN: |
77+
You are an minecraft schematic designer. Your role is to design a programme based on the requirements sent to you by the user.
78+
For exmaple,
79+
User input: "A cafe."
80+
Response: "A small cafe with a modern design, red roof and brown door, big windows. Inside, there's two tables and a bar."
81+
82+
BTR_DESC_USR_GEN: |
83+
%DESCRIPTION%
84+
85+
### Image Tag Generation ###
86+
IMG_TAG_SYS_GEN: |
87+
You work for a minecraft schematic company and you need to use AI to generate the design image based on designer's architectural programme. Please response the tags you'd like to use for the image generation.
88+
Never response anything else.
89+
Example resposne: "A minecraft building with a modern design, red roof and brown door, big windows."
90+
91+
IMG_TAG_USR_GEN: |
92+
Designer's programme: %PROGRAMME%
93+
94+
### Structure Generation (Advanced with gpt-4-vision) ###
95+
SYS_GEN_ADV: |
96+
You are a minecraft structure builder bot. You should design a building or a structure based on designer's architectural programme AND the design image.
97+
Response in json like this:
98+
{
99+
"materials": [
100+
"A: \"minecraft:air\"",
101+
"S: \"minecraft:stone\""
102+
],
103+
"structures": [
104+
{
105+
"floor": 0,
106+
"structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
107+
},
108+
{
109+
"floor": 1,
110+
"structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
111+
},
112+
{
113+
"floor": 2,
114+
"structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
115+
},
116+
{
117+
"floor": 3,
118+
"structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
119+
},
120+
{
121+
"floor": 4,
122+
"structure": "SSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\n"
123+
}
124+
]
125+
}
126+
Never response anything else. Do not design a building which is too large (more than 10 floors). Never use markdown format. Use \n for line feed.
127+
128+
USR_GEN_ADV: |
129+
%DESCRIPTION%
130+
The image is attached below.
131+
51132
# Developer Settings #
52133
DEBUG_MODE: True
53134
VERSION_NUMBER: "Alpha-1.0" # NEVER EDIT THIS IF YOU DON'T KNOW WHAT YOU ARE DOING

console.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,24 @@ def generate_plugin(description):
3737

3838
return schem
3939

40+
def get_schematic_advanced(description):
    """
    Generates a schematic through the advanced (image-assisted) pipeline.

    The pipeline runs four steps: expand the description into a design
    programme, derive image tags from that programme, generate a design
    image from the tags, and finally ask the vision model for the structure
    with the image attached.

    Args:
        description (str): The user's description of the structure.

    Returns:
        The schematic produced by core.text_to_schem from the model response.
    """
    print("(Advanced Mode) Generating programme...")
    programme = core.askgpt(
        config.BTR_DESC_SYS_GEN,
        config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image tag...")
    image_tag = core.askgpt(
        config.IMG_TAG_SYS_GEN,
        config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image...")
    # Append the "minecraft" keyword to the generated tags. The previous
    # suffix ended with a stray ")" that leaked into the image prompt.
    tag = image_tag + ", minecraft"
    image_url = core.ask_dall_e(tag)

    print("(Advanced Mode) Generating schematic...")
    # NOTE(review): the vision prompt is filled with the original description,
    # not the generated programme, although SYS_GEN_ADV refers to the
    # designer's programme -- confirm this is intended.
    response = core.askgpt(
        config.SYS_GEN_ADV,
        config.USR_GEN_ADV.replace("%DESCRIPTION%", description),
        config.VISION_MODEL,
        image_url=image_url,
    )

    return core.text_to_schem(response)
57+
4058
if __name__ == "__main__":
4159
core.initialize()
4260

@@ -54,7 +72,11 @@ def generate_plugin(description):
5472

5573
print("Generating...")
5674

57-
schem = generate_plugin(description)
75+
if config.ADVANCED_MODE:
76+
print("Advanced mode is enabled. Generating a schematic with advanced features.")
77+
schem = get_schematic_advanced(description)
78+
else:
79+
schem = generate_plugin(description)
5880

5981
logger(f"console: Saving {name}.schem to generated/ folder.")
6082
version_tag = core.input_version_to_mcs_tag(version)

core.py

+69-11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import mcschematic
33
import sys
44
import json
5+
import requests
6+
import base64
7+
import uuid
58

69
from log_writer import logger
710
import config
@@ -20,35 +23,59 @@ def initialize():
2023
"""
2124
logger(f"Launch. Software version {config.VERSION_NUMBER}, platform {sys.platform}")
2225

23-
def askgpt(system_prompt: str, user_prompt: str, model_name: str):
26+
def askgpt(system_prompt: str, user_prompt: str, model_name: str, disable_json_mode: bool = False, image_url: str = None):
2427
"""
2528
Interacts with ChatGPT using the specified prompts.
2629
2730
Args:
2831
system_prompt (str): The system prompt.
2932
user_prompt (str): The user prompt.
33+
model_name (str): The model name to use.
34+
disable_json_mode (bool): Whether to disable JSON mode.
3035
3136
Returns:
3237
str: The response from ChatGPT.
3338
"""
34-
client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL)
39+
if image_url is not None and config.USE_DIFFERENT_APIKEY_FOR_VISION_MODEL:
40+
logger("Using different API key for vision model.")
41+
client = OpenAI(api_key=config.VISION_API_KEY, base_url=config.VISION_BASE_URL)
42+
else:
43+
client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL)
44+
3545
logger("Initialized the OpenAI client.")
3646

3747
# Define the messages for the conversation
38-
messages = [
39-
{"role": "system", "content": system_prompt},
40-
{"role": "user", "content": user_prompt}
41-
]
48+
if image_url is not None:
49+
messages = [
50+
{"role": "system", "content": system_prompt},
51+
{"role": "user", "content": [
52+
{"type": "text", "text": user_prompt},
53+
{"type": "image_url", "image_url": {"url": image_url}}
54+
]
55+
}
56+
]
57+
else:
58+
messages = [
59+
{"role": "system", "content": system_prompt},
60+
{"role": "user", "content": user_prompt}
61+
]
62+
4263

4364
logger(f"askgpt: system {system_prompt}")
4465
logger(f"askgpt: user {user_prompt}")
4566

4667
# Create a chat completion
47-
response = client.chat.completions.create(
48-
model=model_name,
49-
response_format={"type": "json_object"},
50-
messages=messages
51-
)
68+
if disable_json_mode:
69+
response = client.chat.completions.create(
70+
model=model_name,
71+
messages=messages
72+
)
73+
else:
74+
response = client.chat.completions.create(
75+
model=model_name,
76+
response_format={"type": "json_object"},
77+
messages=messages
78+
)
5279

5380
logger(f"askgpt: response {response}")
5481

@@ -57,6 +84,37 @@ def askgpt(system_prompt: str, user_prompt: str, model_name: str):
5784
logger(f"askgpt: extracted reply {assistant_reply}")
5885
return assistant_reply
5986

87+
def ask_dall_e(description: str):
    """
    Requests a design image from the configured image generation model.

    The dedicated DALL-E credentials are used when
    config.USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL is set; otherwise the general
    API key and base URL are used.

    Args:
        description (str): The prompt describing the image to generate.

    Returns:
        str: The URL of the generated image.
    """
    use_dedicated_key = config.USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL
    api_key = config.DALLE_API_KEY if use_dedicated_key else config.API_KEY
    base_url = config.DALLE_BASE_URL if use_dedicated_key else config.BASE_URL
    client = OpenAI(api_key=api_key, base_url=base_url)

    logger("ask_dall_e: Generating design image using DALL-E API.")

    # A single standard-quality image is requested per call.
    generation_args = {
        "model": config.IMAGE_GENERATION_MODEL,
        "prompt": description,
        "size": config.IMAGE_SIZE,
        "quality": "standard",
        "n": 1,
    }
    result = client.images.generate(**generation_args)

    # Only one image was requested, so the first entry is the result.
    image_url = result.data[0].url
    logger(f"ask_dall_e: Generated image URL {image_url}")

    return image_url
117+
60118
def text_to_schem(text: str):
61119
"""
62120
Converts a JSON string to a Minecraft schematic.

ui.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,24 @@ def get_schematic(description):
2929

3030
return schem
3131

32+
def get_schematic_advanced(description):
    """
    Generates a schematic through the advanced (image-assisted) pipeline.

    The pipeline runs four steps: expand the description into a design
    programme, derive image tags from that programme, generate a design
    image from the tags, and finally ask the vision model for the structure
    with the image attached.

    Args:
        description (str): The user's description of the structure.

    Returns:
        The schematic produced by core.text_to_schem from the model response.
    """
    print("(Advanced Mode) Generating programme...")
    programme = core.askgpt(
        config.BTR_DESC_SYS_GEN,
        config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image tag...")
    image_tag = core.askgpt(
        config.IMG_TAG_SYS_GEN,
        config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image...")
    # Append the "minecraft" keyword to the generated tags. The previous
    # suffix ended with a stray ")" that leaked into the image prompt.
    tag = image_tag + ", minecraft"
    image_url = core.ask_dall_e(tag)

    print("(Advanced Mode) Generating schematic...")
    # NOTE(review): the vision prompt is filled with the original description,
    # not the generated programme, although SYS_GEN_ADV refers to the
    # designer's programme -- confirm this is intended.
    response = core.askgpt(
        config.SYS_GEN_ADV,
        config.USR_GEN_ADV.replace("%DESCRIPTION%", description),
        config.VISION_MODEL,
        image_url=image_url,
    )

    return core.text_to_schem(response)
49+
3250
def generate_schematic():
3351
"""
3452
Generates a schematic file based on user input.
@@ -42,6 +60,11 @@ def generate_schematic():
4260
"""
4361
generate_button.config(state=tk.DISABLED, text="Generating...")
4462

63+
if config.ADVANCED_MODE:
64+
msgbox.showwarning("Warning", "You are using advanced mode. This mode will generate schematic with higher quality, but it may take longer to generate.")
65+
66+
msgbox.showinfo("Info", "It is expected to take 30 seconds to 5 minutes. The programme may \"not responding\", this is normal, just be patient. DO NOT CLOSE THE PROGRAM. Click the button below to start generating.")
67+
4568
version = version_entry.get()
4669
name = name_entry.get()
4770
description = description_entry.get()
@@ -50,7 +73,10 @@ def generate_schematic():
5073
logger(f"console: input name {name}")
5174
logger(f"console: input description {description}")
5275

53-
schem = get_schematic(description)
76+
if config.ADVANCED_MODE:
77+
schem = get_schematic_advanced(description)
78+
else:
79+
schem = get_schematic(description)
5480

5581
logger(f"console: Saving {name}.schem to generated/ folder.")
5682
version_tag = core.input_version_to_mcs_tag(version)

0 commit comments

Comments
 (0)