Skip to content

Commit 93d2b42

Browse files
authored
Merge pull request #13 from mutablelogic/dev
Added image and tts generation
2 parents f880468 + 8800125 commit 93d2b42

27 files changed

+951
-91
lines changed

7283d94c6a7e5bc91e7875ccf51a96d3.m2a

25.8 KB
Binary file not shown.

README.md

+61-2
Original file line numberDiff line numberDiff line change
@@ -411,15 +411,74 @@ Commands:
411411
agents Return a list of agents
412412
models Return a list of models
413413
tools Return a list of tools
414-
download Download a model
414+
download Download a model (for Ollama)
415415
chat Start a chat session
416-
complete Complete a prompt
416+
complete Complete a prompt, generate image or speech from text
417417
embedding Generate an embedding
418418
version Print the version of this tool
419419
420420
Run "llm <command> --help" for more information on a command.
421421
```
422422

423+
### Prompt Completion
424+
425+
To have the model respond to a prompt, you can use the `complete` command. For example, to
426+
have the model respond to the prompt "What is the capital of France?" using the `claude-3-5-haiku-20241022`
427+
model, you can use the following command:
428+
429+
```bash
430+
llm complete --model claude-3-5-haiku-20241022 "What is the capital of France?"
431+
```
432+
433+
The first time you use the command, use the `--model` flag to specify the model you want to use. Your
434+
choice of model will be remembered for subsequent completions.
435+
436+
### Explain computer code
437+
438+
To have the model explain a piece of computer code, you can pipe the code into the `complete` command.
439+
For example, to have the model explain the code in the file `example.go`, you can use the following command:
440+
441+
```bash
442+
cat example.go | llm complete
443+
```
444+
445+
### Caption an image
446+
447+
To have the model generate a caption for an image, you can use the `complete` command with the `--file`
448+
flag. For example, to have the model generate a caption for the image in the file `picture.png`, you can use
449+
the following command:
450+
451+
```bash
452+
llm complete --file picture.png "Explain this image"
453+
```
454+
455+
### Generate an image
456+
457+
To have the model generate an image from a prompt, you can use the `complete` command with the `--format image`
458+
option. For example, to have the model generate an image from the prompt "A picture of a cat", you can use
459+
the following command:
460+
461+
```bash
462+
llm complete --model dall-e-3 --format image "A picture of a cat"
463+
```
464+
465+
Flags `--size`, `--quality` and `--style` can be used to specify the image parameters. It will write the image
466+
file in the current working directory.
467+
468+
### Convert text to speech
469+
470+
To have a model generate speech from text:
471+
472+
```bash
473+
cat book.txt | llm complete --model tts-1 --format mp3 --voice coral
474+
```
475+
476+
It will write the audio file in the current working directory. You can currently choose from
477+
the following audio formats and voices:
478+
479+
* Formats: `--format mp3`, `--format opus`, `--format aac`, `--format flac`, `--format wav`, `--format pcm`
480+
* Voices: `--voice alloy`, `--voice ash`, `--voice coral`, `--voice echo`, `--voice fable`, `--voice onyx`, `--voice nova`, `--voice sage`, `--voice shimmer`
481+
423482
## Contributing & Distribution
424483

425484
_This module is currently in development and subject to change_. Please do file

attachment.go

+107-32
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,42 @@
11
package llm
22

33
import (
4+
"crypto/md5"
45
"encoding/base64"
56
"encoding/json"
7+
"fmt"
68
"io"
79
"mime"
810
"net/http"
911
"os"
1012
"path/filepath"
13+
"strings"
1114
)
1215

1316
///////////////////////////////////////////////////////////////////////////////
1417
// TYPES
1518

19+
// General attachment metadata
1620
type AttachmentMeta struct {
1721
Id string `json:"id,omitempty"`
1822
Filename string `json:"filename,omitempty"`
1923
ExpiresAt uint64 `json:"expires_at,omitempty"`
2024
Caption string `json:"transcript,omitempty"`
2125
Data []byte `json:"data"`
26+
Type string `json:"type"`
27+
}
28+
29+
// OpenAI image metadata
30+
type ImageMeta struct {
31+
Url string `json:"url,omitempty"`
32+
Data []byte `json:"b64_json,omitempty"`
33+
Prompt string `json:"revised_prompt,omitempty"`
2234
}
2335

2436
// Attachment for messages
2537
type Attachment struct {
26-
meta AttachmentMeta
38+
meta *AttachmentMeta
39+
image *ImageMeta
2740
}
2841

2942
const (
@@ -38,21 +51,30 @@ func NewAttachment() *Attachment {
3851
return new(Attachment)
3952
}
4053

54+
// NewAttachmentWithImage returns an attachment backed by OpenAI image metadata
55+
func NewAttachmentWithImage(image *ImageMeta) *Attachment {
56+
return &Attachment{image: image}
57+
}
58+
4159
// ReadAttachment returns an attachment from a reader object.
4260
// It is the responsibility of the caller to close the reader.
43-
func ReadAttachment(r io.Reader) (*Attachment, error) {
44-
var filename string
61+
func ReadAttachment(r io.Reader, mimetype ...string) (*Attachment, error) {
62+
var filename, typ string
4563
data, err := io.ReadAll(r)
4664
if err != nil {
4765
return nil, err
4866
}
4967
if f, ok := r.(*os.File); ok {
5068
filename = f.Name()
5169
}
70+
if len(mimetype) > 0 {
71+
typ = mimetype[0]
72+
}
5273
return &Attachment{
53-
meta: AttachmentMeta{
74+
meta: &AttachmentMeta{
5475
Filename: filename,
5576
Data: data,
77+
Type: typ,
5678
},
5779
}, nil
5880
}
@@ -73,19 +95,27 @@ func (a *Attachment) MarshalJSON() ([]byte, error) {
7395
Filename string `json:"filename,omitempty"`
7496
Type string `json:"type"`
7597
Bytes uint64 `json:"bytes"`
76-
Caption string `json:"transcript,omitempty"`
98+
Hash string `json:"hash,omitempty"`
99+
Caption string `json:"caption,omitempty"`
77100
}
78-
j.Id = a.meta.Id
79-
j.Filename = a.meta.Filename
101+
80102
j.Type = a.Type()
81-
j.Bytes = uint64(len(a.meta.Data))
82-
j.Caption = a.meta.Caption
103+
j.Caption = a.Caption()
104+
j.Hash = a.Hash()
105+
j.Filename = a.Filename()
106+
if a.meta != nil {
107+
j.Id = a.meta.Id
108+
j.Bytes = uint64(len(a.meta.Data))
109+
} else if a.image != nil {
110+
j.Bytes = uint64(len(a.image.Data))
111+
}
112+
83113
return json.Marshal(j)
84114
}
85115

86116
// Stringify an attachment
87117
func (a *Attachment) String() string {
88-
data, err := json.MarshalIndent(a.meta, "", " ")
118+
data, err := json.MarshalIndent(a, "", " ")
89119
if err != nil {
90120
return err.Error()
91121
}
@@ -95,41 +125,83 @@ func (a *Attachment) String() string {
95125
////////////////////////////////////////////////////////////////////////////////
96126
// PUBLIC METHODS
97127

128+
// Compute the MD5 hash of the attachment data and return it as a hex string
129+
func (a *Attachment) Hash() string {
130+
hash := md5.New()
131+
hash.Write(a.Data())
132+
return fmt.Sprintf("%x", hash.Sum(nil))
133+
}
134+
135+
// Write the raw attachment data to w. Returns io.EOF if the attachment has neither metadata nor image
136+
func (a *Attachment) Write(w io.Writer) (int, error) {
137+
if a.meta != nil {
138+
return w.Write(a.meta.Data)
139+
}
140+
if a.image != nil {
141+
return w.Write(a.image.Data)
142+
}
143+
return 0, io.EOF
144+
}
145+
98146
// Return the filename of an attachment
99147
func (a *Attachment) Filename() string {
100-
return a.meta.Filename
148+
if a.meta != nil && a.meta.Filename != "" {
149+
return a.meta.Filename
150+
}
151+
// Obtain filename from MD5
152+
if ext, err := mime.ExtensionsByType(a.Type()); err == nil && len(ext) > 0 {
153+
return a.Hash() + ext[0]
154+
}
155+
return ""
101156
}
102157

103158
// Return the raw attachment data
104159
func (a *Attachment) Data() []byte {
105-
return a.meta.Data
160+
if a.meta != nil {
161+
return a.meta.Data
162+
}
163+
if a.image != nil {
164+
return a.image.Data
165+
}
166+
return nil
106167
}
107168

108169
// Return the caption for the attachment
109170
func (a *Attachment) Caption() string {
110-
return a.meta.Caption
171+
if a.meta != nil {
172+
return strings.TrimSpace(a.meta.Caption)
173+
}
174+
if a.image != nil {
175+
return strings.TrimSpace(a.image.Prompt)
176+
}
177+
return ""
111178
}
112179

113180
// Return the mime media type for the attachment, based
114181
// on the data and/or filename extension. Returns an empty string if
115182
// there is no data or filename
116183
func (a *Attachment) Type() string {
184+
// If there's a mimetype set, use this
185+
if a.meta != nil && a.meta.Type != "" {
186+
return a.meta.Type
187+
}
188+
117189
// If there's no data or filename, return empty
118-
if len(a.meta.Data) == 0 && a.meta.Filename == "" {
190+
if len(a.Data()) == 0 && a.Filename() == "" {
119191
return ""
120192
}
121193

122194
// Mimetype based on content
123195
mimetype := defaultMimetype
124-
if len(a.meta.Data) > 0 {
125-
mimetype = http.DetectContentType(a.meta.Data)
196+
if len(a.Data()) > 0 {
197+
mimetype = http.DetectContentType(a.Data())
126198
if mimetype != defaultMimetype {
127199
return mimetype
128200
}
129201
}
130202

131203
// Mimetype based on filename
132-
if a.meta.Filename != "" {
204+
if a.meta != nil && a.meta.Filename != "" {
133205
// Detect mimetype from extension
134206
mimetype = mime.TypeByExtension(filepath.Ext(a.meta.Filename))
135207
}
@@ -139,24 +211,27 @@ func (a *Attachment) Type() string {
139211
}
140212

141213
func (a *Attachment) Url() string {
142-
return "data:" + a.Type() + ";base64," + base64.StdEncoding.EncodeToString(a.meta.Data)
214+
return "data:" + a.Type() + ";base64," + base64.StdEncoding.EncodeToString(a.Data())
143215
}
144216

145217
// Streaming includes the ability to append data
146218
func (a *Attachment) Append(other *Attachment) {
147-
if other.meta.Id != "" {
148-
a.meta.Id = other.meta.Id
149-
}
150-
if other.meta.Filename != "" {
151-
a.meta.Filename = other.meta.Filename
152-
}
153-
if other.meta.ExpiresAt != 0 {
154-
a.meta.ExpiresAt = other.meta.ExpiresAt
155-
}
156-
if other.meta.Caption != "" {
157-
a.meta.Caption += other.meta.Caption
158-
}
159-
if len(other.meta.Data) > 0 {
160-
a.meta.Data = append(a.meta.Data, other.meta.Data...)
219+
if a.meta != nil {
220+
if other.meta.Id != "" {
221+
a.meta.Id = other.meta.Id
222+
}
223+
if other.meta.Filename != "" {
224+
a.meta.Filename = other.meta.Filename
225+
}
226+
if other.meta.ExpiresAt != 0 {
227+
a.meta.ExpiresAt = other.meta.ExpiresAt
228+
}
229+
if other.meta.Caption != "" {
230+
a.meta.Caption += other.meta.Caption
231+
}
232+
if len(other.meta.Data) > 0 {
233+
a.meta.Data = append(a.meta.Data, other.meta.Data...)
234+
}
161235
}
236+
// TODO: Append for image
162237
}

cmd/llm/chat.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ type ChatCmd struct {
2626
// PUBLIC METHODS
2727

2828
func (cmd *ChatCmd) Run(globals *Globals) error {
29-
return run(globals, cmd.Model, func(ctx context.Context, model llm.Model) error {
29+
return run(globals, AudioType, cmd.Model, func(ctx context.Context, model llm.Model) error {
3030
// Current buffer
3131
var buf string
3232

cmd/llm/chat2.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func NewTelegramServer(token string, model llm.Model, system string, toolkit llm
6363
// PUBLIC METHODS
6464

6565
func (cmd *Chat2Cmd) Run(globals *Globals) error {
66-
return run(globals, cmd.Model, func(ctx context.Context, model llm.Model) error {
66+
return run(globals, ChatType, cmd.Model, func(ctx context.Context, model llm.Model) error {
6767
server, err := NewTelegramServer(cmd.TelegramToken, model, cmd.System, globals.toolkit, telegram.WithDebug(globals.Debug))
6868
if err != nil {
6969
return err

0 commit comments

Comments
 (0)