Skip to content

Commit f2f031c

Browse files
authored
Merge pull request #11 from krillinai/feat-aitool
add aliyun support
2 parents c9ddce0 + 15cc875 commit f2f031c

16 files changed

+913
-130
lines changed

config/config-example.toml

+10-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
[app]
22
segment_duration = 5 # 音频分割间隔,单位:分钟
33
translate_parallel_num = 5 # 并发进行大模型翻译的数量上限
4-
proxy = "" # 网络代理
4+
proxy = "" # 网络代理地址,格式如http://127.0.0.1:7890
5+
transcribe_provider = "openai" # 语音识别供应商,当前可选值:openai,aliyun。强烈建议openai,选择后需要在下方配置供应商密钥等信息,下同
6+
llm_provider = "openai" # LLM供应商,当前可选值:openai,aliyun
57

68
[server]
79
host = "127.0.0.1"
@@ -10,8 +12,10 @@
1012
[openai]
1113
api_key = "sk-XXX" # OpenAI API密钥
1214

13-
[aliyun] # 此处配置可以先忽略
14-
access_key_id = ""
15-
access_key_secret = ""
16-
app_key = ""
17-
cosy_voice_ws_addr = ""
15+
[aliyun]
16+
[aliyun.tts] # tts配置可以先忽略
17+
access_key_id = ""
18+
access_key_secret = ""
19+
app_key= ""
20+
[aliyun.bailian]
21+
api_key = ""

config/config.go

+12-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ type App struct {
1010
TranslateParallelNum int `toml:"translate_parallel_num"`
1111
Proxy string `toml:"proxy"`
1212
ParsedProxy *url.URL
13+
TranscribeProvider string `toml:"transcribe_provider"`
14+
LlmProvider string `toml:"llm_provider"`
1315
}
1416

1517
type Server struct {
@@ -21,11 +23,19 @@ type Openai struct {
2123
ApiKey string `toml:"api_key"`
2224
}
2325

24-
type Aliyun struct {
26+
// AliyunTts holds the credentials decoded from the TOML keys access_key_id,
// access_key_secret and app_key; it is embedded in Aliyun under the "tts" key.
type AliyunTts struct {
2527
AccessKeyId string `toml:"access_key_id"`
2628
AccessKeySecret string `toml:"access_key_secret"`
2729
AppKey string `toml:"app_key"`
28-
CosyVoiceWsAddr string `toml:"cosy_voice_ws_addr"`
30+
}
31+
32+
// AliyunBailian holds the API key decoded from the TOML key api_key; it is
// embedded in Aliyun under the "bailian" key.
type AliyunBailian struct {
33+
// ApiKey is the key passed to the Aliyun chat client.
ApiKey string `toml:"api_key"`
34+
}
35+
36+
// Aliyun groups the Aliyun settings, decoded from the nested TOML tables
// "tts" and "bailian".
type Aliyun struct {
37+
Tts AliyunTts `toml:"tts"`
38+
Bailian AliyunBailian `toml:"bailian"`
2939
}
3040

3141
type Config struct {

go.mod

+15-1
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ toolchain go1.22.10
77
require (
88
github.com/BurntSushi/toml v1.3.2
99
github.com/aliyun/alibaba-cloud-sdk-go v1.63.72
10+
github.com/aliyun/alibabacloud-bailian-go-sdk v1.2.0
1011
github.com/gin-gonic/gin v1.10.0
1112
github.com/go-resty/resty/v2 v2.7.0
12-
github.com/google/uuid v1.3.0
13+
github.com/google/uuid v1.4.0
1314
github.com/gorilla/websocket v1.5.0
1415
github.com/samber/lo v1.38.1
1516
github.com/sashabaranov/go-openai v1.36.0
@@ -18,8 +19,20 @@ require (
1819
)
1920

2021
require (
22+
github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.4 // indirect
23+
github.com/alibabacloud-go/bailian-20230601 v1.1.0 // indirect
24+
github.com/alibabacloud-go/darabonba-openapi/v2 v2.0.2 // indirect
25+
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect
26+
github.com/alibabacloud-go/endpoint-util v1.1.0 // indirect
27+
github.com/alibabacloud-go/openapi-util v0.1.0 // indirect
28+
github.com/alibabacloud-go/tea v1.1.19 // indirect
29+
github.com/alibabacloud-go/tea-utils v1.3.1 // indirect
30+
github.com/alibabacloud-go/tea-utils/v2 v2.0.4 // indirect
31+
github.com/alibabacloud-go/tea-xml v1.1.2 // indirect
32+
github.com/aliyun/credentials-go v1.1.2 // indirect
2133
github.com/bytedance/sonic v1.11.6 // indirect
2234
github.com/bytedance/sonic/loader v0.1.1 // indirect
35+
github.com/clbanning/mxj/v2 v2.5.5 // indirect
2336
github.com/cloudwego/base64x v0.1.4 // indirect
2437
github.com/cloudwego/iasm v0.2.0 // indirect
2538
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
@@ -39,6 +52,7 @@ require (
3952
github.com/modern-go/reflect2 v1.0.2 // indirect
4053
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
4154
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
55+
github.com/tjfoc/gmsm v1.3.2 // indirect
4256
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
4357
github.com/ugorji/go/codec v1.2.12 // indirect
4458
go.uber.org/atomic v1.10.0 // indirect

go.sum

+83-2
Large diffs are not rendered by default.

internal/service/init.go

+30-2
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,47 @@
11
package service
22

33
import (
4+
"go.uber.org/zap"
45
"krillin-ai/config"
6+
"krillin-ai/internal/types"
7+
"krillin-ai/log"
58
"krillin-ai/pkg/aliyun"
69
"krillin-ai/pkg/openai"
710
)
811

912
// Service bundles the external clients used by the subtitle pipeline.
// Transcriber and ChatCompleter are the provider-independent entry points for
// speech-to-text and LLM calls; NewService chooses their concrete
// implementation from the application configuration.
type Service struct {
1013
OpenaiClient *openai.Client
11-
CosyCloneClient *aliyun.CosyCloneClient
14+
CosyCloneClient *aliyun.Client
15+
16+
Transcriber types.Transcriber
17+
ChatCompleter types.ChatCompleter
1218
}
1319

1420
func NewService() *Service {
21+
var transcriber types.Transcriber
22+
var chatCompleter types.ChatCompleter
23+
24+
switch config.Conf.App.TranscribeProvider {
25+
case "openai":
26+
transcriber = openai.NewClient(config.Conf.Openai.ApiKey, config.Conf.App.ParsedProxy)
27+
case "aliyun":
28+
transcriber = aliyun.NewClient()
29+
}
30+
log.GetLogger().Info("当前选择的转录源: ", zap.String("transcriber", config.Conf.App.TranscribeProvider))
31+
32+
switch config.Conf.App.LlmProvider {
33+
case "openai":
34+
chatCompleter = openai.NewClient(config.Conf.Openai.ApiKey, config.Conf.App.ParsedProxy)
35+
case "aliyun":
36+
chatCompleter = aliyun.NewChatClient(config.Conf.Aliyun.Bailian.ApiKey)
37+
}
38+
log.GetLogger().Info("当前选择的LLM源: ", zap.String("llm", config.Conf.App.LlmProvider))
39+
1540
return &Service{
1641
OpenaiClient: openai.NewClient(config.Conf.Openai.ApiKey, config.Conf.App.ParsedProxy),
17-
CosyCloneClient: aliyun.NewCosyCloneClient(),
42+
CosyCloneClient: aliyun.NewClient(),
43+
44+
Transcriber: transcriber,
45+
ChatCompleter: chatCompleter,
1846
}
1947
}

internal/service/subtitle_service.go

+10-11
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
"krillin-ai/internal/storage"
1515
"krillin-ai/internal/types"
1616
"krillin-ai/log"
17-
"krillin-ai/pkg/openai"
1817
"krillin-ai/pkg/util"
1918
"os"
2019
"os/exec"
@@ -188,7 +187,7 @@ func (s Service) GetTaskStatus(req dto.GetVideoSubtitleTaskReq) (*dto.GetVideoSu
188187
}, nil
189188
}
190189

191-
// 新版流程:链接->本地音频文件->扣费->视频信息获取(若有)->本地字幕文件->cos上的字幕信息
190+
// 新版流程:链接->本地音频文件->视频信息获取(若有)->本地字幕文件->cos上的字幕信息
192191

193192
func (s Service) linkToAudioFile(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error {
194193
var (
@@ -439,13 +438,13 @@ func (s Service) audioToSrt(ctx context.Context, stepParam *types.SubtitleTaskSt
439438
<-parallelControlChan
440439
}()
441440
// 语音转文字
442-
var transcriptionData *openai.TranscriptionData
441+
var transcriptionData *types.TranscriptionData
443442
for i := 0; i < 3; i++ {
444443
language := string(stepParam.OriginLanguage)
445444
if language == "zh_cn" {
446445
language = "zh" // 切换一下
447446
}
448-
transcriptionData, err = s.OpenaiClient.Transcription(audioFile.AudioFile, language)
447+
transcriptionData, err = s.Transcriber.Transcription(audioFile.AudioFile, language)
449448
if err == nil {
450449
break
451450
}
@@ -629,7 +628,7 @@ func (s Service) splitSrt(ctx context.Context, stepParam *types.SubtitleTaskStep
629628
return nil
630629
}
631630

632-
func getSentenceTimestamps(words []openai.Word, sentence string, lastTs float64, language types.StandardLanguageName) (types.SrtSentence, float64, error) {
631+
func getSentenceTimestamps(words []types.Word, sentence string, lastTs float64, language types.StandardLanguageName) (types.SrtSentence, float64, error) {
633632
var srtSt types.SrtSentence
634633
var sentenceWordList []string
635634
if language == types.LanguageNameEnglish || language == types.LanguageNameGerman { // 处理方式不同
@@ -638,7 +637,7 @@ func getSentenceTimestamps(words []openai.Word, sentence string, lastTs float64,
638637
return srtSt, 0, fmt.Errorf("sentence is empty")
639638
}
640639

641-
sentenceWords := make([]openai.Word, 0)
640+
sentenceWords := make([]types.Word, 0)
642641

643642
thisLastTs := lastTs
644643
sentenceWordIndex := 0
@@ -663,7 +662,7 @@ func getSentenceTimestamps(words []openai.Word, sentence string, lastTs float64,
663662
}
664663

665664
if sentenceWordIndex >= len(words) {
666-
sentenceWords = append(sentenceWords, openai.Word{
665+
sentenceWords = append(sentenceWords, types.Word{
667666
Text: sentenceWord,
668667
})
669668
sentenceWordIndex = 0
@@ -726,7 +725,7 @@ func getSentenceTimestamps(words []openai.Word, sentence string, lastTs float64,
726725
return srtSt, 0, fmt.Errorf("sentence is empty")
727726
}
728727

729-
sentenceWords := make([]openai.Word, 0)
728+
sentenceWords := make([]types.Word, 0)
730729

731730
thisLastTs := lastTs
732731
sentenceWordIndex := 0
@@ -768,7 +767,7 @@ func getSentenceTimestamps(words []openai.Word, sentence string, lastTs float64,
768767
}
769768

770769
// 找到 Num 值递增的最大连续子数组
771-
func findMaxIncreasingSubArray(words []openai.Word) (int, int) {
770+
func findMaxIncreasingSubArray(words []types.Word) (int, int) {
772771
if len(words) == 0 {
773772
return 0, 0
774773
}
@@ -805,7 +804,7 @@ func findMaxIncreasingSubArray(words []openai.Word) (int, int) {
805804
}
806805

807806
// 跳跃(非连续)找到 Num 值递增的最大子数组
808-
func jumpFindMaxIncreasingSubArray(words []openai.Word) (int, int) {
807+
func jumpFindMaxIncreasingSubArray(words []types.Word) (int, int) {
809808
if len(words) == 0 {
810809
return -1, -1
811810
}
@@ -923,7 +922,7 @@ func (s Service) splitTextAndTranslate(taskId, baseTaskPath string, targetLangua
923922
splitContent = ""
924923
} else {
925924
for i := 0; i < 3; i++ {
926-
splitContent, err = s.OpenaiClient.ChatCompletion(splitPrompt + audioFile.TranscriptionData.Text)
925+
splitContent, err = s.ChatCompleter.ChatCompletion(splitPrompt + audioFile.TranscriptionData.Text)
927926
if err == nil {
928927
break
929928
}

internal/types/interface.go

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package types
2+
3+
// ChatCompleter is the provider-independent interface for LLM backends.
type ChatCompleter interface {
4+
// ChatCompletion submits query and returns the resulting text, or an error.
ChatCompletion(query string) (string, error)
5+
}
6+
7+
// Transcriber is the provider-independent interface for speech-to-text backends.
type Transcriber interface {
8+
// Transcription converts the audio identified by audioFile into text for the
// given language and returns the result, or an error.
// NOTE(review): audioFile is presumably a local file path and language a
// short code such as "zh" — confirm against the implementations.
Transcription(audioFile, language string) (*TranscriptionData, error)
9+
}

internal/types/subtitle_task.go

+18-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
package types
22

3-
import (
4-
"krillin-ai/pkg/openai"
5-
)
6-
73
var SplitTextPrompt = `你是一个语言处理专家,擅长翻译和处理文本,按下面的要求,根据句意和标点对给出的内容进行拆分并翻译:
84
95
- 将原句子翻译成 %s
@@ -76,7 +72,7 @@ var TranslateVideoTitleAndDescriptionPrompt = `你是一个专业的翻译专家
7672
type SmallAudio struct {
7773
AudioFile string
7874
Num int
79-
TranscriptionData *openai.TranscriptionData
75+
TranscriptionData *TranscriptionData
8076
SrtNoTsFile string
8177
}
8278

@@ -140,6 +136,10 @@ const (
140136
TtsResultAudioFileName = "tts_final_audio.wav"
141137
)
142138

139+
const (
140+
AsrMono16kAudioFileName = "mono_16k_audio.mp3"
141+
)
142+
143143
type StandardLanguageName string
144144

145145
const (
@@ -269,8 +269,20 @@ type SubtitleTask struct {
269269
SrtNum int `json:"srt_num" gorm:"column:srt_num"` // 字幕数量
270270
SubtitleInfos []SubtitleInfo `gorm:"foreignKey:TaskId;references:TaskId"`
271271
Cover string `json:"cover" gorm:"column:cover"` // 封面
272-
Cost uint32 `json:"cost" gorm:"column:cost"` // 消耗的额度
273272
SpeechDownloadUrl string `json:"speech_download_url" gorm:"column:speech_download_url"` // 语音文件下载地址
274273
CreateTime int64 `json:"create_time" gorm:"column:create_time;autoCreateTime"` // 创建时间
275274
UpdateTime int64 `json:"update_time" gorm:"column:update_time;autoUpdateTime"` // 更新时间
276275
}
276+
277+
// Word is a single recognised word of a transcription.
type Word struct {
278+
// Num is the word's sequence number.
// NOTE(review): presumably its index within the transcription — confirm.
Num int
279+
// Text is the word itself.
Text string
280+
// Start and End are the word's timestamps.
// NOTE(review): presumably seconds from the start of the audio — confirm.
Start float64
281+
End float64
282+
}
283+
284+
// TranscriptionData is the provider-independent result returned by a Transcriber.
type TranscriptionData struct {
285+
// Language is the language associated with the transcription.
Language string
286+
// Text is the full transcribed text.
Text string
287+
// Words lists the individual words of the transcription.
Words []Word
288+
}

main.go

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ func main() {
3535
err = util.CheckAndDownloadFfprobe()
3636
if err != nil {
3737
log.GetLogger().Error("ffprobe环境准备失败", zap.Error(err))
38+
return
3839
}
3940
err = util.CheckAndDownloadYtDlp()
4041
if err != nil {

pkg/aliyun/chat.go

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package aliyun
2+
3+
import (
4+
"context"
5+
goopenai "github.com/sashabaranov/go-openai"
6+
"go.uber.org/zap"
7+
"krillin-ai/log"
8+
)
9+
10+
// ChatClient is a chat-completion client for Aliyun. It embeds the go-openai
// client, so all of that client's methods are promoted onto ChatClient.
type ChatClient struct {
11+
*goopenai.Client
12+
}
13+
14+
func NewChatClient(apiKey string) *ChatClient {
15+
cfg := goopenai.DefaultConfig(apiKey)
16+
cfg.BaseURL = "https://dashscope.aliyuncs.com/compatible-mode/v1" // 使用阿里云的openai兼容模式调用
17+
return &ChatClient{
18+
Client: goopenai.NewClientWithConfig(cfg),
19+
}
20+
}
21+
22+
func (c ChatClient) ChatCompletion(query string) (string, error) {
23+
req := goopenai.ChatCompletionRequest{
24+
Model: "qwen-plus",
25+
Messages: []goopenai.ChatCompletionMessage{
26+
{
27+
Role: goopenai.ChatMessageRoleSystem,
28+
Content: "You are an assistant that helps with subtitle translation.",
29+
},
30+
{
31+
Role: goopenai.ChatMessageRoleUser,
32+
Content: query,
33+
},
34+
},
35+
}
36+
37+
resp, err := c.CreateChatCompletion(context.Background(), req)
38+
if err != nil {
39+
log.GetLogger().Error("aliyun openai create chat completion failed", zap.Error(err))
40+
return "", err
41+
}
42+
43+
resContent := resp.Choices[0].Message.Content
44+
45+
return resContent, nil
46+
}

0 commit comments

Comments
 (0)