Skip to content

Commit 0ba8059

Browse files
authored
Merge pull request #30 from krillinai/feat-aitool
Feat aitool
2 parents: f19c5b6 + d173bde; commit 0ba8059

File tree

6 files changed

+25
-8
lines changed

6 files changed

+25
-8
lines changed

.gitignore

+1-2
Original file line number | Diff line number | Diff line change
@@ -7,5 +7,4 @@ bin/
77
models/
88
uploads/
99
app.log
10-
build/
11-
models/
10+
build/

internal/service/subtitle_service.go

+17-2
Original file line number | Diff line number | Diff line change
@@ -444,19 +444,31 @@ func (s Service) splitAudio(ctx context.Context, stepParam *types.SubtitleTaskSt
444444
func (s Service) audioToSrt(ctx context.Context, stepParam *types.SubtitleTaskStepParam) error {
445445
log.GetLogger().Info("audioToSubtitle.audioToSrt start", zap.Any("taskId", stepParam.TaskId))
446446
var (
447+
cancel context.CancelFunc
447448
stepNum = 0
448449
parallelControlChan = make(chan struct{}, config.Conf.App.TranslateParallelNum)
449-
eg errgroup.Group
450+
eg *errgroup.Group
450451
stepNumMu sync.Mutex
451452
err error
452453
)
454+
ctx, cancel = context.WithCancel(ctx)
455+
defer cancel()
456+
eg, ctx = errgroup.WithContext(ctx)
453457
for _, audioFileItem := range stepParam.SmallAudios {
454458
parallelControlChan <- struct{}{}
455459
audioFile := audioFileItem
456460
eg.Go(func() error {
457461
defer func() {
458462
<-parallelControlChan
463+
if r := recover(); r != nil {
464+
log.GetLogger().Error("audioToSubtitle.audioToSrt panic recovered", zap.Any("recover", r))
465+
}
459466
}()
467+
select {
468+
case <-ctx.Done():
469+
return ctx.Err()
470+
default:
471+
}
460472
// 语音转文字
461473
var transcriptionData *types.TranscriptionData
462474
for i := 0; i < 3; i++ {
@@ -470,6 +482,7 @@ func (s Service) audioToSrt(ctx context.Context, stepParam *types.SubtitleTaskSt
470482
}
471483
}
472484
if err != nil {
485+
cancel()
473486
log.GetLogger().Error("audioToSubtitle.audioToSrt.Transcription err", zap.Any("stepParam", stepParam), zap.Error(err))
474487
return err
475488
}
@@ -486,6 +499,7 @@ func (s Service) audioToSrt(ctx context.Context, stepParam *types.SubtitleTaskSt
486499
// 拆分字幕并翻译
487500
err = s.splitTextAndTranslate(stepParam.TaskId, stepParam.TaskBasePath, stepParam.TargetLanguage, stepParam.EnableModalFilter, audioFile)
488501
if err != nil {
502+
cancel()
489503
log.GetLogger().Error("audioToSubtitle.audioToSrt.splitTextAndTranslate err", zap.Any("stepParam", stepParam), zap.Error(err))
490504
return err
491505
}
@@ -500,6 +514,7 @@ func (s Service) audioToSrt(ctx context.Context, stepParam *types.SubtitleTaskSt
500514
// 生成时间戳
501515
err = s.generateTimestamps(stepParam.TaskId, stepParam.TaskBasePath, stepParam.OriginLanguage, stepParam.SubtitleResultType, audioFile)
502516
if err != nil {
517+
cancel()
503518
log.GetLogger().Error("audioToSubtitle.audioToSrt.generateTimestamps err", zap.Any("stepParam", stepParam), zap.Error(err))
504519
return err
505520
}
@@ -676,7 +691,7 @@ func getSentenceTimestamps(words []types.Word, sentence string, lastTs float64,
676691
var srtSt types.SrtSentence
677692
var sentenceWordList []string
678693
sentenceWords := make([]types.Word, 0)
679-
if language == types.LanguageNameEnglish || language == types.LanguageNameGerman { // 处理方式不同
694+
if language == types.LanguageNameEnglish || language == types.LanguageNameGerman || language == types.LanguageNameTurkish { // 处理方式不同
680695
sentenceWordList = util.SplitSentence(sentence)
681696
if len(sentenceWordList) == 0 {
682697
return srtSt, sentenceWords, 0, fmt.Errorf("sentence is empty")

pkg/aliyun/asr.go

+2-1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/gorilla/websocket"
88
"go.uber.org/zap"
99
"io"
10+
"krillin-ai/internal/storage"
1011
"krillin-ai/internal/types"
1112
"krillin-ai/log"
1213
"net/http"
@@ -154,7 +155,7 @@ type Event struct {
154155
func processAudio(filePath string) (string, error) {
155156
dest := strings.ReplaceAll(filePath, filepath.Ext(filePath), "_mono_16K.mp3")
156157
cmdArgs := []string{"-i", filePath, "-ac", "1", "-ar", "16000", "-b:a", "192k", dest}
157-
cmd := exec.Command("ffmpeg", cmdArgs...)
158+
cmd := exec.Command(storage.FfmpegPath, cmdArgs...)
158159
output, err := cmd.CombinedOutput()
159160
if err != nil {
160161
log.GetLogger().Error("处理音频失败", zap.Error(err), zap.String("audio file", filePath), zap.String("output", string(output)))

pkg/aliyun/voice_clone.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ func (c *VoiceCloneClient) CosyVoiceClone(voicePrefix, audioURL string) (string,
107107
}
108108
log.GetLogger().Info("CosyVoiceClone请求完毕", zap.String("Response", resp.String()))
109109
if res.Message != "SUCCESS" {
110-
log.GetLogger().Error("CosyVoiceClone请求响应错误", zap.String("Message", res.Message))
110+
log.GetLogger().Error("CosyVoiceClone请求响应错误", zap.String("Request Id", res.RequestId), zap.Int("Code", res.Code), zap.String("Message", res.Message))
111111
return "", fmt.Errorf("CosyVoiceClone请求响应错误: %s", res.Message)
112112
}
113113
return res.VoiceName, nil

pkg/util/subtitle.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -180,8 +180,8 @@ func ParseSrtNoTsToSrtBlock(srtNoTsFile string) ([]*SrtBlock, error) {
180180
}
181181

182182
func SplitSentence(sentence string) []string {
183-
// 使用正则表达式移除标点符号和特殊字符(除了字母、数字和空格)
184-
re := regexp.MustCompile(`[^\w\s']+`)
183+
// 使用正则表达式移除标点符号和特殊字符(保留各语言字母、数字和空格)
184+
re := regexp.MustCompile(`[^\p{L}\p{N}\s']+`)
185185
cleanedSentence := re.ReplaceAllString(sentence, " ")
186186

187187
// 使用 strings.Fields 按空格拆分成单词

static/index.html

+2
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,8 @@ <h1>世界帧精彩</h1>
229229
<option value="zh_cn">简体中文</option>
230230
<option value="en">英文</option>
231231
<option value="ja">日文</option>
232+
<option value="tr">土耳其语</option>
233+
<option value="de">德语</option>
232234
</select>
233235
</div>
234236

0 commit comments

Comments
 (0)