From 2f8ca2e4e6821ab54b0830a4e8dd29f25d836a3e Mon Sep 17 00:00:00 2001 From: Jacket <44538064+PRESIDENT810@users.noreply.github.com> Date: Wed, 3 Jul 2024 04:10:56 -0700 Subject: [PATCH 1/4] =?UTF-8?q?[Feat]=20=E6=B7=BB=E5=8A=A0=E6=89=B9?= =?UTF-8?q?=E9=87=8F=E4=B8=8B=E8=BD=BD=E6=96=87=E4=BB=B6=E5=A4=B9=E5=86=85?= =?UTF-8?q?=E5=85=A8=E9=83=A8=E6=96=87=E6=A1=A3=E7=9A=84=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=20(#121)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial support * Basically working * Use concurrency * Clean up * Integrate batch download into download functionality * Whoops --- README.md | 28 +++++++++- cmd/batchDownload.go | 130 +++++++++++++++++++++++++++++++++++++++++++ cmd/download.go | 44 ++++++++++----- cmd/main.go | 6 ++ core/client.go | 68 ++++++++++++++++++++++ core/client_test.go | 13 +++++ utils/common.go | 9 ++- 7 files changed, 280 insertions(+), 18 deletions(-) create mode 100644 cmd/batchDownload.go diff --git a/README.md b/README.md index 2f39615..cb13678 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,20 @@ --appId value Set app id for the OPEN API --appSecret value Set app secret for the OPEN API --help, -h show help (default: false) + + $ feishu2md dl -h + NAME: + feishu2md download - Download feishu/larksuite document to markdown file + +USAGE: + feishu2md download [command options] + +OPTIONS: + --output value, -o value Specify the output directory for the markdown files (default: "./") + --batch batch download all documents inside a folder, where the url is the base folder token (default: false) + --dump Dump json response of the OPEN API (default: false) + --help, -h show help (default: false) + ``` **生成配置文件** @@ -81,7 +95,7 @@ 更多的配置选项请手动打开配置文件更改。 - **下载为 Markdown** + **下载单个文档为 Markdown** 通过 `feishu2md dl ` 直接下载,文档链接可以通过 **分享 > 开启链接分享 > 复制链接** 获得。 @@ -90,6 +104,18 @@ ```bash $ feishu2md dl "https://domain.feishu.cn/docx/docxtoken" ``` + + **批量下载某文件夹内的全部文档为 Markdown** + + 通过`feishu2md 
dl --batch ` 直接下载,文件夹链接可以通过 **分享 > 开启链接分享 > 复制链接** 获得。 + + 示例: + + ```bash + $ feishu2md dl --batch -o="output_directory" "folder_token or folder_url" + ``` + + 此功能暂时不支持Docker版本
diff --git a/cmd/batchDownload.go b/cmd/batchDownload.go new file mode 100644 index 0000000..ffaccbb --- /dev/null +++ b/cmd/batchDownload.go @@ -0,0 +1,130 @@ +package main + +import ( + "context" + "fmt" + "github.com/Wsine/feishu2md/core" + "github.com/Wsine/feishu2md/utils" + "github.com/pkg/errors" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +var downloadFailureList = []string{} + +const ApiLimitsPerSec = 5 + +func singleDownload(relPath string, url string, outputDir string, config *core.Config) { + // If the output subdirectory for relPath does not exist, create it + outputPath := filepath.Join(outputDir, relPath) + subDir := filepath.Dir(outputPath) + if _, err := os.Stat(subDir); os.IsNotExist(err) { + err := os.MkdirAll(subDir, 0755) + if err != nil { + fmt.Println("Error creating directory:", err) + return + } + } + + // Download the document + err := downloadDocument(url, outputPath, false, config) + if err != nil { + fmt.Println("Error downloading document:", err) + downloadFailureList = append(downloadFailureList, url) + } +} + +func logDownloadFailures() { + // Log the URLs that failed to download in stderr + if len(downloadFailureList) > 0 { + fmt.Fprintln(os.Stderr, "The following URLs failed to download:") + for _, url := range downloadFailureList { + fmt.Fprintln(os.Stderr, url) + } + // Print the following message in Green background color + _, _ = fmt.Fprintln(os.Stderr, "\033[42m\033[30mDon't worry, this is not a total failure.\033[0m") + _, _ = fmt.Fprintln(os.Stderr, "\033[42m\033[30mSome of your documents may have been downloaded successfully.\033[0m") + _, _ = fmt.Fprintln(os.Stderr, "\033[42m\033[30mYou can try to download the failed documents again.\033[0m") + } +} + +// Batch download all the documents in the pathMap to the output directory +// The pathMap is a map of {relativePath, url} +// This function downloads all the documents using the url to the relativePath in the output directory +func batchDownload(pathMap 
map[string]string, outputDir string, config *core.Config) error { + utils.StopWhenErr = false + + var batchErr error = nil + + // API limit is 5 requests per second, + // so we use a pool of 5 goroutines added to the pool every second + readyOperators := make(chan struct{}, ApiLimitsPerSec) + finishedOperators := make(chan struct{}, ApiLimitsPerSec) + for i := 0; i < ApiLimitsPerSec; i++ { + finishedOperators <- struct{}{} + } + downloadFinished := false + // Set a timer to add 5 operators to the pool every 1.5 second (for safety) + go func() { + for { + if downloadFinished { + break + } + <-finishedOperators + readyOperators <- struct{}{} + <-time.After(1500 * time.Millisecond / ApiLimitsPerSec) + } + }() + + var wg sync.WaitGroup + for relPath, url := range pathMap { + wg.Add(1) + go func(relPath, url string) { + <-readyOperators + singleDownload(relPath, url, outputDir, config) + finishedOperators <- struct{}{} + wg.Done() + }(relPath, url) + } + + wg.Wait() + logDownloadFailures() + + if len(downloadFailureList) > 0 { + batchErr = errors.New("Some documents failed to download") + } + return batchErr +} + +func batchDownloadFolder(outputDir string, urlOrToken string, config *core.Config) error { + baseFolderToken := urlOrToken + // If this is batch download and a full directory url is provided, strip the last part (token) + if strings.Contains(urlOrToken, "/") { + segments := strings.Split(urlOrToken, "/") + baseFolderToken = segments[len(segments)-1] + baseFolderToken = strings.Split(baseFolderToken, "?")[0] // In case some user copy the url with query params + } + + // Create client with context + ctx := context.WithValue(context.Background(), "output", config.Output) + + client := core.NewClient( + config.Feishu.AppId, config.Feishu.AppSecret, + ) + pathMap, err := client.GetDriveStructure(ctx, baseFolderToken) + + if err != nil { + return err + } + + err = batchDownload(pathMap, outputDir, config) + if err != nil { + return err + } + + return nil + +} diff 
--git a/cmd/download.go b/cmd/download.go index a63f68b..5bccebe 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -17,22 +17,20 @@ import ( type DownloadOpts struct { outputDir string dump bool + batch bool } var downloadOpts = DownloadOpts{} -func handleDownloadCommand(url string, opts *DownloadOpts) error { +func downloadDocument(url string, outputDir string, dump bool, config *core.Config) error { // Validate the url to download docType, docToken, err := utils.ValidateDownloadURL(url) - utils.CheckErr(err) + // This might be a sheet or other kind of URL, we don't support downloading it yet so we just skip it + if err != nil { + return nil + } fmt.Println("Captured document token:", docToken) - // Load config - configPath, err := core.GetConfigFilePath() - utils.CheckErr(err) - config, err := core.ReadConfigFromFile(configPath) - utils.CheckErr(err) - // Create client with context ctx := context.WithValue(context.Background(), "output", config.Output) @@ -63,9 +61,11 @@ func handleDownloadCommand(url string, opts *DownloadOpts) error { if !config.Output.SkipImgDownload { for _, imgToken := range parser.ImgTokens { localLink, err := client.DownloadImage( - ctx, imgToken, filepath.Join(opts.outputDir, config.Output.ImageDir), + ctx, imgToken, filepath.Join(outputDir, config.Output.ImageDir), ) - utils.CheckErr(err) + if utils.CheckErr(err) != nil { + return err + } markdown = strings.Replace(markdown, imgToken, localLink, 1) } } @@ -77,15 +77,15 @@ func handleDownloadCommand(url string, opts *DownloadOpts) error { result := engine.FormatStr("md", markdown) // Handle the output directory and name - if _, err := os.Stat(opts.outputDir); os.IsNotExist(err) { - if err := os.MkdirAll(opts.outputDir, 0o755); err != nil { + if _, err := os.Stat(outputDir); os.IsNotExist(err) { + if err := os.MkdirAll(outputDir, 0o755); err != nil { return err } } - if opts.dump { + if dump { jsonName := fmt.Sprintf("%s.json", docToken) - outputPath := filepath.Join(opts.outputDir, 
jsonName) + outputPath := filepath.Join(outputDir, jsonName) data := struct { Document *lark.DocxDocument `json:"document"` Blocks []*lark.DocxBlock `json:"blocks"` @@ -106,7 +106,7 @@ func handleDownloadCommand(url string, opts *DownloadOpts) error { if config.Output.TitleAsFilename { mdName = fmt.Sprintf("%s.md", title) } - outputPath := filepath.Join(opts.outputDir, mdName) + outputPath := filepath.Join(outputDir, mdName) if err = os.WriteFile(outputPath, []byte(result), 0o644); err != nil { return err } @@ -114,3 +114,17 @@ func handleDownloadCommand(url string, opts *DownloadOpts) error { return nil } + +func handleDownloadCommand(url string, opts *DownloadOpts) error { + // Load config + configPath, err := core.GetConfigFilePath() + utils.CheckErr(err) + config, err := core.ReadConfigFromFile(configPath) + utils.CheckErr(err) + + if opts.batch { + return batchDownloadFolder(opts.outputDir, url, config) + } + + return downloadDocument(url, opts.outputDir, opts.dump, config) +} diff --git a/cmd/main.go b/cmd/main.go index 6cf9c74..05dbd1f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -59,6 +59,12 @@ func main() { Usage: "Dump json response of the OPEN API", Destination: &downloadOpts.dump, }, + &cli.BoolFlag{ + Name: "batch", + Value: false, + Usage: "batch download all documents inside a folder, where the url is the base folder token", + Destination: &downloadOpts.batch, + }, }, ArgsUsage: "", Action: func(ctx *cli.Context) error { diff --git a/core/client.go b/core/client.go index ea07607..b662535 100644 --- a/core/client.go +++ b/core/client.go @@ -7,6 +7,7 @@ import ( "io" "os" "path/filepath" + "sync" "time" "github.com/chyroc/lark" @@ -104,3 +105,70 @@ func (c *Client) GetWikiNodeInfo(ctx context.Context, token string) (*lark.GetWi } return resp.Node, nil } + +func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, folderToken *string) (*lark.GetDriveFileListResp, error) { + resp, _, err := c.larkClient.Drive.GetDriveFileList(ctx, 
&lark.GetDriveFileListReq{ + PageSize: nil, + PageToken: pageToken, + FolderToken: folderToken, + }) + if err != nil { + return nil, err + } + return resp, nil +} + +func (c *Client) GetDriveStructureRecursion(ctx context.Context, folderToken string, currentPath string, pairChannel chan Pair, wg *sync.WaitGroup) error { + defer wg.Done() + + resp, err := c.GetDriveFolderFileList(ctx, nil, &folderToken) + if err != nil { + return err + } + files := resp.Files + for resp.HasMore { + resp, err = c.GetDriveFolderFileList(ctx, &resp.NextPageToken, &folderToken) + if err != nil { + return err + } + files = append(files, resp.Files...) + } + + for _, file := range files { + if file.Type == "folder" { + wg.Add(1) + go func(path string, fileToken string) { + err := c.GetDriveStructureRecursion(ctx, fileToken, path, pairChannel, wg) + if err != nil { + fmt.Println(err) + } + }(currentPath+"/"+file.Name, file.Token) + } else { + pairChannel <- Pair{currentPath + "/" + file.Name, file.URL} + } + } + + return nil +} + +type Pair struct { + path string + url string +} + +func (c *Client) GetDriveStructure(ctx context.Context, baseFolderToken string) (map[string]string, error) { + pairChannel := make(chan Pair) + structure := map[string]string{} + wg := sync.WaitGroup{} + go func() { + for pair := range pairChannel { + structure[pair.path] = pair.url + } + }() + wg.Add(1) + err := c.GetDriveStructureRecursion(ctx, baseFolderToken, ".", pairChannel, &wg) + + wg.Wait() + + return structure, err +} diff --git a/core/client_test.go b/core/client_test.go index 1eaef4c..7e35911 100644 --- a/core/client_test.go +++ b/core/client_test.go @@ -93,3 +93,16 @@ func TestGetWikiNodeInfo(t *testing.T) { t.Errorf("Error: node type incorrect") } } + +func TestGetDriveStructure(t *testing.T) { + appID, appSecret := getIdAndSecretFromEnv(t) + c := core.NewClient(appID, appSecret) + baseFolderToken := "VknBfQ1pdla6AddwgkUuHT1ks7c" + structure, err := c.GetDriveStructure(context.Background(), 
&baseFolderToken) + if err != nil { + t.Error(err) + } + for path, url := range structure { + fmt.Printf("%s: %s\n", path, url) + } +} diff --git a/utils/common.go b/utils/common.go index f8d483e..8202fd9 100644 --- a/utils/common.go +++ b/utils/common.go @@ -7,7 +7,9 @@ import ( "strings" ) -func CheckErr(e error) { +var StopWhenErr = true + +func CheckErr(e error) error { if e != nil { fmt.Fprintln(os.Stderr, e) fmt.Fprintf( @@ -16,8 +18,11 @@ func CheckErr(e error) { strings.Repeat("=", 20), "Report the following if it is a bug", ) - panic(e) + if StopWhenErr { + panic(e) + } } + return e } func PrettyPrint(i interface{}) string { From 5815815dd0c02ec3b60f0ba27add010eb120f5ab Mon Sep 17 00:00:00 2001 From: Wsine Date: Thu, 4 Jul 2024 20:23:41 +0800 Subject: [PATCH 2/4] refactor: download documents in batch --- cmd/batchDownload.go | 130 ------------------------------------------- cmd/config.go | 18 +++--- cmd/download.go | 118 +++++++++++++++++++++++++++++---------- cmd/main.go | 14 ++--- core/client.go | 63 ++++----------------- core/client_test.go | 13 +++-- core/config.go | 5 +- core/parser.go | 17 ++---- core/parser_test.go | 9 ++- go.mod | 4 +- go.sum | 6 ++ utils/url.go | 16 +++++- utils/url_test.go | 2 +- web/download.go | 4 +- 14 files changed, 161 insertions(+), 258 deletions(-) delete mode 100644 cmd/batchDownload.go diff --git a/cmd/batchDownload.go b/cmd/batchDownload.go deleted file mode 100644 index ffaccbb..0000000 --- a/cmd/batchDownload.go +++ /dev/null @@ -1,130 +0,0 @@ -package main - -import ( - "context" - "fmt" - "github.com/Wsine/feishu2md/core" - "github.com/Wsine/feishu2md/utils" - "github.com/pkg/errors" - "os" - "path/filepath" - "strings" - "sync" - "time" -) - -var downloadFailureList = []string{} - -const ApiLimitsPerSec = 5 - -func singleDownload(relPath string, url string, outputDir string, config *core.Config) { - // If the output subdirectory for relPath does not exist, create it - outputPath := filepath.Join(outputDir, relPath) 
- subDir := filepath.Dir(outputPath) - if _, err := os.Stat(subDir); os.IsNotExist(err) { - err := os.MkdirAll(subDir, 0755) - if err != nil { - fmt.Println("Error creating directory:", err) - return - } - } - - // Download the document - err := downloadDocument(url, outputPath, false, config) - if err != nil { - fmt.Println("Error downloading document:", err) - downloadFailureList = append(downloadFailureList, url) - } -} - -func logDownloadFailures() { - // Log the URLs that failed to download in stderr - if len(downloadFailureList) > 0 { - fmt.Fprintln(os.Stderr, "The following URLs failed to download:") - for _, url := range downloadFailureList { - fmt.Fprintln(os.Stderr, url) - } - // Print the following message in Green background color - _, _ = fmt.Fprintln(os.Stderr, "\033[42m\033[30mDon't worry, this is not a total failure.\033[0m") - _, _ = fmt.Fprintln(os.Stderr, "\033[42m\033[30mSome of your documents may have been downloaded successfully.\033[0m") - _, _ = fmt.Fprintln(os.Stderr, "\033[42m\033[30mYou can try to download the failed documents again.\033[0m") - } -} - -// Batch download all the documents in the pathMap to the output directory -// The pathMap is a map of {relativePath, url} -// This function downloads all the documents using the url to the relativePath in the output directory -func batchDownload(pathMap map[string]string, outputDir string, config *core.Config) error { - utils.StopWhenErr = false - - var batchErr error = nil - - // API limit is 5 requests per second, - // so we use a pool of 5 goroutines added to the pool every second - readyOperators := make(chan struct{}, ApiLimitsPerSec) - finishedOperators := make(chan struct{}, ApiLimitsPerSec) - for i := 0; i < ApiLimitsPerSec; i++ { - finishedOperators <- struct{}{} - } - downloadFinished := false - // Set a timer to add 5 operators to the pool every 1.5 second (for safety) - go func() { - for { - if downloadFinished { - break - } - <-finishedOperators - readyOperators <- struct{}{} 
- <-time.After(1500 * time.Millisecond / ApiLimitsPerSec) - } - }() - - var wg sync.WaitGroup - for relPath, url := range pathMap { - wg.Add(1) - go func(relPath, url string) { - <-readyOperators - singleDownload(relPath, url, outputDir, config) - finishedOperators <- struct{}{} - wg.Done() - }(relPath, url) - } - - wg.Wait() - logDownloadFailures() - - if len(downloadFailureList) > 0 { - batchErr = errors.New("Some documents failed to download") - } - return batchErr -} - -func batchDownloadFolder(outputDir string, urlOrToken string, config *core.Config) error { - baseFolderToken := urlOrToken - // If this is batch download and a full directory url is provided, strip the last part (token) - if strings.Contains(urlOrToken, "/") { - segments := strings.Split(urlOrToken, "/") - baseFolderToken = segments[len(segments)-1] - baseFolderToken = strings.Split(baseFolderToken, "?")[0] // In case some user copy the url with query params - } - - // Create client with context - ctx := context.WithValue(context.Background(), "output", config.Output) - - client := core.NewClient( - config.Feishu.AppId, config.Feishu.AppSecret, - ) - pathMap, err := client.GetDriveStructure(ctx, baseFolderToken) - - if err != nil { - return err - } - - err = batchDownload(pathMap, outputDir, config) - if err != nil { - return err - } - - return nil - -} diff --git a/cmd/config.go b/cmd/config.go index 1ad0997..36557be 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -15,13 +15,15 @@ type ConfigOpts struct { var configOpts = ConfigOpts{} -func handleConfigCommand(opts *ConfigOpts) error { +func handleConfigCommand() error { configPath, err := core.GetConfigFilePath() - utils.CheckErr(err) + if err != nil { + return err + } fmt.Println("Configuration file on: " + configPath) if _, err := os.Stat(configPath); os.IsNotExist(err) { - config := core.NewConfig(opts.appId, opts.appSecret) + config := core.NewConfig(configOpts.appId, configOpts.appSecret) if err = config.WriteConfig2File(configPath); 
err != nil { return err } @@ -31,13 +33,13 @@ func handleConfigCommand(opts *ConfigOpts) error { if err != nil { return err } - if opts.appId != "" { - config.Feishu.AppId = opts.appId + if configOpts.appId != "" { + config.Feishu.AppId = configOpts.appId } - if opts.appSecret != "" { - config.Feishu.AppSecret = opts.appSecret + if configOpts.appSecret != "" { + config.Feishu.AppSecret = configOpts.appSecret } - if opts.appId != "" || opts.appSecret != "" { + if configOpts.appId != "" || configOpts.appSecret != "" { if err = config.WriteConfig2File(configPath); err != nil { return err } diff --git a/cmd/download.go b/cmd/download.go index 5bccebe..d5a5604 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "strings" + "sync" "github.com/88250/lute" "github.com/Wsine/feishu2md/core" @@ -20,24 +21,17 @@ type DownloadOpts struct { batch bool } -var downloadOpts = DownloadOpts{} +var dlOpts = DownloadOpts{} +var dlConfig core.Config -func downloadDocument(url string, outputDir string, dump bool, config *core.Config) error { +func downloadDocument(client *core.Client, ctx context.Context, url string, opts *DownloadOpts) error { // Validate the url to download - docType, docToken, err := utils.ValidateDownloadURL(url) - // This might be a sheet or other kind of URL, we don't support downloading it yet so we just skip it + docType, docToken, err := utils.ValidateDocumentURL(url) if err != nil { - return nil + return err } fmt.Println("Captured document token:", docToken) - // Create client with context - ctx := context.WithValue(context.Background(), "output", config.Output) - - client := core.NewClient( - config.Feishu.AppId, config.Feishu.AppSecret, - ) - // for a wiki page, we need to renew docType and docToken first if docType == "wiki" { node, err := client.GetWikiNodeInfo(ctx, docToken) @@ -46,22 +40,24 @@ func downloadDocument(url string, outputDir string, dump bool, config *core.Conf docToken = node.ObjToken } if 
docType == "docs" { - return errors.Errorf("Feishu Docs is no longer supported. Please refer to the Readme/Release for v1_support.") + return errors.Errorf( + `Feishu Docs is no longer supported. ` + + `Please refer to the Readme/Release for v1_support.`) } // Process the download docx, blocks, err := client.GetDocxContent(ctx, docToken) utils.CheckErr(err) - parser := core.NewParser(ctx) + parser := core.NewParser(dlConfig.Output) title := docx.Title markdown := parser.ParseDocxContent(docx, blocks) - if !config.Output.SkipImgDownload { + if !dlConfig.Output.SkipImgDownload { for _, imgToken := range parser.ImgTokens { localLink, err := client.DownloadImage( - ctx, imgToken, filepath.Join(outputDir, config.Output.ImageDir), + ctx, imgToken, filepath.Join(opts.outputDir, dlConfig.Output.ImageDir), ) if utils.CheckErr(err) != nil { return err @@ -77,15 +73,15 @@ func downloadDocument(url string, outputDir string, dump bool, config *core.Conf result := engine.FormatStr("md", markdown) // Handle the output directory and name - if _, err := os.Stat(outputDir); os.IsNotExist(err) { - if err := os.MkdirAll(outputDir, 0o755); err != nil { + if _, err := os.Stat(opts.outputDir); os.IsNotExist(err) { + if err := os.MkdirAll(opts.outputDir, 0o755); err != nil { return err } } - if dump { + if dlOpts.dump { jsonName := fmt.Sprintf("%s.json", docToken) - outputPath := filepath.Join(outputDir, jsonName) + outputPath := filepath.Join(opts.outputDir, jsonName) data := struct { Document *lark.DocxDocument `json:"document"` Blocks []*lark.DocxBlock `json:"blocks"` @@ -103,10 +99,10 @@ func downloadDocument(url string, outputDir string, dump bool, config *core.Conf // Write to markdown file mdName := fmt.Sprintf("%s.md", docToken) - if config.Output.TitleAsFilename { + if dlConfig.Output.TitleAsFilename { mdName = fmt.Sprintf("%s.md", title) } - outputPath := filepath.Join(outputDir, mdName) + outputPath := filepath.Join(opts.outputDir, mdName) if err = os.WriteFile(outputPath, 
[]byte(result), 0o644); err != nil { return err } @@ -115,16 +111,80 @@ func downloadDocument(url string, outputDir string, dump bool, config *core.Conf return nil } -func handleDownloadCommand(url string, opts *DownloadOpts) error { +func downloadDocuments(client *core.Client, ctx context.Context, url string) error { + // Validate the url to download + folderToken, err := utils.ValidateFolderURL(url) + if err != nil { + return err + } + fmt.Println("Captured folder token:", folderToken) + + // Error channel and wait group + errChan := make(chan error) + wg := sync.WaitGroup{} + + // Recursively go through the folder and download the documents + var processFolder func(ctx context.Context, folderPath, folderToken string) error + processFolder = func(ctx context.Context, folderPath, folderToken string) error { + files, err := client.GetDriveFolderFileList(ctx, nil, &folderToken) + if err != nil { + return err + } + opts := DownloadOpts{outputDir: folderPath, dump: dlOpts.dump, batch: false} + for _, file := range files { + if file.Type == "folder" { + _folderPath := filepath.Join(folderPath, file.Name) + if err := processFolder(ctx, _folderPath, file.Token); err != nil { + return err + } + } else if file.Type == "docx" { + // concurrently download the document + wg.Add(1) + go func() { + if err := downloadDocument(client, ctx, file.URL, &opts); err != nil { + errChan <- err + } + wg.Done() + }() + } + } + return nil + } + if err := processFolder(ctx, dlOpts.outputDir, folderToken); err != nil { + return err + } + + // Wait for all the downloads to finish + go func() { + wg.Wait() + close(errChan) + }() + for err := range errChan { + return err + } + return nil +} + +func handleDownloadCommand(url string) error { // Load config configPath, err := core.GetConfigFilePath() - utils.CheckErr(err) - config, err := core.ReadConfigFromFile(configPath) - utils.CheckErr(err) + if err != nil { + return err + } + dlConfig, err := core.ReadConfigFromFile(configPath) + if err != 
nil { + return err + } + + // Instantiate the client + client := core.NewClient( + dlConfig.Feishu.AppId, dlConfig.Feishu.AppSecret, + ) + ctx := context.Background() - if opts.batch { - return batchDownloadFolder(opts.outputDir, url, config) + if dlOpts.batch { + return downloadDocuments(client, ctx, url) } - return downloadDocument(url, opts.outputDir, opts.dump, config) + return downloadDocument(client, ctx, url, &dlOpts) } diff --git a/cmd/main.go b/cmd/main.go index 05dbd1f..d8c6143 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -38,7 +38,7 @@ func main() { }, }, Action: func(ctx *cli.Context) error { - return handleConfigCommand(&configOpts) + return handleConfigCommand() }, }, { @@ -51,28 +51,28 @@ func main() { Aliases: []string{"o"}, Value: "./", Usage: "Specify the output directory for the markdown files", - Destination: &downloadOpts.outputDir, + Destination: &dlOpts.outputDir, }, &cli.BoolFlag{ Name: "dump", Value: false, Usage: "Dump json response of the OPEN API", - Destination: &downloadOpts.dump, + Destination: &dlOpts.dump, }, &cli.BoolFlag{ Name: "batch", Value: false, - Usage: "batch download all documents inside a folder, where the url is the base folder token", - Destination: &downloadOpts.batch, + Usage: "Download all documents under a folder", + Destination: &dlOpts.batch, }, }, ArgsUsage: "", Action: func(ctx *cli.Context) error { if ctx.NArg() == 0 { - return cli.Exit("Please specify the document url", 1) + return cli.Exit("Please specify the document/folder url", 1) } else { url := ctx.Args().First() - return handleDownloadCommand(url, &downloadOpts) + return handleDownloadCommand(url) } }, }, diff --git a/core/client.go b/core/client.go index b662535..5e2b3de 100644 --- a/core/client.go +++ b/core/client.go @@ -7,10 +7,10 @@ import ( "io" "os" "path/filepath" - "sync" "time" "github.com/chyroc/lark" + "github.com/chyroc/lark_rate_limiter" ) type Client struct { @@ -22,6 +22,7 @@ func NewClient(appID, appSecret string) *Client { larkClient: 
lark.New( lark.WithAppCredential(appID, appSecret), lark.WithTimeout(60*time.Second), + lark.WithApiMiddleware(lark_rate_limiter.Wait(5, 5)), ), } } @@ -106,7 +107,7 @@ func (c *Client) GetWikiNodeInfo(ctx context.Context, token string) (*lark.GetWi return resp.Node, nil } -func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, folderToken *string) (*lark.GetDriveFileListResp, error) { +func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, folderToken *string) ([]*lark.GetDriveFileListRespFile, error) { resp, _, err := c.larkClient.Drive.GetDriveFileList(ctx, &lark.GetDriveFileListReq{ PageSize: nil, PageToken: pageToken, @@ -115,60 +116,18 @@ func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, if err != nil { return nil, err } - return resp, nil -} - -func (c *Client) GetDriveStructureRecursion(ctx context.Context, folderToken string, currentPath string, pairChannel chan Pair, wg *sync.WaitGroup) error { - defer wg.Done() - - resp, err := c.GetDriveFolderFileList(ctx, nil, &folderToken) - if err != nil { - return err - } - files := resp.Files + files := resp.Files for resp.HasMore { - resp, err = c.GetDriveFolderFileList(ctx, &resp.NextPageToken, &folderToken) + resp, _, err = c.larkClient.Drive.GetDriveFileList(ctx, &lark.GetDriveFileListReq{ + PageSize: nil, + PageToken: &resp.NextPageToken, + FolderToken: folderToken, + }) if err != nil { - return err + return nil, err } files = append(files, resp.Files...) 
} - - for _, file := range files { - if file.Type == "folder" { - wg.Add(1) - go func(path string, fileToken string) { - err := c.GetDriveStructureRecursion(ctx, fileToken, path, pairChannel, wg) - if err != nil { - fmt.Println(err) - } - }(currentPath+"/"+file.Name, file.Token) - } else { - pairChannel <- Pair{currentPath + "/" + file.Name, file.URL} - } - } - - return nil -} - -type Pair struct { - path string - url string + return files, nil } -func (c *Client) GetDriveStructure(ctx context.Context, baseFolderToken string) (map[string]string, error) { - pairChannel := make(chan Pair) - structure := map[string]string{} - wg := sync.WaitGroup{} - go func() { - for pair := range pairChannel { - structure[pair.path] = pair.url - } - }() - wg.Add(1) - err := c.GetDriveStructureRecursion(ctx, baseFolderToken, ".", pairChannel, &wg) - - wg.Wait() - - return structure, err -} diff --git a/core/client_test.go b/core/client_test.go index 7e35911..ab01f8d 100644 --- a/core/client_test.go +++ b/core/client_test.go @@ -94,15 +94,16 @@ func TestGetWikiNodeInfo(t *testing.T) { } } -func TestGetDriveStructure(t *testing.T) { +func TestGetDriveFolderFileList(t *testing.T) { appID, appSecret := getIdAndSecretFromEnv(t) c := core.NewClient(appID, appSecret) - baseFolderToken := "VknBfQ1pdla6AddwgkUuHT1ks7c" - structure, err := c.GetDriveStructure(context.Background(), &baseFolderToken) + folderToken := "VknBfQ1pdla6AddwgkUuHT1ks7c" + files, err := c.GetDriveFolderFileList( + context.Background(), nil, &folderToken) if err != nil { t.Error(err) } - for path, url := range structure { - fmt.Printf("%s: %s\n", path, url) - } + if len(files) == 0 { + t.Errorf("Error: no files found") + } } diff --git a/core/config.go b/core/config.go index 2efa902..82943a9 100644 --- a/core/config.go +++ b/core/config.go @@ -2,7 +2,6 @@ package core import ( "encoding/json" - "io/ioutil" "os" "path" "path/filepath" @@ -50,7 +49,7 @@ func GetConfigFilePath() (string, error) { } func 
ReadConfigFromFile(configPath string) (*Config, error) { - file, err := ioutil.ReadFile(configPath) + file, err := os.ReadFile(configPath) if err != nil { return nil, err } @@ -71,6 +70,6 @@ func (conf *Config) WriteConfig2File(configPath string) error { if err != nil { return err } - err = ioutil.WriteFile(configPath, file, 0o644) + err = os.WriteFile(configPath, file, 0o644) return err } diff --git a/core/parser.go b/core/parser.go index 1c4a88b..2362123 100644 --- a/core/parser.go +++ b/core/parser.go @@ -1,7 +1,6 @@ package core import ( - "context" "fmt" "reflect" "strings" @@ -12,14 +11,14 @@ import ( ) type Parser struct { - ctx context.Context + useHTMLTags bool ImgTokens []string blockMap map[string]*lark.DocxBlock } -func NewParser(ctx context.Context) *Parser { +func NewParser(config OutputConfig) *Parser { return &Parser{ - ctx: ctx, + useHTMLTags: config.UseHTMLTags, ImgTokens: make([]string, 0), blockMap: make(map[string]*lark.DocxBlock), } @@ -244,12 +243,8 @@ func (p *Parser) ParseDocxTextElementTextRun(tr *lark.DocxTextElementTextRun) st buf := new(strings.Builder) postWrite := "" if style := tr.TextElementStyle; style != nil { - useHTMLTags := NewConfig("", "").Output.UseHTMLTags - if p.ctx.Value("output") != nil { - useHTMLTags = p.ctx.Value("output").(OutputConfig).UseHTMLTags - } if style.Bold { - if useHTMLTags { + if p.useHTMLTags { buf.WriteString("") postWrite = "" } else { @@ -257,7 +252,7 @@ func (p *Parser) ParseDocxTextElementTextRun(tr *lark.DocxTextElementTextRun) st postWrite = "**" } } else if style.Italic { - if useHTMLTags { + if p.useHTMLTags { buf.WriteString("") postWrite = "" } else { @@ -265,7 +260,7 @@ func (p *Parser) ParseDocxTextElementTextRun(tr *lark.DocxTextElementTextRun) st postWrite = "_" } } else if style.Strikethrough { - if useHTMLTags { + if p.useHTMLTags { buf.WriteString("") postWrite = "" } else { diff --git a/core/parser_test.go b/core/parser_test.go index f83059d..cdac710 100644 --- a/core/parser_test.go 
+++ b/core/parser_test.go @@ -1,10 +1,9 @@ package core_test import ( - "context" "encoding/json" "fmt" - "io/ioutil" + "io" "os" "path" "testing" @@ -37,15 +36,15 @@ func TestParseDocxContent(t *testing.T) { Document *lark.DocxDocument `json:"document"` Blocks []*lark.DocxBlock `json:"blocks"` }{} - byteValue, _ := ioutil.ReadAll(jsonFile) + byteValue, _ := io.ReadAll(jsonFile) json.Unmarshal(byteValue, &data) - parser := core.NewParser(context.Background()) + parser := core.NewParser(core.NewConfig("", "").Output) mdParsed := parser.ParseDocxContent(data.Document, data.Blocks) fmt.Println(mdParsed) mdParsed = engine.FormatStr("md", mdParsed) - mdFile, err := ioutil.ReadFile(path.Join(root, "testdata", td+".md")) + mdFile, err := os.ReadFile(path.Join(root, "testdata", td+".md")) utils.CheckErr(err) mdExpected := string(mdFile) diff --git a/go.mod b/go.mod index 16d65b7..b8e483c 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.21 require ( github.com/88250/lute v1.7.3 - github.com/chyroc/lark v0.0.97-0.20220706015537-dc21f96c8ebd + github.com/chyroc/lark v0.0.98-0.20220914014759-f9ad5a16e595 github.com/joho/godotenv v1.4.0 github.com/olekukonko/tablewriter v0.0.5 github.com/urfave/cli/v2 v2.6.0 @@ -20,6 +20,7 @@ require ( github.com/alecthomas/chroma v0.9.2 // indirect github.com/bytedance/sonic v1.8.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect + github.com/chyroc/lark_rate_limiter v0.1.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -47,6 +48,7 @@ require ( golang.org/x/net v0.19.0 // indirect golang.org/x/sys v0.15.0 // indirect golang.org/x/text v0.14.0 // indirect + golang.org/x/time v0.0.0-20220722155302-e5dcc9cfc0b9 // indirect google.golang.org/protobuf v1.33.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 7beb157..1d9c34a 
100644 --- a/go.sum +++ b/go.sum @@ -47,6 +47,10 @@ github.com/chyroc/go-ptr v1.6.0 h1:4GwCNrNfk4806eQKbHO2A/N/YOLW6jHIrBPWKfMe6F0= github.com/chyroc/go-ptr v1.6.0/go.mod h1:FKNjNg3sCLx7VhQGwuml6sITX1mvhKS0Je9uN9tt65Q= github.com/chyroc/lark v0.0.97-0.20220706015537-dc21f96c8ebd h1:i7uP6BA2aHqLSqn3URHrPvm8NLmoVSJNmoaboRvkpek= github.com/chyroc/lark v0.0.97-0.20220706015537-dc21f96c8ebd/go.mod h1:ZMmVyuBFmzLkiVKuORy7nEoNK/WvDh77cMsc3laJ5H8= +github.com/chyroc/lark v0.0.98-0.20220914014759-f9ad5a16e595 h1:fonLvnX4ULSjn5E+rk0OevXRayuuTMi0kTjMSBeBenE= +github.com/chyroc/lark v0.0.98-0.20220914014759-f9ad5a16e595/go.mod h1:ZMmVyuBFmzLkiVKuORy7nEoNK/WvDh77cMsc3laJ5H8= +github.com/chyroc/lark_rate_limiter v0.1.0 h1:nZA4Ipx3jqqg1PXRxv2dTYLEyz0h0GY8yhUo9Az15j8= +github.com/chyroc/lark_rate_limiter v0.1.0/go.mod h1:N7R/j7o8yKRyBiP/AtUHAWNRlxMECsGV5w7NMw9vKuA= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= @@ -366,6 +370,8 @@ golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20220722155302-e5dcc9cfc0b9 h1:ftMN5LMiBFjbzleLqtoBZk7KdJwhuybIU+FckUHgoyQ= +golang.org/x/time v0.0.0-20220722155302-e5dcc9cfc0b9/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools 
v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/utils/url.go b/utils/url.go index b567edd..a47b003 100644 --- a/utils/url.go +++ b/utils/url.go @@ -14,13 +14,23 @@ func UnescapeURL(rawURL string) string { return rawURL } -func ValidateDownloadURL(url string) (string, string, error) { - reg := regexp.MustCompile("^https://[\\w-.]+/(docx|wiki)/([a-zA-Z0-9]+)") +func ValidateDocumentURL(url string) (string, string, error) { + reg := regexp.MustCompile("^https://[\\w-.]+/(docs|docx|wiki)/([a-zA-Z0-9]+)") matchResult := reg.FindStringSubmatch(url) if matchResult == nil || len(matchResult) != 3 { - return "", "", errors.Errorf("Invalid feishu/larksuite URL format") + return "", "", errors.Errorf("Invalid feishu/larksuite document URL pattern") } docType := matchResult[1] docToken := matchResult[2] return docType, docToken, nil } + +func ValidateFolderURL(url string) (string, error) { + reg := regexp.MustCompile("^https://[\\w-.]+/drive/folder/([a-zA-Z0-9]+)") + matchResult := reg.FindStringSubmatch(url) + if matchResult == nil || len(matchResult) != 2 { + return "", errors.Errorf("Invalid feishu/larksuite folder URL pattern") + } + folderToken := matchResult[1] + return folderToken, nil +} diff --git a/utils/url_test.go b/utils/url_test.go index 9dc9511..629ddf9 100644 --- a/utils/url_test.go +++ b/utils/url_test.go @@ -69,7 +69,7 @@ func TestValidateDownloadURL(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if _, _, got := ValidateDownloadURL(tt.url); (got == nil) != tt.noErr { + if _, _, got := ValidateDocumentURL(tt.url); (got == nil) != tt.noErr { t.Errorf("ValidateDownloadURL(%v)", tt.url) } }) diff --git a/web/download.go b/web/download.go index 6bf85ec..deb9631 100644 --- a/web/download.go +++ b/web/download.go @@ -26,7 +26,7 @@ func downloadHandler(c *gin.Context) { } // Validate the url to download - docType, docToken, err := utils.ValidateDownloadURL(feishu_docx_url) + 
docType, docToken, err := utils.ValidateDocumentURL(feishu_docx_url) fmt.Println("Captured document token:", docToken) // Create client with context @@ -40,7 +40,7 @@ func downloadHandler(c *gin.Context) { ) // Process the download - parser := core.NewParser(ctx) + parser := core.NewParser(config.Output) markdown := "" // for a wiki page, we need to renew docType and docToken first From 8dac401603827b22365b4453e9c252ddfe87e505 Mon Sep 17 00:00:00 2001 From: Wsine Date: Mon, 8 Jul 2024 20:03:00 +0800 Subject: [PATCH 3/4] format: tidy the code --- cmd/config.go | 6 +-- cmd/download.go | 124 ++++++++++++++++++++++---------------------- core/client.go | 15 +++--- core/client_test.go | 10 ++-- core/parser.go | 8 +-- utils/url.go | 14 ++--- 6 files changed, 88 insertions(+), 89 deletions(-) diff --git a/cmd/config.go b/cmd/config.go index 36557be..ff35665 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -17,9 +17,9 @@ var configOpts = ConfigOpts{} func handleConfigCommand() error { configPath, err := core.GetConfigFilePath() - if err != nil { - return err - } + if err != nil { + return err + } fmt.Println("Configuration file on: " + configPath) if _, err := os.Stat(configPath); os.IsNotExist(err) { diff --git a/cmd/download.go b/cmd/download.go index d5a5604..86a29e3 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -41,8 +41,8 @@ func downloadDocument(client *core.Client, ctx context.Context, url string, opts } if docType == "docs" { return errors.Errorf( - `Feishu Docs is no longer supported. ` + - `Please refer to the Readme/Release for v1_support.`) + `Feishu Docs is no longer supported. 
` + + `Please refer to the Readme/Release for v1_support.`) } // Process the download @@ -112,75 +112,75 @@ func downloadDocument(client *core.Client, ctx context.Context, url string, opts } func downloadDocuments(client *core.Client, ctx context.Context, url string) error { - // Validate the url to download - folderToken, err := utils.ValidateFolderURL(url) - if err != nil { - return err - } - fmt.Println("Captured folder token:", folderToken) - - // Error channel and wait group - errChan := make(chan error) - wg := sync.WaitGroup{} - - // Recursively go through the folder and download the documents - var processFolder func(ctx context.Context, folderPath, folderToken string) error - processFolder = func(ctx context.Context, folderPath, folderToken string) error { - files, err := client.GetDriveFolderFileList(ctx, nil, &folderToken) - if err != nil { - return err - } - opts := DownloadOpts{outputDir: folderPath, dump: dlOpts.dump, batch: false} - for _, file := range files { - if file.Type == "folder" { - _folderPath := filepath.Join(folderPath, file.Name) - if err := processFolder(ctx, _folderPath, file.Token); err != nil { - return err - } - } else if file.Type == "docx" { - // concurrently download the document - wg.Add(1) - go func() { - if err := downloadDocument(client, ctx, file.URL, &opts); err != nil { - errChan <- err - } - wg.Done() - }() - } - } - return nil - } - if err := processFolder(ctx, dlOpts.outputDir, folderToken); err != nil { - return err - } - - // Wait for all the downloads to finish - go func() { - wg.Wait() - close(errChan) - }() - for err := range errChan { - return err - } - return nil + // Validate the url to download + folderToken, err := utils.ValidateFolderURL(url) + if err != nil { + return err + } + fmt.Println("Captured folder token:", folderToken) + + // Error channel and wait group + errChan := make(chan error) + wg := sync.WaitGroup{} + + // Recursively go through the folder and download the documents + var processFolder 
func(ctx context.Context, folderPath, folderToken string) error + processFolder = func(ctx context.Context, folderPath, folderToken string) error { + files, err := client.GetDriveFolderFileList(ctx, nil, &folderToken) + if err != nil { + return err + } + opts := DownloadOpts{outputDir: folderPath, dump: dlOpts.dump, batch: false} + for _, file := range files { + if file.Type == "folder" { + _folderPath := filepath.Join(folderPath, file.Name) + if err := processFolder(ctx, _folderPath, file.Token); err != nil { + return err + } + } else if file.Type == "docx" { + // concurrently download the document + wg.Add(1) + go func(_url string) { + if err := downloadDocument(client, ctx, _url, &opts); err != nil { + errChan <- err + } + wg.Done() + }(file.URL) + } + } + return nil + } + if err := processFolder(ctx, dlOpts.outputDir, folderToken); err != nil { + return err + } + + // Wait for all the downloads to finish + go func() { + wg.Wait() + close(errChan) + }() + for err := range errChan { + return err + } + return nil } func handleDownloadCommand(url string) error { // Load config configPath, err := core.GetConfigFilePath() - if err != nil { - return err - } + if err != nil { + return err + } dlConfig, err := core.ReadConfigFromFile(configPath) - if err != nil { - return err - } + if err != nil { + return err + } - // Instantiate the client - client := core.NewClient( + // Instantiate the client + client := core.NewClient( dlConfig.Feishu.AppId, dlConfig.Feishu.AppSecret, ) - ctx := context.Background() + ctx := context.Background() if dlOpts.batch { return downloadDocuments(client, ctx, url) diff --git a/core/client.go b/core/client.go index 5e2b3de..63a7cf5 100644 --- a/core/client.go +++ b/core/client.go @@ -10,7 +10,7 @@ import ( "time" "github.com/chyroc/lark" - "github.com/chyroc/lark_rate_limiter" + "github.com/chyroc/lark_rate_limiter" ) type Client struct { @@ -22,7 +22,7 @@ func NewClient(appID, appSecret string) *Client { larkClient: lark.New( 
lark.WithAppCredential(appID, appSecret), lark.WithTimeout(60*time.Second), - lark.WithApiMiddleware(lark_rate_limiter.Wait(5, 5)), + lark.WithApiMiddleware(lark_rate_limiter.Wait(5, 5)), ), } } @@ -116,13 +116,13 @@ func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, if err != nil { return nil, err } - files := resp.Files + files := resp.Files for resp.HasMore { resp, _, err = c.larkClient.Drive.GetDriveFileList(ctx, &lark.GetDriveFileListReq{ - PageSize: nil, - PageToken: &resp.NextPageToken, - FolderToken: folderToken, - }) + PageSize: nil, + PageToken: &resp.NextPageToken, + FolderToken: folderToken, + }) if err != nil { return nil, err } @@ -130,4 +130,3 @@ func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, } return files, nil } - diff --git a/core/client_test.go b/core/client_test.go index ab01f8d..6f27d51 100644 --- a/core/client_test.go +++ b/core/client_test.go @@ -97,13 +97,13 @@ func TestGetWikiNodeInfo(t *testing.T) { func TestGetDriveFolderFileList(t *testing.T) { appID, appSecret := getIdAndSecretFromEnv(t) c := core.NewClient(appID, appSecret) - folderToken := "VknBfQ1pdla6AddwgkUuHT1ks7c" + folderToken := "G15mfSfIHlyquudfhq5cg9kdnjg" files, err := c.GetDriveFolderFileList( - context.Background(), nil, &folderToken) + context.Background(), nil, &folderToken) if err != nil { t.Error(err) } - if len(files) == 0 { - t.Errorf("Error: no files found") - } + if len(files) == 0 { + t.Errorf("Error: no files found") + } } diff --git a/core/parser.go b/core/parser.go index 2362123..7a69562 100644 --- a/core/parser.go +++ b/core/parser.go @@ -12,15 +12,15 @@ import ( type Parser struct { useHTMLTags bool - ImgTokens []string - blockMap map[string]*lark.DocxBlock + ImgTokens []string + blockMap map[string]*lark.DocxBlock } func NewParser(config OutputConfig) *Parser { return &Parser{ useHTMLTags: config.UseHTMLTags, - ImgTokens: make([]string, 0), - blockMap: make(map[string]*lark.DocxBlock), + 
ImgTokens: make([]string, 0), + blockMap: make(map[string]*lark.DocxBlock), } } diff --git a/utils/url.go b/utils/url.go index a47b003..03b5371 100644 --- a/utils/url.go +++ b/utils/url.go @@ -26,11 +26,11 @@ func ValidateDocumentURL(url string) (string, string, error) { } func ValidateFolderURL(url string) (string, error) { - reg := regexp.MustCompile("^https://[\\w-.]+/drive/folder/([a-zA-Z0-9]+)") - matchResult := reg.FindStringSubmatch(url) - if matchResult == nil || len(matchResult) != 2 { - return "", errors.Errorf("Invalid feishu/larksuite folder URL pattern") - } - folderToken := matchResult[1] - return folderToken, nil + reg := regexp.MustCompile("^https://[\\w-.]+/drive/folder/([a-zA-Z0-9]+)") + matchResult := reg.FindStringSubmatch(url) + if matchResult == nil || len(matchResult) != 2 { + return "", errors.Errorf("Invalid feishu/larksuite folder URL pattern") + } + folderToken := matchResult[1] + return folderToken, nil } From 2fc9ea4fee12bea649cc6ad2aff9ee101371deca Mon Sep 17 00:00:00 2001 From: Wsine Date: Mon, 8 Jul 2024 20:34:18 +0800 Subject: [PATCH 4/4] update: batch download guideline --- README.md | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index cb13678..3e09189 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Feishu2Md +# feishu2md [![Golang - feishu2md](https://img.shields.io/github/go-mod/go-version/wsine/feishu2md?color=%2376e1fe&logo=go)](https://go.dev/) [![Unittest](https://github.com/Wsine/feishu2md/actions/workflows/unittest.yaml/badge.svg)](https://github.com/Wsine/feishu2md/actions/workflows/unittest.yaml) @@ -20,13 +20,13 @@ 配置文件需要填写 APP ID 和 APP SECRET 信息,请参考 [飞书官方文档](https://open.feishu.cn/document/ukTMukTMukTM/ukDNz4SO0MjL5QzM/get-) 获取。推荐设置为 - 进入飞书[开发者后台](https://open.feishu.cn/app) -- 创建企业自建应用,信息随意填写 -- 选择测试企业和人员,创建测试企业,绑定应用,切换至测试版本 -- (重要)打开权限管理,云文档,开通所有只读权限 - - 「查看、评论和导出文档」权限 `docs:doc:readonly` - - 「查看 DocX 文档」权限 
`docx:document:readonly` - - 「查看、评论和下载云空间中所有文件」权限 `drive:drive:readonly` - - 「查看和下载云空间中的文件」权限 `drive:file:readonly` +- 创建企业自建应用(个人版),信息随意填写 +- (重要)打开权限管理,开通以下必要的权限(可点击以下链接参考 API 调试台->权限配置字段) + - [获取文档基本信息](https://open.feishu.cn/document/server-docs/docs/docs/docx-v1/document/get),「查看新版文档」权限 `docx:document:readonly` + - [获取文档所有块](https://open.feishu.cn/document/server-docs/docs/docs/docx-v1/document/list),「查看新版文档」权限 `docx:document:readonly` + - [下载素材](https://open.feishu.cn/document/server-docs/docs/drive-v1/media/download),「下载云文档中的图片和附件」权限 `docs:document.media:download` + - [获取文件夹中的文件清单](https://open.feishu.cn/document/server-docs/docs/drive-v1/folder/list),「查看、评论、编辑和管理云空间中所有文件」权限 `drive:file:readonly` + - [获取知识空间节点信息](https://open.feishu.cn/document/server-docs/docs/wiki-v2/space-node/get_node),「查看知识库」权限 `wiki:wiki:readonly` - 打开凭证与基础信息,获取 App ID 和 App Secret ## 如何使用 @@ -71,20 +71,20 @@ --appId value Set app id for the OPEN API --appSecret value Set app secret for the OPEN API --help, -h show help (default: false) - + $ feishu2md dl -h NAME: - feishu2md download - Download feishu/larksuite document to markdown file + feishu2md download - Download feishu/larksuite document to markdown file + + USAGE: + feishu2md download [command options] -USAGE: - feishu2md download [command options] + OPTIONS: + --output value, -o value Specify the output directory for the markdown files (default: "./") + --dump Dump json response of the OPEN API (default: false) + --batch Download all documents under a folder (default: false) + --help, -h show help (default: false) -OPTIONS: - --output value, -o value Specify the output directory for the markdown files (default: "./") - --batch batch download all documents inside a folder, where the url is the base folder token (default: false) - --dump Dump json response of the OPEN API (default: false) - --help, -h show help (default: false) - ``` **生成配置文件** @@ -97,7 +97,7 @@ OPTIONS: **下载单个文档为 Markdown** - 通过 `feishu2md dl ` 直接下载,文档链接可以通过 
**分享 > 开启链接分享 > 复制链接** 获得。 + 通过 `feishu2md dl <your feishu docx url>` 直接下载,文档链接可以通过 **分享 > 开启链接分享 > 互联网上获得链接的人可阅读 > 复制链接** 获得。 示例: @@ -107,15 +107,16 @@ OPTIONS: **批量下载某文件夹内的全部文档为 Markdown** - 通过`feishu2md dl --batch <your feishu folder url>` 直接下载,文件夹链接可以通过 **分享 > 开启链接分享 > 复制链接** 获得。 + 此功能暂时不支持Docker版本 + + 通过`feishu2md dl --batch <your feishu folder url>` 直接下载,文件夹链接可以通过 **分享 > 开启链接分享 > 互联网上获得链接的人可阅读 > 复制链接** 获得。 示例: - + ```bash - $ feishu2md dl --batch -o="output_directory" "folder_token or folder_url" + $ feishu2md dl --batch -o output_directory "https://domain.feishu.cn/drive/folder/foldertoken" ``` - 此功能暂时不支持Docker版本