1
- use std:: sync:: Arc ;
1
+ use std:: {
2
+ collections:: HashMap ,
3
+ sync:: { Arc , LazyLock } ,
4
+ } ;
2
5
3
- use futures:: future:: try_join;
4
6
use google_drive3:: {
5
7
api:: Scope ,
6
8
yup_oauth2:: { read_service_account_key, ServiceAccountAuthenticator } ,
@@ -10,11 +12,62 @@ use http_body_util::BodyExt;
10
12
use hyper_rustls:: HttpsConnector ;
11
13
use hyper_util:: client:: legacy:: connect:: HttpConnector ;
12
14
use indexmap:: IndexSet ;
13
- use log:: debug ;
15
+ use log:: warn ;
14
16
15
17
use crate :: ops:: sdk:: * ;
16
18
19
/// Pair of export targets for a Google Workspace document type.
///
/// Google-native documents have no raw byte content of their own, so they are
/// exported to another format; which of the two targets is used depends on
/// whether the caller wants binary or text content.
#[derive(Debug, Clone, Copy)]
struct ExportMimeType {
    /// MIME type to export to when text content is requested.
    text: &'static str,
    /// MIME type to export to when binary content is requested.
    binary: &'static str,
}
23
+
17
24
/// MIME type Google Drive assigns to folders.
const FOLDER_MIME_TYPE: &str = "application/vnd.google-apps.folder";
/// Google-namespaced MIME type that is still treated as a supported file type
/// even though it has no entry in the export table.
const FILE_MIME_TYPE: &str = "application/vnd.google-apps.file";
26
+ static EXPORT_MIME_TYPES : LazyLock < HashMap < & ' static str , ExportMimeType > > = LazyLock :: new ( || {
27
+ HashMap :: from ( [
28
+ (
29
+ "application/vnd.google-apps.document" ,
30
+ ExportMimeType {
31
+ text : "text/markdown" ,
32
+ binary : "application/pdf" ,
33
+ } ,
34
+ ) ,
35
+ (
36
+ "application/vnd.google-apps.spreadsheet" ,
37
+ ExportMimeType {
38
+ text : "text/csv" ,
39
+ binary : "application/pdf" ,
40
+ } ,
41
+ ) ,
42
+ (
43
+ "application/vnd.google-apps.presentation" ,
44
+ ExportMimeType {
45
+ text : "text/plain" ,
46
+ binary : "application/pdf" ,
47
+ } ,
48
+ ) ,
49
+ (
50
+ "application/vnd.google-apps.drawing" ,
51
+ ExportMimeType {
52
+ text : "image/svg+xml" ,
53
+ binary : "image/png" ,
54
+ } ,
55
+ ) ,
56
+ (
57
+ "application/vnd.google-apps.script" ,
58
+ ExportMimeType {
59
+ text : "application/vnd.google-apps.script+json" ,
60
+ binary : "application/vnd.google-apps.script+json" ,
61
+ } ,
62
+ ) ,
63
+ ] )
64
+ } ) ;
65
+
66
+ fn is_supported_file_type ( mime_type : & str ) -> bool {
67
+ !mime_type. starts_with ( "application/vnd.google-apps." )
68
+ || EXPORT_MIME_TYPES . contains_key ( mime_type)
69
+ || mime_type == FILE_MIME_TYPE
70
+ }
18
71
19
72
#[ derive( Debug , Deserialize ) ]
20
73
pub struct Spec {
@@ -91,11 +144,21 @@ impl Executor {
91
144
let ( _, files) = list_call. doit ( ) . await ?;
92
145
if let Some ( files) = files. files {
93
146
for file in files {
94
- if let Some ( id) = file. id {
95
- if file. mime_type . as_ref ( ) == Some ( & FOLDER_MIME_TYPE . to_string ( ) ) {
96
- Box :: pin ( self . traverse_folder ( & id, visited_folder_ids, result) ) . await ?;
97
- } else {
98
- result. insert ( KeyValue :: Str ( Arc :: from ( id) ) ) ;
147
+ match ( file. id , file. mime_type ) {
148
+ ( Some ( id) , Some ( mime_type) ) => {
149
+ if mime_type == FOLDER_MIME_TYPE {
150
+ Box :: pin ( self . traverse_folder ( & id, visited_folder_ids, result) )
151
+ . await ?;
152
+ } else if is_supported_file_type ( & mime_type) {
153
+ result. insert ( KeyValue :: Str ( Arc :: from ( id) ) ) ;
154
+ } else {
155
+ warn ! ( "Skipping file with unsupported mime type: id={id}, mime_type={mime_type}, name={:?}" , file. name) ;
156
+ }
157
+ }
158
+ ( id, mime_type) => {
159
+ warn ! (
160
+ "Skipping file with incomplete metadata: id={id:?}, mime_type={mime_type:?}" ,
161
+ ) ;
99
162
}
100
163
}
101
164
}
@@ -121,17 +184,32 @@ impl SourceExecutor for Executor {
121
184
async fn get_value ( & self , key : & KeyValue ) -> Result < Option < FieldValues > > {
122
185
let file_id = key. str_value ( ) ?;
123
186
124
- let filename = async {
125
- let ( _, file) = self
126
- . drive_hub
187
+ let ( _, file) = self
188
+ . drive_hub
189
+ . files ( )
190
+ . get ( file_id)
191
+ . add_scope ( Scope :: Readonly )
192
+ . doit ( )
193
+ . await ?;
194
+
195
+ let resp_body = if let Some ( export_mime_type) = file
196
+ . mime_type
197
+ . as_ref ( )
198
+ . and_then ( |mime_type| EXPORT_MIME_TYPES . get ( mime_type. as_str ( ) ) )
199
+ {
200
+ let target_mime_type = if self . binary {
201
+ export_mime_type. binary
202
+ } else {
203
+ export_mime_type. text
204
+ } ;
205
+ self . drive_hub
127
206
. files ( )
128
- . get ( file_id)
207
+ . export ( & file_id, target_mime_type )
129
208
. add_scope ( Scope :: Readonly )
130
209
. doit ( )
131
- . await ?;
132
- anyhow:: Ok ( file. name . unwrap_or_default ( ) )
133
- } ;
134
- let body = async {
210
+ . await ?
211
+ . into_body ( )
212
+ } else {
135
213
let ( resp, _) = self
136
214
. drive_hub
137
215
. files ( )
@@ -140,13 +218,11 @@ impl SourceExecutor for Executor {
140
218
. param ( "alt" , "media" )
141
219
. doit ( )
142
220
. await ?;
143
- let content = resp. into_body ( ) . collect ( ) . await ?;
144
- anyhow:: Ok ( content)
221
+ resp. into_body ( )
145
222
} ;
146
- let ( filename, content) = try_join ( filename, body) . await ?;
147
-
223
+ let content = resp_body. collect ( ) . await ?;
148
224
let mut fields = Vec :: with_capacity ( 2 ) ;
149
- fields. push ( filename . into ( ) ) ;
225
+ fields. push ( file . name . unwrap_or_default ( ) . into ( ) ) ;
150
226
if self . binary {
151
227
fields. push ( content. to_bytes ( ) . to_vec ( ) . into ( ) ) ;
152
228
} else {
0 commit comments