Skip to content

Commit db11fd8

Browse files
authored
Support files for Google docs/spreadsheet/etc in GoogleDrive source (#162)
1 parent 4f7e001 commit db11fd8

File tree

1 file changed

+97
-21
lines changed

1 file changed

+97
-21
lines changed

src/ops/sources/google_drive.rs

Lines changed: 97 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
1-
use std::sync::Arc;
1+
use std::{
2+
collections::HashMap,
3+
sync::{Arc, LazyLock},
4+
};
25

3-
use futures::future::try_join;
46
use google_drive3::{
57
api::Scope,
68
yup_oauth2::{read_service_account_key, ServiceAccountAuthenticator},
@@ -10,11 +12,62 @@ use http_body_util::BodyExt;
1012
use hyper_rustls::HttpsConnector;
1113
use hyper_util::client::legacy::connect::HttpConnector;
1214
use indexmap::IndexSet;
13-
use log::debug;
15+
use log::warn;
1416

1517
use crate::ops::sdk::*;
1618

19+
/// Pair of MIME types that one Google Workspace file type is exported as.
///
/// Which of the two is requested from the Drive `export` endpoint is decided by
/// the executor's `binary` flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ExportMimeType {
    /// Export format requested when the source is not in binary mode.
    text: &'static str,
    /// Export format requested when the source is in binary mode.
    binary: &'static str,
}
23+
1724
/// MIME type of a Google Drive folder; entries with this type are traversed
/// recursively instead of being emitted as files.
const FOLDER_MIME_TYPE: &str = "application/vnd.google-apps.folder";
/// Google-apps MIME type that is accepted as a supported file even though it
/// has no entry in the export table.
const FILE_MIME_TYPE: &str = "application/vnd.google-apps.file";
26+
static EXPORT_MIME_TYPES: LazyLock<HashMap<&'static str, ExportMimeType>> = LazyLock::new(|| {
27+
HashMap::from([
28+
(
29+
"application/vnd.google-apps.document",
30+
ExportMimeType {
31+
text: "text/markdown",
32+
binary: "application/pdf",
33+
},
34+
),
35+
(
36+
"application/vnd.google-apps.spreadsheet",
37+
ExportMimeType {
38+
text: "text/csv",
39+
binary: "application/pdf",
40+
},
41+
),
42+
(
43+
"application/vnd.google-apps.presentation",
44+
ExportMimeType {
45+
text: "text/plain",
46+
binary: "application/pdf",
47+
},
48+
),
49+
(
50+
"application/vnd.google-apps.drawing",
51+
ExportMimeType {
52+
text: "image/svg+xml",
53+
binary: "image/png",
54+
},
55+
),
56+
(
57+
"application/vnd.google-apps.script",
58+
ExportMimeType {
59+
text: "application/vnd.google-apps.script+json",
60+
binary: "application/vnd.google-apps.script+json",
61+
},
62+
),
63+
])
64+
});
65+
66+
fn is_supported_file_type(mime_type: &str) -> bool {
67+
!mime_type.starts_with("application/vnd.google-apps.")
68+
|| EXPORT_MIME_TYPES.contains_key(mime_type)
69+
|| mime_type == FILE_MIME_TYPE
70+
}
1871

1972
#[derive(Debug, Deserialize)]
2073
pub struct Spec {
@@ -91,11 +144,21 @@ impl Executor {
91144
let (_, files) = list_call.doit().await?;
92145
if let Some(files) = files.files {
93146
for file in files {
94-
if let Some(id) = file.id {
95-
if file.mime_type.as_ref() == Some(&FOLDER_MIME_TYPE.to_string()) {
96-
Box::pin(self.traverse_folder(&id, visited_folder_ids, result)).await?;
97-
} else {
98-
result.insert(KeyValue::Str(Arc::from(id)));
147+
match (file.id, file.mime_type) {
148+
(Some(id), Some(mime_type)) => {
149+
if mime_type == FOLDER_MIME_TYPE {
150+
Box::pin(self.traverse_folder(&id, visited_folder_ids, result))
151+
.await?;
152+
} else if is_supported_file_type(&mime_type) {
153+
result.insert(KeyValue::Str(Arc::from(id)));
154+
} else {
155+
warn!("Skipping file with unsupported mime type: id={id}, mime_type={mime_type}, name={:?}", file.name);
156+
}
157+
}
158+
(id, mime_type) => {
159+
warn!(
160+
"Skipping file with incomplete metadata: id={id:?}, mime_type={mime_type:?}",
161+
);
99162
}
100163
}
101164
}
@@ -121,17 +184,32 @@ impl SourceExecutor for Executor {
121184
async fn get_value(&self, key: &KeyValue) -> Result<Option<FieldValues>> {
122185
let file_id = key.str_value()?;
123186

124-
let filename = async {
125-
let (_, file) = self
126-
.drive_hub
187+
let (_, file) = self
188+
.drive_hub
189+
.files()
190+
.get(file_id)
191+
.add_scope(Scope::Readonly)
192+
.doit()
193+
.await?;
194+
195+
let resp_body = if let Some(export_mime_type) = file
196+
.mime_type
197+
.as_ref()
198+
.and_then(|mime_type| EXPORT_MIME_TYPES.get(mime_type.as_str()))
199+
{
200+
let target_mime_type = if self.binary {
201+
export_mime_type.binary
202+
} else {
203+
export_mime_type.text
204+
};
205+
self.drive_hub
127206
.files()
128-
.get(file_id)
207+
.export(&file_id, target_mime_type)
129208
.add_scope(Scope::Readonly)
130209
.doit()
131-
.await?;
132-
anyhow::Ok(file.name.unwrap_or_default())
133-
};
134-
let body = async {
210+
.await?
211+
.into_body()
212+
} else {
135213
let (resp, _) = self
136214
.drive_hub
137215
.files()
@@ -140,13 +218,11 @@ impl SourceExecutor for Executor {
140218
.param("alt", "media")
141219
.doit()
142220
.await?;
143-
let content = resp.into_body().collect().await?;
144-
anyhow::Ok(content)
221+
resp.into_body()
145222
};
146-
let (filename, content) = try_join(filename, body).await?;
147-
223+
let content = resp_body.collect().await?;
148224
let mut fields = Vec::with_capacity(2);
149-
fields.push(filename.into());
225+
fields.push(file.name.unwrap_or_default().into());
150226
if self.binary {
151227
fields.push(content.to_bytes().to_vec().into());
152228
} else {

0 commit comments

Comments
 (0)