Skip to content

test(bigquery): add integration tests for dataset admin operations #1914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/auth/integration-tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ gax = { path = "../../../src/gax", package = "google-cloud-gax" }
language = { path = "../../../src/generated/cloud/language/v2", package = "google-cloud-language-v2" }
scoped-env = "2"
secretmanager = { path = "../../../src/generated/cloud/secretmanager/v1", package = "google-cloud-secretmanager-v1" }
bigquery = { path = "../../../src/generated/cloud/bigquery/v2", package = "google-cloud-bigquery-v2" }
tempfile = "3"

[dev-dependencies]
Expand Down
4 changes: 4 additions & 0 deletions src/integration-tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ wkt.workspace = true
package = "google-cloud-firestore"
path = "../../src/firestore"

[dependencies.bigquery]
package = "google-cloud-bigquery-v2"
path = "../../src/generated/cloud/bigquery/v2"

[dependencies.sm]
package = "google-cloud-secretmanager-v1"
path = "../../src/generated/cloud/secretmanager/v1"
Expand Down
145 changes: 145 additions & 0 deletions src/integration-tests/src/bigquery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::Result;
use gax::error::Error;
use rand::{Rng, distr::Alphanumeric};

/// Runs the dataset admin scenario against the project returned by
/// `crate::project_id()`.
///
/// The scenario builds a `DatasetService` client from `builder`, removes
/// stale datasets via `cleanup_stale_datasets`, creates a dataset with a
/// randomized name, checks that it shows up in the dataset listing, and
/// finally deletes it.
///
/// # Errors
///
/// Returns the first error produced while resolving the project id, building
/// the client, or issuing any of the awaited requests.
pub async fn dataset_admin(
builder: bigquery::builder::dataset_service::ClientBuilder,
) -> Result<()> {
// Enable a basic subscriber. Useful to troubleshoot problems and visually
// verify tracing is doing something.
// The value returned by `set_default` is bound to `_guard`, so it stays
// alive until the end of this function.
#[cfg(feature = "log-integration-tests")]
let _guard = {
use tracing_subscriber::fmt::format::FmtSpan;
let subscriber = tracing_subscriber::fmt()
.with_level(true)
.with_thread_ids(true)
.with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
.finish();

tracing::subscriber::set_default(subscriber)
};

let project_id = crate::project_id()?;
let client = builder.build().await?;
// Clean up leftovers first; a failure here aborts the test before anything
// new is created.
cleanup_stale_datasets(&client, &project_id).await?;

// Eight random alphanumeric characters keep the dataset name unique across
// runs.
let rand_suffix: String = rand::rng()
.sample_iter(&Alphanumeric)
.take(8)
.map(char::from)
.collect();

let ds_name = format!("rust_bq_test_dataset_{rand_suffix}");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general we prefer using labels to identify test resources. Can we do that here?


println!("CREATING DATASET WITH ID: {ds_name}");

// Create the dataset. Only the dataset id is set explicitly; every other
// field keeps the value produced by `Dataset::new()`.
let create = client
    .insert_dataset(&project_id)
    .set_dataset(bigquery::model::Dataset::new().set_dataset_reference(
        bigquery::model::DatasetReference::new().set_dataset_id(&ds_name),
    ))
    .send()
    .await?;
println!("CREATE DATASET = {create:?}");

assert!(create.dataset_reference.is_some());

let list = client.list_datasets(&project_id).send().await?;
println!("LIST DATASET = {} entries", list.datasets.len());

// The new dataset must be listed. Match on the exact dataset id from the
// reference when it is present, and otherwise on the tail of the fully
// qualified `id`. No assumption is made about how many other datasets exist
// in the project.
// NOTE(review): only the first response of `list_datasets` is inspected here;
// confirm whether the listing can be split across several pages.
let found = list.datasets.iter().any(|v| {
    v.dataset_reference
        .as_ref()
        .is_some_and(|r| r.dataset_id == ds_name)
        || v.id.ends_with(&ds_name)
});
assert!(found, "dataset {ds_name} missing from {:?}", list.datasets);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of this can be changed to:

Suggested change
assert!(!list.datasets.is_empty());
assert!(list.datasets.len() > 1);
assert!(
list.datasets
.iter()
.find(|v| v.id.contains(&ds_name))
.is_some()
);
assert!(
list.datasets
.iter()
.find(|v| v.id.contains(&ds_name))
.is_some(),
"{:?}", list.datasets
);

Separately, why "contains"? Can we make that a more specific predicate?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dataset `id` field holds the fully qualified ID of the dataset, something like `projects/{projectId}/datasets/{datasetId}`, which is why I used `contains`.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was suggesting we do something more specific, such as:

  v.id.strip_suffix(&ds_name).and(Some(true)).unwrap_or_default()

or

  v.id == format!("projects/{project_id}/datasets/{ds_name}")

or (gross):

  v.id.strip_prefix("projects/").and_then(|s| s.strip_prefix(&project_id)).and_then(|s|  s.strip_prefix("/datasets/")).and_then(|s| s == &ds_name).unwrap_or_default()


// Remove the dataset created above so the test leaves nothing behind.
let delete = client
.delete_dataset(&project_id, &ds_name)
.set_delete_contents(true)
.send()
.await?;
println!("DELETE DATASET = {delete:?}");

// No explicit assertion on `delete` is needed: the `?` above already returns
// early with the error if the request failed.

Ok(())
}

/// Deletes datasets left behind by earlier runs of the integration test.
///
/// Lists the datasets in `project_id`, fetches each one, keeps those created
/// more than 48 hours ago that match the name filter below, and deletes them.
///
/// # Errors
///
/// Returns an error if the system clock is before the Unix epoch or if the
/// list request fails. Failures of the individual get and delete requests are
/// not propagated.
async fn cleanup_stale_datasets(
client: &bigquery::client::DatasetService,
project_id: &str,
) -> Result<()> {
use std::time::{Duration, SystemTime, UNIX_EPOCH};
// Cutoff = now minus 48 hours, converted to milliseconds since the Unix
// epoch so it can be compared with `creation_time` further down.
let stale_deadline = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(Error::other)?;
let stale_deadline = stale_deadline - Duration::from_secs(48 * 60 * 60);
let stale_deadline = stale_deadline.as_millis() as i64;

let list = client.list_datasets(project_id).send().await?;
// Prepare one `get_dataset` request per listed dataset; the requests are
// awaited together later on.
let pending_all_datasets = list
.datasets
.iter()
.map(|v| {
return client
.get_dataset(
project_id,
// Falls back to an empty dataset id when the list entry carries no
// `dataset_reference`.
v.dataset_reference.as_ref().map_or("", |v| &v.dataset_id),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This deserves a comment explaining what is going on. If `dataset_reference` is not set, it seems we cannot make a successful call to `.get_dataset()`. Maybe returning `None` is more appropriate, or maybe we should use `v.id` and strip the project id prefix?

Maybe we should have something like:

fn extract_dataset_id(project_id: &str, v: bigquery::model::ListFormatDataset) -> Option<String> {
    match v.dataset_reference {
        Some(r) => r.dataset_id.clone,
        None => v.id.strip_prefix("projects/").strip_prefix(project_id).owned(),
    }
}

and then we issue the request only if we have Some()...

)
.send();
})
.collect::<Vec<_>>();

// Wait for every fetch, then keep only the datasets that were retrieved
// successfully, were created before the cutoff, and carry the name prefix
// that `dataset_admin` gives to the datasets it creates
// (`rust_bq_test_dataset_`). Fetch errors are dropped on purpose: cleanup is
// best effort and must not fail the test.
let stale_datasets = futures::future::join_all(pending_all_datasets)
    .await
    .into_iter()
    .filter_map(Result::ok)
    .filter(|ds| {
        ds.creation_time < stale_deadline && ds.id.contains("rust_bq_test_dataset_")
    })
    .collect::<Vec<_>>();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a case where using into_stream() and StreamExt should help.

To start with, we should be able to write:

  use futures::StreamExt;
  ... ... ..

  let list = client.list_datasets(project_id).send().await?;
  futures::stream::iter(list.datasets.iter())
    .flat_map_unordered(16, // limit the number of concurrent requests 
        async |item| {
            if let Some(id) = extract_dataset_id(item) {
                return client.get_dataset(project_id, id).send().await;
            }
            None
        })
    .flat_map_unordered(16, |result| -> Option<Result<()>> {
      let result = result?;
      match result {
        Err(e) => Some(Err(e)),
        Ok(dataset) => {
            let id = extract_dataset_id(project_id, dataset.)?; // Blegh, this needs some refactoring
            if dataset.creation_time < stale_deadline && ds.labels.find("integration-test") == Some("true") {             
              return Some(client.delete_dataset(project_id, id).await);
            }
            None
        }
      }
    });

Finally you may need to collect all the errors.


println!("found {} stale datasets", stale_datasets.len());

// Prepare one delete request per stale dataset. As in the fetch step, an
// entry without a `dataset_reference` falls back to an empty dataset id.
let pending_deletion: Vec<_> = stale_datasets
.iter()
.map(|ds| {
client
.delete_dataset(
project_id,
ds.dataset_reference.as_ref().map_or("", |v| &v.dataset_id),
)
.set_delete_contents(true)
.send()
})
.collect();

// Await all deletions together and print each outcome next to the id of the
// dataset it belongs to. Failures are only printed, never returned.
futures::future::join_all(pending_deletion)
.await
.into_iter()
.zip(stale_datasets)
.for_each(|(r, ds)| println!("{} = {r:?}", ds.id));

Ok(())
}
1 change: 1 addition & 0 deletions src/integration-tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use gax::error::Error;
pub type Result<T> = std::result::Result<T, gax::error::Error>;
pub mod bigquery;
pub mod error_details;
pub mod firestore;
pub mod secret_manager;
Expand Down
12 changes: 12 additions & 0 deletions src/integration-tests/tests/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ mod driver {
.map_err(report)
}

#[test_case(bigquery::client::DatasetService::builder(); "default")]
#[test_case(bigquery::client::DatasetService::builder().with_tracing(); "with tracing enabled")]
#[test_case(bigquery::client::DatasetService::builder().with_retry_policy(retry_policy()); "with retry enabled")]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do not need all three cases for each library. One is enough, maybe this?

Suggested change
#[test_case(bigquery::client::DatasetService::builder(); "default")]
#[test_case(bigquery::client::DatasetService::builder().with_tracing(); "with tracing enabled")]
#[test_case(bigquery::client::DatasetService::builder().with_retry_policy(retry_policy()); "with retry enabled")]
#[test_case(bigquery::client::DatasetService::builder().with_retry_policy(retry_policy()).with_tracing(); "with retry and tracing enabled")]

#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn run_bigquery(
    builder: bigquery::builder::dataset_service::ClientBuilder,
) -> integration_tests::Result<()> {
    // Run the dataset admin scenario with the supplied builder and pass any
    // failure through `report` before handing it back to the test harness.
    let outcome = integration_tests::bigquery::dataset_admin(builder).await;
    outcome.map_err(report)
}

#[test_case(sm::client::SecretManagerService::builder(); "default")]
#[test_case(sm::client::SecretManagerService::builder().with_tracing(); "with tracing enabled")]
#[test_case(sm::client::SecretManagerService::builder().with_retry_policy(retry_policy()); "with retry enabled")]
Expand Down
33 changes: 31 additions & 2 deletions src/wkt/src/empty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Deserializer, Serialize, de::IntoDeserializer};

/// A generic empty message that you can re-use to avoid defining duplicated
/// empty messages in your APIs. A typical example is to use it as the request
/// or the response type of an API method. For instance:
Expand All @@ -21,10 +23,23 @@
/// rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
/// }
/// ```
///
#[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
#[derive(Clone, Debug, Default, PartialEq, Serialize)]
pub struct Empty {}

impl<'de> Deserialize<'de> for Empty {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let input = String::deserialize(deserializer).unwrap_or(String::default());
if input.trim().is_empty() || input.trim().eq("null") {
return Ok(Empty::default());
}
let string_deserializer = String::into_deserializer(input);
Ok(Option::<Empty>::deserialize(string_deserializer)?.unwrap())
}
}

impl crate::message::Message for Empty {
fn typename() -> &'static str {
"type.googleapis.com/google.protobuf.Empty"
Expand All @@ -51,4 +66,18 @@ mod tests {
assert_eq!(Empty::default(), got);
Ok(())
}

#[test]
fn deserialize_null() -> Result {
    // A JSON `null` value must be accepted as the empty message.
    let parsed = serde_json::from_value::<Empty>(json!(null))?;
    assert_eq!(Empty::default(), parsed);
    Ok(())
}

#[test]
fn deserialize_empty() -> Result {
    // An input with no content at all must be accepted as the empty message.
    let parsed = serde_json::from_str::<Empty>("")?;
    assert_eq!(Empty::default(), parsed);
    Ok(())
}
}
Loading