Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: warn if default cloud_function_service_account is used #1424

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions bigframes/functions/_function_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)

from bigframes import clients
from bigframes import version as bigframes_version

if TYPE_CHECKING:
from bigframes.session import Session
Expand Down Expand Up @@ -129,6 +130,13 @@ def remote_function(
.. deprecated:: 0.0.1
This is an internal method. Please use :func:`bigframes.pandas.remote_function` instead.

.. warning::
To use remote functions with Bigframes 2.0 and onwards, please set an
explicit user-managed cloud_function_service_account or explicitly set
cloud_function_service_account to ``None``.

See https://cloud.google.com/functions/docs/securing/function-identity for details.

.. note::
Please make sure the following is set up before using this API:

Expand Down Expand Up @@ -314,6 +322,22 @@ def remote_function(

session = cast(bigframes.session.Session, session or bpd.get_global_session())

# Emit a UserWarning (not an exception) when the caller has not supplied a
# cloud_function_service_account; per the message below, the default compute
# service account is used in that case.
# NOTE(review): `None` is also the value the message tells callers to pass to
# opt in to the default explicitly, so this check cannot distinguish "unset"
# from "explicitly None" and warns in both cases -- confirm whether a sentinel
# default is intended.
msg = (
    "You have not explicitly set a user-managed cloud_function_service_account. "
    "Using the default compute service account. "
    "To use Bigframes 2.0, please set an explicit user-managed "
    "cloud_function_service_account or explicitly set cloud_function_service_account to `None`. "
    "See, https://cloud.google.com/functions/docs/securing/function-identity."
)

# Only releases whose version string starts with "1." warn.
if (
    bigframes_version.__version__.startswith("1.")
    and cloud_function_service_account is None
):
    warnings.warn(msg, category=UserWarning)

# A BigQuery client is required to perform BQ operations
if not bigquery_client:
bigquery_client = session.bqclient
Expand Down
7 changes: 7 additions & 0 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,6 +1215,13 @@ def remote_function(
supports dataframe with column types ``Int64``/``Float64``/``boolean``/
``string``/``binary[pyarrow]``.

.. warning::
To use remote functions with Bigframes 2.0 and onwards, please set an
explicit user-managed cloud_function_service_account or explicitly set
cloud_function_service_account to ``None``.

See https://cloud.google.com/functions/docs/securing/function-identity for details.

.. note::
Please make sure the following is set up before using this API:

Expand Down
94 changes: 53 additions & 41 deletions notebooks/remote_functions/remote_function_usecases.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -44,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 2,
"metadata": {
"id": "Y6QAttCqqMM0"
},
Expand All @@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -66,17 +66,21 @@
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/shobs/code/bigframes1/venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3550: UserWarning: Reading cached table from 2024-07-24 08:01:12.491984+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
]
"data": {
"text/html": [
"Query job 1f6094e9-1942-477c-9ce3-87a614d71294 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:1f6094e9-1942-477c-9ce3-87a614d71294&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Query job 9d155f10-e37a-4d20-b2ff-02868ecb58f4 is DONE. 582.8 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:9d155f10-e37a-4d20-b2ff-02868ecb58f4&page=queryresults\">Open Job</a>"
"Query job ba19f29c-33d3-4f12-9605-ddeafb74918e is DONE. 582.8 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:ba19f29c-33d3-4f12-9605-ddeafb74918e&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
Expand All @@ -88,7 +92,7 @@
{
"data": {
"text/html": [
"Query job 5a524e70-12dc-4116-b416-04570bbf754e is DONE. 82.0 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:5a524e70-12dc-4116-b416-04570bbf754e&page=queryresults\">Open Job</a>"
"Query job dd1ff8be-700a-4ce5-91a0-31413f70cfad is DONE. 82.0 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:dd1ff8be-700a-4ce5-91a0-31413f70cfad&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
Expand Down Expand Up @@ -125,49 +129,49 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>Reds</td>\n",
" <td>Cubs</td>\n",
" <td>159</td>\n",
" <th>88</th>\n",
" <td>Royals</td>\n",
" <td>Athletics</td>\n",
" <td>176</td>\n",
" </tr>\n",
" <tr>\n",
" <th>358</th>\n",
" <th>106</th>\n",
" <td>Dodgers</td>\n",
" <td>Diamondbacks</td>\n",
" <td>223</td>\n",
" <td>Giants</td>\n",
" <td>216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>416</th>\n",
" <td>Yankees</td>\n",
" <td>White Sox</td>\n",
" <td>216</td>\n",
" <th>166</th>\n",
" <td>Phillies</td>\n",
" <td>Royals</td>\n",
" <td>162</td>\n",
" </tr>\n",
" <tr>\n",
" <th>523</th>\n",
" <td>Rays</td>\n",
" <td>Athletics</td>\n",
" <td>187</td>\n",
" <th>247</th>\n",
" <td>Rangers</td>\n",
" <td>Royals</td>\n",
" <td>161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>594</th>\n",
" <td>Pirates</td>\n",
" <td>Brewers</td>\n",
" <td>169</td>\n",
" <th>374</th>\n",
" <td>Athletics</td>\n",
" <td>Astros</td>\n",
" <td>161</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" homeTeamName awayTeamName duration_minutes\n",
"36 Reds Cubs 159\n",
"358 Dodgers Diamondbacks 223\n",
"416 Yankees White Sox 216\n",
"523 Rays Athletics 187\n",
"594 Pirates Brewers 169"
" homeTeamName awayTeamName duration_minutes\n",
"88 Royals Athletics 176\n",
"106 Dodgers Giants 216\n",
"166 Phillies Royals 162\n",
"247 Rangers Royals 161\n",
"374 Athletics Astros 161"
]
},
"execution_count": 22,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -216,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -226,10 +230,18 @@
"outputId": "19351206-116e-4da2-8ff0-f288b7745b27"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/functions/_function_session.py:335: UserWarning: You have not explicitly set a user-managed cloud_function_service_account. Using the default compute service account, {cloud_function_service_account}. To use Bigframes 2.0, please set an explicit user-managed cloud_function_service_account or set cloud_function_service_account explicitly to `default`.See, https://cloud.google.com/functions/docs/securing/function-identity.\n",
" warnings.warn(msg, category=UserWarning)\n"
]
},
{
"data": {
"text/html": [
"Query job ec8d958d-93ef-45ae-8150-6ccfa8feb89a is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:ec8d958d-93ef-45ae-8150-6ccfa8feb89a&page=queryresults\">Open Job</a>"
"Query job 7c021760-59c4-4f3a-846c-9693a4d16eef is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:7c021760-59c4-4f3a-846c-9693a4d16eef&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
Expand All @@ -242,7 +254,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-e22dbecc9ec0374bda36bc23df3775b0-g8zp' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_e22dbecc9ec0374bda36bc23df3775b0_g8zp'.\n"
"Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-sessionca6012-ca541a90249f8b62951f38b7aba6a711-49to' and BQ remote function 'bigframes-dev._ed1e4d0f7d41174ba506d34d15dccf040d13f69e.bigframes_sessionca6012_ca541a90249f8b62951f38b7aba6a711_49to'.\n"
]
}
],
Expand Down Expand Up @@ -1430,7 +1442,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.9.19"
}
},
"nbformat": 4,
Expand Down
38 changes: 38 additions & 0 deletions tests/system/large/functions/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import sys
import tempfile
import textwrap
import warnings

import google.api_core.exceptions
from google.cloud import bigquery, functions_v2, storage
Expand Down Expand Up @@ -1359,6 +1360,43 @@ def square_num(x):
)


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_warns_default_cloud_function_service_account(scalars_dfs):
    """Check that omitting cloud_function_service_account emits a UserWarning.

    A remote function is deployed without an explicit service account, the
    recorded warnings are searched for the "Bigframes 2.0" advisory, and the
    deployed function is checked against the equivalent pandas computation.

    NOTE(review): the advisory is only emitted while the bigframes version
    string starts with "1." -- this test presumably needs revisiting for 2.0.
    """
    project = "bigframes-dev-perf"

    rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))

    # Stays None until deployment succeeds, so that a failure inside the
    # decorator is not masked by a NameError raised from the cleanup below.
    deployed_function = None

    try:
        # Only the decoration is recorded: that is the call expected to emit
        # the advisory, and recording less keeps unrelated warnings out.
        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always")

            @rf_session.remote_function([int], int, reuse=False)
            def square_num(x):
                if x is None:
                    return x
                return x * x

            deployed_function = square_num

        scalars_df, scalars_pandas_df = scalars_dfs

        bf_int64_col = scalars_df["int64_col"]
        bf_result_col = bf_int64_col.apply(square_num)
        bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()

        pd_int64_col = scalars_pandas_df["int64_col"]
        pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
        pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)

        assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)

        # Search every recorded warning instead of assuming the advisory is
        # the first one; other warnings may be recorded ahead of it.
        advisories = [
            item
            for item in recorded
            if issubclass(item.category, UserWarning)
            and "To use Bigframes 2.0, please set an explicit" in str(item.message)
        ]
        assert advisories, (
            "expected a UserWarning about the default "
            "cloud_function_service_account, got: "
            f"{[str(item.message) for item in recorded]}"
        )
    finally:
        # clean up the gcp assets created for the remote function
        if deployed_function is not None:
            cleanup_remote_function_assets(
                rf_session.bqclient, rf_session.cloudfunctionsclient, deployed_function
            )


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_with_gcf_cmek():
# TODO(shobs): Automate the following set-up during testing in the test project.
Expand Down