Skip to content

Commit

Permalink
Querylog should record the correct bytes_scanned (#6388)
Browse files Browse the repository at this point in the history
Following up on #4714, let's make
that change for the Querylog as well.

Currently, [this Snuba
producer](https://github.com/getsentry/snuba/blob/ffcfdb9241960e879af9904b06f57ba3387be8f2/snuba/state/__init__.py#L352)
produces a bunch of information about queries to the Querylog topic, and
the Querylog consumer is responsible for deserializing all that
information. When populating bytes_scanned, we should be deserializing
progress_bytes instead of bytes.

---------

Co-authored-by: Rachel Chen <rachelchen@PL6VFX9HP4.local>
  • Loading branch information
xurui-c and Rachel Chen authored Oct 4, 2024
1 parent 997ec1e commit aa4d027
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 13 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ python-dateutil==2.8.2
python-rapidjson==1.8
redis==4.3.4
sentry-arroyo==2.17.6
sentry-kafka-schemas==0.1.110
sentry-kafka-schemas==0.1.112
sentry-redis-tools==0.3.0
sentry-relay==0.8.44
sentry-sdk==1.40.5
Expand Down
6 changes: 3 additions & 3 deletions rust_snuba/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion rust_snuba/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pyo3 = { version = "0.18.1", features = ["chrono"] }
reqwest = { version = "0.11.11", features = ["stream"] }
rust_arroyo = { version = "*", git = "https://github.com/getsentry/arroyo" }
sentry = { version = "0.32.0", features = ["anyhow", "tracing"] }
sentry-kafka-schemas = "0.1.110"
sentry-kafka-schemas = "0.1.112"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0" }
thiserror = "1.0"
Expand Down
14 changes: 13 additions & 1 deletion rust_snuba/src/processors/querylog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ struct Profile {
#[serde(default)]
struct ResultProfile {
bytes: u64,
progress_bytes: u64,
elapsed: f64,
}

Expand Down Expand Up @@ -163,6 +164,8 @@ struct QueryList {
array_join_columns: Vec<Vec<String>>,
#[serde(rename(serialize = "clickhouse_queries.bytes_scanned"))]
bytes_scanned: Vec<u64>,
#[serde(rename(serialize = "clickhouse_queries.bytes"))]
bytes: Vec<u64>,
#[serde(rename(serialize = "clickhouse_queries.duration_ms"))]
duration_ms: Vec<u64>,
}
Expand Down Expand Up @@ -190,6 +193,7 @@ impl TryFrom<Vec<FromQuery>> for QueryList {
let mut groupby_columns = vec![];
let mut array_join_columns = vec![];
let mut bytes_scanned = vec![];
let mut bytes = vec![];
let mut duration_ms = vec![];

for q in from {
Expand Down Expand Up @@ -224,7 +228,8 @@ impl TryFrom<Vec<FromQuery>> for QueryList {
groupby_columns.push(q.profile.groupby_cols);
array_join_columns.push(q.profile.array_join_cols);
let result_profile = q.result_profile.unwrap_or_default();
bytes_scanned.push(result_profile.bytes);
bytes_scanned.push(result_profile.progress_bytes);
bytes.push(result_profile.bytes);
duration_ms.push((result_profile.elapsed * 1000.0) as u64);

// consistent, cache hit, max_threads and is_duplicated may not be present
Expand Down Expand Up @@ -273,6 +278,7 @@ impl TryFrom<Vec<FromQuery>> for QueryList {
groupby_columns,
array_join_columns,
bytes_scanned,
bytes,
duration_ms,
})
}
Expand Down Expand Up @@ -383,6 +389,8 @@ mod tests {
},
"result_profile": {
"bytes": 1305,
"progress_bytes": 0,
"blocks": 1,
"blocks": 1,
"rows": 22,
"elapsed": 0.009863138198852539
Expand Down Expand Up @@ -490,6 +498,8 @@ mod tests {
},
"result_profile": {
"bytes": 1305,
"progress_bytes": 0,
"blocks": 1,
"blocks": 1,
"rows": 22,
"elapsed": 0.009863138198852539
Expand Down Expand Up @@ -597,6 +607,8 @@ mod tests {
},
"result_profile": {
"bytes": 1305,
"progress_bytes": 0,
"blocks": 1,
"blocks": 1,
"rows": 22,
"elapsed": 0.009863138198852539
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ expression: snapshot_payload
"clickhouse_queries.array_join_columns": [
[]
],
"clickhouse_queries.bytes": [
0
],
"clickhouse_queries.bytes_scanned": [
0
],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: src/processors/mod.rs
description: "{\n \"request\": {\n \"id\": \"24a78d10a0134f2aa6367ba2a393b504\",\n \"body\": {\n \"legacy\": true,\n \"query\": \"MATCH (events) SELECT count() AS `count`, min(timestamp) AS `first_seen`, max(timestamp) AS `last_seen` BY tags_key, tags_value WHERE timestamp >= toDateTime('2023-02-08T21:07:12.769001') AND timestamp < toDateTime('2023-02-08T21:12:39.015094') AND project_id IN tuple(1) AND project_id IN tuple(1) AND group_id IN tuple(5) ORDER BY count DESC LIMIT 4 BY tags_key\",\n \"dataset\": \"events\",\n \"app_id\": \"legacy\",\n \"parent_api\": \"/api/0/issues|groups/{issue_id}/tags/\"\n },\n \"referrer\": \"tagstore.__get_tag_keys_and_top_values\",\n \"team\": \"<unknown>\",\n \"feature\": \"<unknown>\",\n \"app_id\": \"legacy\"\n },\n \"dataset\": \"events\",\n \"entity\": \"events\",\n \"start_timestamp\": 1675919232,\n \"end_timestamp\": 1675919559,\n \"query_list\": [\n {\n \"sql\": \"SELECT (tupleElement((arrayJoin(arrayMap((x, y -> (x, y)), tags.key, tags.value)) AS snuba_all_tags), 1) AS _snuba_tags_key), (tupleElement(snuba_all_tags, 2) AS _snuba_tags_value), (count() AS _snuba_count), (min((timestamp AS _snuba_timestamp)) AS _snuba_first_seen), (max(_snuba_timestamp) AS _snuba_last_seen) FROM errors_local PREWHERE in((group_id AS _snuba_group_id), tuple(5)) WHERE equals(deleted, 0) AND greaterOrEquals(_snuba_timestamp, toDateTime('2023-02-08T21:07:12', 'Universal')) AND less(_snuba_timestamp, toDateTime('2023-02-08T21:12:39', 'Universal')) AND in((project_id AS _snuba_project_id), tuple(1)) AND in(_snuba_project_id, tuple(1)) GROUP BY _snuba_tags_key, _snuba_tags_value ORDER BY _snuba_count DESC LIMIT 4 BY _snuba_tags_key LIMIT 1000 OFFSET 0\",\n \"sql_anonymized\": \"SELECT (tupleElement((arrayJoin(arrayMap((x, y -> (x, y)), tags.key, tags.value)) AS snuba_all_tags), -1337) AS _snuba_tags_key), (tupleElement(snuba_all_tags, -1337) AS _snuba_tags_value), (count() AS _snuba_count), (min((timestamp AS _snuba_timestamp)) AS 
_snuba_first_seen), (max(_snuba_timestamp) AS _snuba_last_seen) FROM errors_local PREWHERE in((group_id AS _snuba_group_id), tuple(-1337)) WHERE equals(deleted, -1337) AND greaterOrEquals(_snuba_timestamp, toDateTime('2023-02-08T21:07:12', 'Universal')) AND less(_snuba_timestamp, toDateTime('2023-02-08T21:12:39', 'Universal')) AND in((project_id AS _snuba_project_id), tuple(-1337)) AND in(_snuba_project_id, tuple(-1337)) GROUP BY _snuba_tags_key, _snuba_tags_value ORDER BY _snuba_count DESC LIMIT 4 BY _snuba_tags_key LIMIT 1000 OFFSET 0\",\n \"start_timestamp\": 1675919232,\n \"end_timestamp\": 1675919559,\n \"stats\": {\n \"clickhouse_table\": \"errors_local\",\n \"final\": false,\n \"referrer\": \"tagstore.__get_tag_keys_and_top_values\",\n \"sample\": null,\n \"table_rate\": 0.6,\n \"table_concurrent\": 1,\n \"project_rate\": 0.6333333333333333,\n \"project_concurrent\": 1,\n \"consistent\": false,\n \"result_rows\": 22,\n \"result_cols\": 5,\n \"query_id\": \"9079915acbacff0804ed45c72b865024\"\n },\n \"status\": \"success\",\n \"trace_id\": \"\",\n \"profile\": {\n \"time_range\": null,\n \"table\": \"errors_local\",\n \"all_columns\": [\n \"errors_local.deleted\",\n \"errors_local.group_id\",\n \"errors_local.project_id\",\n \"errors_local.tags.key\",\n \"errors_local.tags.value\",\n \"errors_local.timestamp\"\n ],\n \"multi_level_condition\": false,\n \"where_profile\": {\n \"columns\": [\n \"errors_local.deleted\",\n \"errors_local.project_id\",\n \"errors_local.timestamp\"\n ],\n \"mapping_cols\": []\n },\n \"groupby_cols\": [\"errors_local.tags.key\", \"errors_local.tags.value\"],\n \"array_join_cols\": [\"errors_local.tags.key\", \"errors_local.tags.value\"]\n },\n \"result_profile\": {\n \"bytes\": 1305,\n \"blocks\": 1,\n \"rows\": 22,\n \"elapsed\": 0.009863138198852539\n },\n \"request_status\": \"success\",\n \"slo\": \"for\"\n }\n ],\n \"status\": \"success\",\n \"request_status\": \"success\",\n \"slo\": \"for\",\n \"timing\": {\n \"timestamp\": 
1675890758,\n \"duration_ms\": 55,\n \"marks_ms\": {\n \"cache_get\": 2,\n \"cache_set\": 6,\n \"execute\": 10,\n \"get_configs\": 0,\n \"prepare_query\": 15,\n \"rate_limit\": 5,\n \"validate_schema\": 15\n },\n \"tags\": {}\n },\n \"projects\": [1],\n \"snql_anonymized\": \"MATCH Entity(events) SELECT tags_key, tags_value, (count() AS count), (min(timestamp) AS first_seen), (max(timestamp) AS last_seen) GROUP BY tags_key, tags_value WHERE greaterOrEquals(timestamp, toDateTime('$S')) AND less(timestamp, toDateTime('$S')) AND in(project_id, tuple(-1337)) AND in(project_id, tuple(-1337)) AND in(group_id, tuple(-1337)) ORDER BY count DESC LIMIT 4 BY tags_key LIMIT 1000 OFFSET 0\"\n}\n"
description: "{\n \"request\": {\n \"id\": \"24a78d10a0134f2aa6367ba2a393b504\",\n \"body\": {\n \"legacy\": true,\n \"query\": \"MATCH (events) SELECT count() AS `count`, min(timestamp) AS `first_seen`, max(timestamp) AS `last_seen` BY tags_key, tags_value WHERE timestamp >= toDateTime('2023-02-08T21:07:12.769001') AND timestamp < toDateTime('2023-02-08T21:12:39.015094') AND project_id IN tuple(1) AND project_id IN tuple(1) AND group_id IN tuple(5) ORDER BY count DESC LIMIT 4 BY tags_key\",\n \"dataset\": \"events\",\n \"app_id\": \"legacy\",\n \"parent_api\": \"/api/0/issues|groups/{issue_id}/tags/\"\n },\n \"referrer\": \"tagstore.__get_tag_keys_and_top_values\",\n \"team\": \"<unknown>\",\n \"feature\": \"<unknown>\",\n \"app_id\": \"legacy\"\n },\n \"dataset\": \"events\",\n \"entity\": \"events\",\n \"start_timestamp\": 1675919232,\n \"end_timestamp\": 1675919559,\n \"query_list\": [\n {\n \"sql\": \"SELECT (tupleElement((arrayJoin(arrayMap((x, y -> (x, y)), tags.key, tags.value)) AS snuba_all_tags), 1) AS _snuba_tags_key), (tupleElement(snuba_all_tags, 2) AS _snuba_tags_value), (count() AS _snuba_count), (min((timestamp AS _snuba_timestamp)) AS _snuba_first_seen), (max(_snuba_timestamp) AS _snuba_last_seen) FROM errors_local PREWHERE in((group_id AS _snuba_group_id), tuple(5)) WHERE equals(deleted, 0) AND greaterOrEquals(_snuba_timestamp, toDateTime('2023-02-08T21:07:12', 'Universal')) AND less(_snuba_timestamp, toDateTime('2023-02-08T21:12:39', 'Universal')) AND in((project_id AS _snuba_project_id), tuple(1)) AND in(_snuba_project_id, tuple(1)) GROUP BY _snuba_tags_key, _snuba_tags_value ORDER BY _snuba_count DESC LIMIT 4 BY _snuba_tags_key LIMIT 1000 OFFSET 0\",\n \"sql_anonymized\": \"SELECT (tupleElement((arrayJoin(arrayMap((x, y -> (x, y)), tags.key, tags.value)) AS snuba_all_tags), -1337) AS _snuba_tags_key), (tupleElement(snuba_all_tags, -1337) AS _snuba_tags_value), (count() AS _snuba_count), (min((timestamp AS _snuba_timestamp)) AS 
_snuba_first_seen), (max(_snuba_timestamp) AS _snuba_last_seen) FROM errors_local PREWHERE in((group_id AS _snuba_group_id), tuple(-1337)) WHERE equals(deleted, -1337) AND greaterOrEquals(_snuba_timestamp, toDateTime('2023-02-08T21:07:12', 'Universal')) AND less(_snuba_timestamp, toDateTime('2023-02-08T21:12:39', 'Universal')) AND in((project_id AS _snuba_project_id), tuple(-1337)) AND in(_snuba_project_id, tuple(-1337)) GROUP BY _snuba_tags_key, _snuba_tags_value ORDER BY _snuba_count DESC LIMIT 4 BY _snuba_tags_key LIMIT 1000 OFFSET 0\",\n \"start_timestamp\": 1675919232,\n \"end_timestamp\": 1675919559,\n \"stats\": {\n \"clickhouse_table\": \"errors_local\",\n \"final\": false,\n \"referrer\": \"tagstore.__get_tag_keys_and_top_values\",\n \"sample\": null,\n \"table_rate\": 0.6,\n \"table_concurrent\": 1,\n \"project_rate\": 0.6333333333333333,\n \"project_concurrent\": 1,\n \"consistent\": false,\n \"result_rows\": 22,\n \"result_cols\": 5,\n \"query_id\": \"9079915acbacff0804ed45c72b865024\"\n },\n \"status\": \"success\",\n \"trace_id\": \"\",\n \"profile\": {\n \"time_range\": null,\n \"table\": \"errors_local\",\n \"all_columns\": [\n \"errors_local.deleted\",\n \"errors_local.group_id\",\n \"errors_local.project_id\",\n \"errors_local.tags.key\",\n \"errors_local.tags.value\",\n \"errors_local.timestamp\"\n ],\n \"multi_level_condition\": false,\n \"where_profile\": {\n \"columns\": [\n \"errors_local.deleted\",\n \"errors_local.project_id\",\n \"errors_local.timestamp\"\n ],\n \"mapping_cols\": []\n },\n \"groupby_cols\": [\"errors_local.tags.key\", \"errors_local.tags.value\"],\n \"array_join_cols\": [\"errors_local.tags.key\", \"errors_local.tags.value\"]\n },\n \"result_profile\": {\n \"bytes\": 1305,\n \"progress_bytes\": 0,\n \"blocks\": 1,\n \"rows\": 22,\n \"elapsed\": 0.009863138198852539\n },\n \"request_status\": \"success\",\n \"slo\": \"for\"\n }\n ],\n \"status\": \"success\",\n \"request_status\": \"success\",\n \"slo\": \"for\",\n 
\"timing\": {\n \"timestamp\": 1675890758,\n \"duration_ms\": 55,\n \"marks_ms\": {\n \"cache_get\": 2,\n \"cache_set\": 6,\n \"execute\": 10,\n \"get_configs\": 0,\n \"prepare_query\": 15,\n \"rate_limit\": 5,\n \"validate_schema\": 15\n },\n \"tags\": {}\n },\n \"projects\": [1],\n \"snql_anonymized\": \"MATCH Entity(events) SELECT tags_key, tags_value, (count() AS count), (min(timestamp) AS first_seen), (max(timestamp) AS last_seen) GROUP BY tags_key, tags_value WHERE greaterOrEquals(timestamp, toDateTime('$S')) AND less(timestamp, toDateTime('$S')) AND in(project_id, tuple(-1337)) AND in(project_id, tuple(-1337)) AND in(group_id, tuple(-1337)) ORDER BY count DESC LIMIT 4 BY tags_key LIMIT 1000 OFFSET 0\"\n}\n"
expression: snapshot_payload
---
[
Expand All @@ -21,9 +21,12 @@ expression: snapshot_payload
"errors_local.tags.value"
]
],
"clickhouse_queries.bytes_scanned": [
"clickhouse_queries.bytes": [
1305
],
"clickhouse_queries.bytes_scanned": [
0
],
"clickhouse_queries.cache_hit": [
0
],
Expand Down
Loading

0 comments on commit aa4d027

Please sign in to comment.