Skip to content

Commit e6143c1

Browse files
committed
Merge branch 'main' of github.com:facebookincubator/velox into gha-add-cuda
2 parents 3866a8b + 8676e8d commit e6143c1

File tree

165 files changed

+3049
-1250
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

165 files changed

+3049
-1250
lines changed

.github/workflows/scheduled.yml

+99-10
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,19 @@ jobs:
102102
presto_error: ${{ steps.sig-check.outputs.presto_error }}
103103
spark_bias: ${{ steps.sig-check.outputs.spark_functions }}
104104
spark_error: ${{ steps.sig-check.outputs.spark_error }}
105+
presto_aggregate_bias: ${{ steps.sig-check.outputs.presto_aggregate_functions }}
106+
presto_aggregate_error: ${{ steps.sig-check.outputs.presto_aggregate_error }}
105107

106108
steps:
107109

108110
- name: Get latest commit from main
109111
if: ${{ github.event_name != 'schedule' }}
112+
working-directory: ${{ github.workspace }}
110113
env:
111114
GH_TOKEN: ${{ github.token }}
112115
id: get-head
113116
run: |
114-
if [ '${{ github.event_name = 'push' }}' == "true" ]; then
117+
if [ '${{ github.event_name == 'push' }}' == "true" ]; then
115118
# get the parent commit of the current one to get the relevant function signatures
116119
head_main=$(gh api -q '.parents.[0].sha' '/repos/facebookincubator/velox/commits/${{ github.sha }}')
117120
else
@@ -147,14 +150,14 @@ jobs:
147150
mkdir -p /tmp/signatures
148151
149152
- name: Checkout Main
150-
if: ${{ github.even_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
153+
if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
151154
uses: actions/checkout@v4
152155
with:
153156
ref: ${{ steps.get-head.outputs.head_main || 'main' }}
154157
path: velox_main
155158

156159
- name: Build PyVelox
157-
if: ${{ github.even_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
160+
if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
158161
working-directory: velox_main
159162
run: |
160163
python3 -m venv .venv
@@ -163,16 +166,17 @@ jobs:
163166
make python-build
164167
165168
- name: Create Baseline Signatures
166-
if: ${{ github.even_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
169+
if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
167170
working-directory: velox_main
168171
run: |
169172
source .venv/bin/activate
170173
python3 -m pip install deepdiff
171174
python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json
172175
python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json
176+
python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_main.json
173177
174178
- name: Save Function Signature Stash
175-
if: ${{ github.even_name == 'pull_request' && steps.get-sig.outputs.stash-hit != 'true' }}
179+
if: ${{ github.event_name == 'pull_request' && steps.get-sig.outputs.stash-hit != 'true' }}
176180
uses: assignUser/stash/save@v1
177181
with:
178182
path: /tmp/signatures
@@ -202,14 +206,14 @@ jobs:
202206
- name: Save ccache
203207
# see https://github.com/actions/upload-artifact/issues/543
204208
continue-on-error: true
205-
if: ${{ github.even_name != 'schedule' }}
209+
if: ${{ github.event_name != 'schedule' }}
206210
uses: assignUser/stash/save@v1
207211
with:
208212
path: "${{ env.CCACHE_DIR }}"
209213
key: ccache-fuzzer-centos
210214

211215
- name: Build PyVelox
212-
if: ${{ github.even_name != 'schedule' }}
216+
if: ${{ github.event_name != 'schedule' }}
213217
env:
214218
VELOX_BUILD_DIR: "_build/debug"
215219
run: |
@@ -218,15 +222,19 @@ jobs:
218222
python3 -m pip install -e .
219223
220224
- name: Create and test new function signatures
221-
if: ${{ github.even_name != 'schedule' }}
225+
if: ${{ github.event_name != 'schedule' }}
222226
id: sig-check
223227
run: |
224228
source .venv/bin/activate
225229
python3 -m pip install deepdiff
226230
python3 scripts/signature.py gh_bias_check presto spark
231+
python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contendor.json
232+
python3 scripts/signature.py bias_aggregates /tmp/signatures/presto_aggregate_signatures_main.json \
233+
/tmp/signatures/presto_aggregate_signatures_contendor.json /tmp/signatures/presto_aggregate_bias_functions \
234+
/tmp/signatures/presto_aggregate_errors
227235
228236
- name: Upload Signature Artifacts
229-
if: ${{ github.even_name != 'schedule' }}
237+
if: ${{ github.event_name != 'schedule' }}
230238
uses: actions/upload-artifact@v4
231239
with:
232240
name: signatures
@@ -246,7 +254,7 @@ jobs:
246254
done
247255
248256
- name: Save Function Signature Stash
249-
if: ${{ github.even_name == 'push' }}
257+
if: ${{ github.event_name == 'push' }}
250258
uses: assignUser/stash/save@v1
251259
with:
252260
path: /tmp/signatures
@@ -703,6 +711,81 @@ jobs:
703711
path: |
704712
/tmp/aggregate_fuzzer_repro
705713
/tmp/server.log
714+
715+
716+
# Runs the aggregation fuzzer against a local Presto server, restricted to the
# aggregate functions whose signatures changed. Only runs when the compile job
# reported `presto_aggregate_bias == 'true'`.
presto-bias-java-aggregation-fuzzer-run:
  name: Biased Aggregation Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  if: ${{ needs.compile.outputs.presto_aggregate_bias == 'true' }}
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:

    - name: Download aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: aggregation

    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this, but as we run in a container
      # it doesn't work.
      run: git config --global --add safe.directory /__w/velox/velox/velox

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    - name: "Run Bias Aggregate Fuzzer"
      # NOTE(review): DURATION is not set in this job's `env`; presumably it is
      # defined at workflow level - confirm.
      run: |
        cd velox
        cp ./scripts/etc/hive.properties "$PRESTO_HOME/etc/catalog"
        ls -lR "$PRESTO_HOME/etc"
        "$PRESTO_HOME/bin/launcher" run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/aggregate_fuzzer_repro/
        rm -rfv /tmp/aggregate_fuzzer_repro/*
        chmod -R 777 /tmp/aggregate_fuzzer_repro
        chmod +x velox_aggregation_fuzzer_test
        echo "signatures folder"
        ls /tmp/signatures/
        echo "Biased functions:"
        cat /tmp/signatures/presto_aggregate_bias_functions
        echo "Running Fuzzer for $DURATION"
        # Expansions are quoted so the function list and duration are each
        # passed as exactly one argument (no word splitting or globbing).
        ./velox_aggregation_fuzzer_test \
              --seed "${RANDOM}" \
              --duration_sec "$DURATION" \
              --logtostderr=1 \
              --minloglevel=1 \
              --repro_persist_path=/tmp/aggregate_fuzzer_repro \
              --enable_sorted_aggregations=true \
              --only="$(cat /tmp/signatures/presto_aggregate_bias_functions)" \
              --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nAggregation fuzzer run finished successfully."

    - name: Archive bias aggregate production artifacts
      # Upload repro files and the server log even when the fuzzer step failed.
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-bias-sot-aggregate-fuzzer-failure-artifacts
        path: |
          /tmp/aggregate_fuzzer_repro
          /tmp/server.log
706789
707790
surface-signature-errors:
708791
name: Signature Changes
@@ -721,3 +804,9 @@ jobs:
721804
run: |
722805
cat /tmp/signatures/presto_errors
723806
exit 1
807+
808+
- name: Surface Aggregate function signature errors
809+
if: ${{ needs.compile.outputs.presto_aggregate_error == 'true' }}
810+
run: |
811+
cat /tmp/signatures/presto_aggregate_errors
812+
exit 1

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ settings.json
279279

280280
# User's build configuration
281281
Makefile.config
282+
CMakeUserPresets.json
282283

283284
# build, distribute, and bins (+ python proto bindings)
284285
build

CMake/resolve_dependency_modules/README.md

+2-3
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ by Velox. See details on bundling below.
2323
| zstd | default | No |
2424
| openssl | default | No |
2525
| protobuf | 21 (exact) | Yes |
26-
| boost | 1.66.0 | Yes |
26+
| boost | 1.77.0 | Yes |
2727
| flex | 2.5.13 | No |
2828
| bison | 3.0.4 | No |
2929
| cmake | 3.14 | No |
3030
| double-conversion | 3.1.5 | No |
3131
| xsimd | 10.0.0 | Yes |
3232
| re2 | 2021-04-01 | Yes |
3333
| fmt | 10.1.1 | Yes |
34-
| simdjson | 3.2.0 | Yes |
34+
| simdjson | 3.8.0 | Yes |
3535
| folly | v2024.04.01.00 | Yes |
3636
| fizz | v2024.04.01.00 | No |
3737
| wangle | v2024.04.01.00 | No |
@@ -75,4 +75,3 @@ Ideally all patches should be upstream when possible and removed once merged.
7575
## Specify a custom url/file path for an offline build
7676

7777
Set the environment variable `VELOX_<PACKAGE>_URL` to specify a custom dependency URL or local tar file path. An optional sha256 checksum can be provided as `VELOX_<PACKAGE>_SHA256`.
78-

CMake/resolve_dependency_modules/simdjson.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
# limitations under the License.
1414
include_guard(GLOBAL)
1515

16-
set(VELOX_SIMDJSON_VERSION 3.2.0)
16+
set(VELOX_SIMDJSON_VERSION 3.8.0)
1717
set(VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM
18-
75a684dbbe38cf72b8b3bdbdc430764813f3615899a6029931c26ddd89812da4)
18+
e28e3f46f0012d405b67de6c0a75e8d8c9a612b0548cb59687822337d73ca78b)
1919
set(VELOX_SIMDJSON_SOURCE_URL
2020
"https://github.com/simdjson/simdjson/archive/refs/tags/v${VELOX_SIMDJSON_VERSION}.tar.gz"
2121
)

CMakeLists.txt

+9-3
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ set(VELOX_DEPENDENCY_SOURCE
8181
STRING
8282
"Default source for all dependencies with source builds enabled: AUTO SYSTEM BUNDLED."
8383
)
84+
set(VELOX_GFLAGS_TYPE
85+
"shared"
86+
CACHE
87+
STRING
88+
"Specify whether to find the gflags package as a shared or static package"
89+
)
8490
option(VELOX_ENABLE_EXEC "Build exec." ON)
8591
option(VELOX_ENABLE_AGGREGATES "Build aggregates." ON)
8692
option(VELOX_ENABLE_HIVE_CONNECTOR "Build Hive connector." ON)
@@ -378,13 +384,13 @@ set(BOOST_INCLUDE_LIBRARIES
378384
thread)
379385

380386
set_source(Boost)
381-
resolve_dependency(Boost 1.66.0 COMPONENTS ${BOOST_INCLUDE_LIBRARIES})
387+
resolve_dependency(Boost 1.77.0 COMPONENTS ${BOOST_INCLUDE_LIBRARIES})
382388

383389
# Range-v3 will be enabled when the codegen code actually lands; keeping it here
384390
# for reference. find_package(range-v3)
385391

386392
set_source(gflags)
387-
resolve_dependency(gflags COMPONENTS shared)
393+
resolve_dependency(gflags COMPONENTS ${VELOX_GFLAGS_TYPE})
388394
if(NOT TARGET gflags::gflags)
389395
# This is a bit convoluted, but we want to be able to use gflags::gflags as a
390396
# target even when velox is built as a subproject which uses
@@ -441,7 +447,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE})
441447
endif()
442448

443449
set_source(simdjson)
444-
resolve_dependency(simdjson 3.2.0)
450+
resolve_dependency(simdjson 3.8.0)
445451

446452
# Locate or build folly.
447453
add_compile_definitions(FOLLY_HAVE_INT128_T=1)

scripts/signature.py

+53
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import argparse
1515
import json
1616
import os
17+
import re
1718
import sys
1819
from typing import Any
1920

@@ -33,6 +34,9 @@ class bcolors:
3334
BOLD = "\033[1m"
3435

3536

37+
# Splits an aggregate function name into a base name (group 1) and a
# `_merge`, `_merge_extract` or `_partial` marker (group 2). `.*` is greedy, so
# group 1 is the longest prefix that is followed by one of the markers. The
# pattern has no end anchor, so any text after the marker is ignored.
aggregate_pattern = re.compile("(.*)(_merge|_merge_extract|_partial)")
38+
39+
3640
def get_error_string(error_message):
3741
return f"""
3842
Incompatible changes in function signatures have been detected.
@@ -210,6 +214,48 @@ def bias_signatures(base_signatures, contender_signatures, tickets, error_path):
210214
return "", status
211215

212216

217+
def bias_aggregates(args):
    """
    Finds and exports aggregates whose signatures have been modified against a baseline.
    Saves the results to a file and sets a Github Actions Output.
    Currently this is hardcoded to presto aggregates.

    Args:
        args: Parsed command line arguments providing
            base: path to the JSON file with the baseline signatures.
            contender: path to the JSON file with the contender signatures.
            output_path: file that receives the comma separated names of the
                changed functions.
            error_path: forwarded unchanged to diff_signatures.

    Returns:
        The status reported by diff_signatures when the delta is empty,
        otherwise 0.
    """
    with open(args.base) as f:
        base_signatures = json.load(f)

    with open(args.contender) as f:
        contender_signatures = json.load(f)

    delta, status = diff_signatures(
        base_signatures, contender_signatures, args.error_path
    )

    set_gh_output("presto_aggregate_error", status == 1)

    if not delta:
        print(f"{bcolors.BOLD} No changes detected: Nothing to do!")
        return status

    function_set = set()
    for items in delta.values():
        for item in items:
            # NOTE(review): presumably the root key is the name of the changed
            # function - confirm against diff_signatures.
            fn_name = item.get_root_key()
            # Collapse `<name>_merge*` / `<name>_partial*` entries onto
            # `<name>` so each aggregate is listed only once.
            matched = aggregate_pattern.match(fn_name)
            function_set.add(matched.group(1) if matched else fn_name)

    if function_set:
        # Sort the names: set iteration order is not stable between runs, and
        # this file is consumed verbatim by the fuzzer's --only flag.
        biased_functions = ",".join(sorted(function_set))
        with open(args.output_path, "w") as f:
            # No trailing newline, the content is used via `$(cat ...)`.
            f.write(biased_functions)

        set_gh_output("presto_aggregate_functions", True)

    return 0
257+
258+
213259
def gh_bias_check(args):
214260
"""
215261
Exports signatures for the given group(s) and checks them for changes compared to a baseline.
@@ -294,6 +340,7 @@ def parse_args(args):
294340
"ticket_value", type=get_tickets, default=10, nargs="?"
295341
)
296342
bias_command_parser.add_argument("error_path", type=str, default="")
343+
297344
gh_command_parser = command.add_parser("gh_bias_check")
298345
gh_command_parser.add_argument(
299346
"group",
@@ -314,6 +361,12 @@ def parse_args(args):
314361
"--output_postfix", type=str, default="_bias_functions"
315362
)
316363

364+
bias_aggregate_command_parser = command.add_parser("bias_aggregates")
365+
bias_aggregate_command_parser.add_argument("base", type=str)
366+
bias_aggregate_command_parser.add_argument("contender", type=str)
367+
bias_aggregate_command_parser.add_argument("output_path", type=str)
368+
bias_aggregate_command_parser.add_argument("error_path", type=str, default="")
369+
317370
parser.set_defaults(command="help")
318371

319372
return parser.parse_args(args)

0 commit comments

Comments
 (0)