Skip to content

Commit 2fdfdd5

Browse files
helly25 and fmeum authored
Find LLVM distributions automatically. (#471)
See issue: #473 Instead of implementing a complex way of predicting the basename, find the basenames by common prefixes in the database we have anyway. Rather than forcing maintainers to enter more complex information, this keeps us at simply adding the versions as generated by the hasher script. This should also ultimately put issue #464 and any future occurrence to rest. The results of the new search-based algorithm are generally better. The algorithm removes a lot of the mistakes of the old implementation, is more correct in some cases, and makes other new correct decisions. Yet the new algorithm may not always be perfect, as it may have inherited some bad decisions of the old algorithm. As the diff in `toolchain/internal/llvm_distributions.golden.out.txt` shows, we only drop `del:` lines, which means we can find more distributions. The diff in the selection golden file (the `llvm_distributions.golden.sel.*.txt` files compared below) is harder to interpret. The diffs are best viewed side by side, as both the old and the new file are supposed to have the same contents; at least their "keys" are in the same order. I use a custom diff program that is designed for this. It strips the actual error messages. When looking only at new errors (as shown below), we see no difference: ```sh bazel run //mbo/diff -- --regex_replace_lhs='/(.*)(ERROR).*/\1\2/' --regex_replace_rhs='/(.*)(ERROR).*/\1\2/' ~/llvm_distributions.golden.sel.orig.txt ~/llvm_distributions.golden.sel.find.txt --algorithm=direct | awk '{l=c;c=$0}/^+.*ERROR/{print l"\n"c}' ``` If we look at all differences, we see some 782 former errors that now result in a distribution. They now find the likely best choice. Some of these can be improved, but IMO it is not worth doing so. The PR does not change anything regarding manually provided information, but some folks will be able to switch to automatic selection instead. --------- Co-authored-by: Fabian Meumertzheim <fabian@meumertzhe.im>
1 parent 60c9695 commit 2fdfdd5

File tree

4 files changed

+18626
-18811
lines changed

4 files changed

+18626
-18811
lines changed

toolchain/internal/llvm_distributions.bzl

Lines changed: 260 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,6 @@ load(
1919
"exec_os_arch_dict_value",
2020
"host_info",
2121
)
22-
load(
23-
"//toolchain/internal:release_name.bzl",
24-
"llvm_release_name_context",
25-
"llvm_release_name_host_info",
26-
)
2722

2823
# If a new LLVM version is missing from this list, please add the shasums here
2924
# and the new version in toolchain/internal/llvm_distributions.golden.txt.
@@ -741,19 +736,268 @@ def _get_llvm_version(rctx):
741736
)
742737
return llvm_version
743738

739+
# Distribution names (`dist.name`) that are resolved through the Ubuntu archive
# names: `_dist_to_os_names` and `_find_llvm_basename_list` both test membership
# in this list.
_UBUNTU_NAMES = [
740+
"arch",
741+
"linuxmint",
742+
"manjaro",
743+
"nixos",
744+
"pop",
745+
"ubuntu",
746+
]
747+
748+
# OS-name candidates that `_dist_to_os_names` appends, in this order, as
# fallbacks for Debian and for the distributions listed in `_UBUNTU_NAMES`.
_UBUNTU_VERSIONS = [
749+
"linux-ubuntu-20.04",
750+
"linux-ubuntu-18.04",
751+
"linux-ubuntu-18.04.6",
752+
"linux-ubuntu-18.04.5",
753+
"linux-ubuntu-16.04",
754+
"linux-gnu-ubuntu-22.04",
755+
"linux-gnu-ubuntu-20.10",
756+
"linux-gnu-ubuntu-20.04",
757+
"linux-gnu-ubuntu-18.04",
758+
"linux-gnu-ubuntu-16.04",
759+
"linux-gnu-ubuntu-14.04",
760+
"linux-gnu",
761+
"unknown-linux-gnu",
762+
"unknown-linux-gnu-rhel86",
763+
]
764+
765+
def _dist_to_os_names(dist, default_os_names = []):
    """Return the ordered OS-name candidates to try for a distribution.

    Args:
        dist: Distribution info providing `name` and `version`.
        default_os_names: Returned as is when `dist.name` is not handled here.

    Returns:
        A list of OS-name candidates; earlier entries are preferred.
    """
    name = dist.name

    # Ubuntu LTS archives shared by several of the fallback lists below.
    gnu_ubuntu_lts = [
        "linux-gnu-ubuntu-22.04",
        "linux-gnu-ubuntu-20.04",
        "linux-gnu-ubuntu-18.04",
        "linux-gnu-ubuntu-16.04",
    ]

    if name == "amzn" or name == "suse":
        # For "amzn", based on the ID_LIKE field, sles seems like the closest
        # available distro for which LLVM releases are widely available.
        # The order is important here as we want to find the best match
        # without implementing complex version comparisons.
        sles_names = [
            "linux-sles" + dist.version,
            "linux-sles12.4",
            "linux-sles12.3",
            "linux-sles12.2",
            "linux-sles11.3",
            "linux-sles",
        ]
        other_names = ["unknown-linux-gnu-rhel86", "linux-gnu-ubuntu-24.04"]
        return sles_names + other_names + gnu_ubuntu_lts + ["linux-gnu-ubuntu-"]

    # Distributions whose candidates do not depend on the host version.
    # For "centos" and "fedora" the Ubuntu entries could be replaced with
    # _UBUNTU_VERSIONS, which spawns more selections and changes a few to
    # near Ubuntu versions.
    rhel_names = ["linux-rhel-", "linux-gnu-rhel-"]
    fixed_names = {
        "centos": ["linux-gnu", "unknown-linux-gnu"] + gnu_ubuntu_lts,
        "fedora": [
            "linux-gnu-Fedora27",
            "unknown-linux-gnu-rhel86",
            "linux-gnu",
            "unknown-linux-gnu",
        ] + gnu_ubuntu_lts + [
            "linux-ubuntu-20.04",
            "linux-ubuntu-18.04",
            "linux-ubuntu-18.04.6",
            "linux-ubuntu-18.04.5",
            "linux-ubuntu-16.04",
        ],
        "freebsd": ["unknown-freebsd", "unknown-freebsd-"],
        "raspbian": ["linux-gnueabihf", "linux-gnu"],
        "rhel": rhel_names,
        "ol": rhel_names,
        "almalinux": rhel_names,
    }
    if name in fixed_names:
        return fixed_names[name]

    if name == "debian":
        return ["linux-gnu-debian8"] + _UBUNTU_VERSIONS

    if name in _UBUNTU_NAMES:
        # Prefer the archive built for the exact host version, then fall back.
        versioned = [
            "linux-gnu-ubuntu-" + dist.version,
            "linux-ubuntu-" + dist.version,
        ]
        return versioned + _UBUNTU_VERSIONS

    return default_os_names
831+
832+
def _find_llvm_basenames_by_stem(prefixes, *, is_prefix = False, return_first_match = False):
    """Find archive basenames in `_llvm_distributions` for the given stems.

    Stems are processed in order. For each stem, `<stem>.tar.gz` and then
    `<stem>.tar.xz` are looked up; the first exact hit is returned right away
    as a single-element list, discarding any prefix matches collected so far.

    Args:
        prefixes: Ordered list of basename stems (names without archive suffix).
        is_prefix: If True, a stem without an exact hit is also used as a
            prefix: every `.tar.gz`/`.tar.xz` entry starting with it matches.
        return_first_match: If True, return the first prefix match as a
            single-element list instead of collecting all of them. Has no
            effect unless `is_prefix` is set.

    Returns:
        A list of matching basenames without duplicates; empty if none match.
    """
    archive_suffixes = [".tar.gz", ".tar.xz"]
    found = []
    for stem in prefixes:
        # An exact archive name always wins and ends the search.
        for ext in archive_suffixes:
            exact_name = stem + ext
            if exact_name in _llvm_distributions:
                return [exact_name]

        if is_prefix:
            for candidate in _llvm_distributions:
                if candidate in found or not candidate.startswith(stem):
                    continue
                if not (candidate.endswith(".tar.gz") or candidate.endswith(".tar.xz")):
                    continue
                if return_first_match:
                    return [candidate]
                found.append(candidate)
    return found
850+
744851
def _find_llvm_basename_list(llvm_version, host_info):
745852
"""Lookup (llvm_version, arch, os) in the list of basenames in `_llvm_distributions`.

Candidate names are built from `host_info.os`, `host_info.arch` and
`host_info.dist` and searched with `_find_llvm_basenames_by_stem`.

Args:
    llvm_version: LLVM version string used in the archive names.
    host_info: Host description providing `os`, `arch` and `dist`
        (`dist.name`, `dist.version`).

Returns:
    A list of matching basenames; empty when nothing matches, and possibly
    more than one entry for prefix searches that collect all matches.
"""
746-
name, _ = llvm_release_name_host_info(llvm_version, host_info)
747-
if name in _llvm_distributions:
748-
return [name]
853+
arch = host_info.arch
854+
os = host_info.os
855+
dist = host_info.dist
856+
857+
# Prefer new LLVM distributions if available
# (named `LLVM-<version>-<OS>-<ARCH>`); `is_prefix` is not set, so only exact
# archive names match here.
858+
basenames = _find_llvm_basenames_by_stem([
859+
"LLVM-{llvm_version}-{os}-{arch}".format(
860+
llvm_version = llvm_version,
861+
arch = {
862+
"aarch64": "ARM64",
863+
"x86_64": "X64",
864+
}.get(arch, arch),
865+
os = {
866+
"darwin": "macOS",
867+
"linux": "Linux",
868+
"windows": "Windows",
869+
}.get(os, os),
870+
),
871+
])
872+
if basenames:
873+
return basenames
874+
875+
# First by 'os', then by 'dist', then the remaining Linux variants...
876+
if os == "darwin":
# Prefix search over the known Apple OS spellings; all matches are returned.
877+
return _find_llvm_basenames_by_stem([
878+
"clang+llvm-{llvm_version}-{arch}-{os}".format(
879+
llvm_version = llvm_version,
880+
arch = {
881+
"aarch64": "arm64",
882+
}.get(arch, arch),
883+
os = select_os,
884+
)
885+
for select_os in ["apple-darwin", "apple-macos", "darwin-apple"]
886+
], is_prefix = True)
887+
elif os == "windows":
888+
return _find_llvm_basenames_by_stem([
889+
"clang+llvm-{llvm_version}-{arch}-{os}".format(
890+
llvm_version = llvm_version,
891+
arch = arch,
892+
os = "pc-windows-msvc",
893+
),
894+
])
895+
elif dist.name in ["amzn", "suse"] and arch == "x86_64":
896+
return _find_llvm_basenames_by_stem([
897+
"clang+llvm-{llvm_version}-{arch}-{os}".format(
898+
llvm_version = llvm_version,
899+
arch = arch,
900+
os = suse_os,
901+
)
902+
for suse_os in _dist_to_os_names(dist)
903+
], is_prefix = True, return_first_match = True)
904+
elif dist.name in _UBUNTU_NAMES:
# NOTE(review): `return_first_match` only affects prefix searches and
# `is_prefix` is not set below, so this call matches exact archive names only.
905+
arch_list = {
906+
"sparcv9": ["sparc64", "sparcv9"],
907+
}.get(arch, [arch])
908+
return _find_llvm_basenames_by_stem([
909+
"clang+llvm-{llvm_version}-{arch}-{os}".format(
910+
llvm_version = llvm_version,
911+
arch = select_arch,
912+
os = select_os,
913+
)
914+
for select_os in _dist_to_os_names(dist)
915+
for select_arch in arch_list
916+
], return_first_match = True)
917+
918+
elif dist.name == "raspbian":
919+
return _find_llvm_basenames_by_stem([
920+
"clang+llvm-{llvm_version}-{arch}-{os}".format(
921+
llvm_version = llvm_version,
922+
arch = arch,
923+
os = select_os,
924+
)
925+
for select_os in _dist_to_os_names(dist)
926+
])
927+
elif os == "linux":
928+
if arch in ["aarch64", "armv7a", "mips", "mipsel"]:
929+
return _find_llvm_basenames_by_stem(["clang+llvm-{llvm_version}-{arch}-{os}".format(
930+
llvm_version = llvm_version,
931+
arch = arch,
932+
os = "linux-gnu",
933+
)])
934+
elif arch in ["sparc64", "sparcv9"]:
935+
return _find_llvm_basenames_by_stem([
936+
"clang+llvm-{llvm_version}-{arch}-{os}".format(
937+
llvm_version = llvm_version,
938+
arch = arch_alias,
939+
os = "unknown-linux-gnu",
940+
)
941+
for arch_alias in ["sparc64", "sparcv9"]
942+
])
943+
944+
# Architecture spellings tried for each candidate OS name below.
arch_alias_list = {
945+
"x86_32": ["x86_32", "i386"],
946+
"x86_64": ["x86_64", "amd64", "i686"],
947+
"powerpc64": ["powerpc64", "final_powerpc64"],
948+
}.get(arch, [arch])
949+
950+
prefixes = []
951+
for dist_name in _dist_to_os_names(dist, [dist.name]):
952+
for arch_alias in arch_alias_list:
# Try the exact name including the distribution version first.
953+
basenames = _find_llvm_basenames_by_stem([
954+
"clang+llvm-{llvm_version}-{arch}-{dist_name}{dist_version}".format(
955+
llvm_version = llvm_version,
956+
arch = arch_alias,
957+
dist_name = dist_name,
958+
dist_version = dist.version,
959+
),
960+
])
961+
if basenames:
962+
return basenames
# Otherwise remember the version-less name for the final prefix search.
963+
if dist.name not in ["freebsd"]:
964+
prefixes.append("clang+llvm-{llvm_version}-{arch}-{dist_name}".format(
965+
llvm_version = llvm_version,
966+
arch = arch_alias,
967+
dist_name = dist_name,
968+
))
969+
return _find_llvm_basenames_by_stem(prefixes, is_prefix = True, return_first_match = True)
749970
return []
750971

972+
def _find_llvm_basename_or_error(llvm_version, host_info):
    """Resolve exactly one LLVM distribution basename for a host.

    Args:
        llvm_version: LLVM version string to look up.
        host_info: Host description passed on to `_find_llvm_basename_list`.

    Returns:
        A `(basename, error)` pair: `(basename, None)` when exactly one
        distribution was found, otherwise `(None, message)`.
    """
    candidates = _find_llvm_basename_list(llvm_version, host_info)

    if not candidates:
        # TODO(helly25): Enable better error message:
        #"ERROR: No matching config could be found for version {llvm_version} on {os}/{dist_name}/{dist_version} with arch {arch}.".format(
        #    llvm_version = llvm_version,
        #    os = host_info.os,
        #    dist_name = host_info.dist.name,
        #    dist_version = host_info.dist.version,
        #    arch = host_info.arch,
        #)
        return None, "ERROR: No version selected"

    if len(candidates) != 1:
        message = "ERROR: Multiple configurations found [{basenames}].".format(
            basenames = ", ".join(candidates),
        )
        return None, message

    # Use the following for debugging:
    # print("Found LLVM: " + candidates[0])  # buildifier: disable=print
    return candidates[0], None
992+
751993
def _distribution_urls(rctx):
752994
"""Return LLVM `urls`, `shha256` and `strip_prefix` for the given context."""
753995
llvm_version = _get_llvm_version(rctx)
754996

755997
if rctx.attr.distribution == "auto":
756-
basename = llvm_release_name_context(rctx, llvm_version)
998+
basename, error = _find_llvm_basename_or_error(llvm_version, host_info(rctx))
999+
if error:
1000+
fail(error)
7571001
else:
7581002
basename = rctx.attr.distribution
7591003

@@ -912,7 +1156,7 @@ def _write_distributions_impl(ctx):
9121156
)
9131157
basenames = _find_llvm_basename_list(_version_string(version), host_info)
9141158
if version <= MAX_VERSION:
915-
predicted, error = llvm_release_name_host_info(
1159+
predicted, error = _find_llvm_basename_or_error(
9161160
_version_string(version),
9171161
host_info,
9181162
)
@@ -921,7 +1165,13 @@ def _write_distributions_impl(ctx):
9211165
error = "ERROR: Windows .exe is not supported: " + predicted
9221166
elif predicted not in _llvm_distributions:
9231167
error = "ERROR: Unavailable prediction: " + predicted
1168+
elif len(basenames) == 0:
1169+
predicted = "ERROR: No version selected"
1170+
elif len(basenames) == 1:
1171+
predicted = basenames[0]
9241172
else:
1173+
predicted = "ERROR: Multiple selections"
1174+
if not error:
9251175
arch_found = [arch for arch in arch_list if arch in predicted]
9261176
if len(arch_found) == 1 and arch_found[0] != arch:
9271177
error = "ERROR: Bad arch selection: " + predicted

0 commit comments

Comments
 (0)