From 97c609262cde43f3207a0e28a130f5a25f814582 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 12 Oct 2024 13:29:19 -0600 Subject: [PATCH] Collapse the OOB framework We played with alternative implementations over the years, but nothing ever really stuck. So let's simplify the code by removing the framework and the associated loopbacks that were built into it (e.g., if a message cannot be sent, then loop it back into the OOB base to see if another component is available that can send it). The code badly needs reorganization as I've made no attempt to do so here. A pass to see if event steps can be eliminated would also be good - I've cleaned up a few of them, but what remains could use another pair of eyes. Signed-off-by: Ralph Castain (from upstream commit 7b54c48930ea5d3dec056db939ac0fdecc34abc1) --- src/mca/ess/base/ess_base_std_prted.c | 60 +- src/mca/ess/hnp/ess_hnp_module.c | 41 +- src/mca/oob/Makefile.am | 40 - src/mca/oob/base/Makefile.am | 32 - src/mca/oob/base/oob_base_frame.c | 111 -- src/mca/oob/base/oob_base_select.c | 161 -- src/mca/oob/base/oob_base_stubs.c | 382 ----- src/mca/oob/base/owner.txt | 7 - src/mca/oob/oob.h | 83 -- src/mca/oob/tcp/Makefile.am | 63 - src/mca/oob/tcp/configure.m4 | 39 - src/mca/oob/tcp/oob_tcp.c | 282 ---- src/mca/oob/tcp/oob_tcp_component.c | 1293 ----------------- src/mca/oob/tcp/oob_tcp_component.h | 103 -- src/mca/plm/ssh/plm_ssh_module.c | 7 +- src/prted/prted_comm.c | 1 - src/rml/Makefile.am | 4 +- src/rml/oob/Makefile.am | 46 + .../oob/base => rml/oob}/help-oob-base.txt | 1 + src/{mca/oob/tcp => rml/oob}/help-oob-tcp.txt | 1 + src/{mca/oob/base/base.h => rml/oob/oob.h} | 70 +- src/rml/oob/oob_base_stubs.c | 494 +++++++ src/rml/oob/oob_tcp.c | 813 +++++++++++ src/{mca/oob/tcp => rml/oob}/oob_tcp.h | 35 +- src/{mca/oob/tcp => rml/oob}/oob_tcp_common.c | 53 +- src/{mca/oob/tcp => rml/oob}/oob_tcp_common.h | 12 +- src/rml/oob/oob_tcp_component.c | 266 ++++ .../oob/tcp => rml/oob}/oob_tcp_connection.c | 116 +- 
.../oob/tcp => rml/oob}/oob_tcp_connection.h | 22 +- src/{mca/oob/tcp => rml/oob}/oob_tcp_hdr.h | 2 +- .../oob/tcp => rml/oob}/oob_tcp_listener.c | 156 +- .../oob/tcp => rml/oob}/oob_tcp_listener.h | 4 +- src/{mca/oob/tcp => rml/oob}/oob_tcp_peer.h | 6 +- .../oob/tcp => rml/oob}/oob_tcp_sendrecv.c | 51 +- .../oob/tcp => rml/oob}/oob_tcp_sendrecv.h | 18 +- src/{mca/oob/tcp => rml/oob}/owner.txt | 0 src/rml/rml.c | 72 +- src/rml/rml.h | 5 +- src/rml/rml_send.c | 4 +- src/runtime/prte_init.c | 1 - src/util/proc_info.c | 1 + src/util/proc_info.h | 1 + 42 files changed, 1971 insertions(+), 2988 deletions(-) delete mode 100644 src/mca/oob/Makefile.am delete mode 100644 src/mca/oob/base/Makefile.am delete mode 100644 src/mca/oob/base/oob_base_frame.c delete mode 100644 src/mca/oob/base/oob_base_select.c delete mode 100644 src/mca/oob/base/oob_base_stubs.c delete mode 100644 src/mca/oob/base/owner.txt delete mode 100644 src/mca/oob/oob.h delete mode 100644 src/mca/oob/tcp/Makefile.am delete mode 100644 src/mca/oob/tcp/configure.m4 delete mode 100644 src/mca/oob/tcp/oob_tcp.c delete mode 100644 src/mca/oob/tcp/oob_tcp_component.c delete mode 100644 src/mca/oob/tcp/oob_tcp_component.h create mode 100644 src/rml/oob/Makefile.am rename src/{mca/oob/base => rml/oob}/help-oob-base.txt (93%) rename src/{mca/oob/tcp => rml/oob}/help-oob-tcp.txt (98%) rename src/{mca/oob/base/base.h => rml/oob/oob.h} (65%) create mode 100644 src/rml/oob/oob_base_stubs.c create mode 100644 src/rml/oob/oob_tcp.c rename src/{mca/oob/tcp => rml/oob}/oob_tcp.h (64%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_common.c (77%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_common.h (77%) create mode 100644 src/rml/oob/oob_tcp_component.c rename src/{mca/oob/tcp => rml/oob}/oob_tcp_connection.c (92%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_connection.h (83%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_hdr.h (97%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_listener.c (81%) rename src/{mca/oob/tcp => 
rml/oob}/oob_tcp_listener.h (93%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_peer.h (96%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_sendrecv.c (94%) rename src/{mca/oob/tcp => rml/oob}/oob_tcp_sendrecv.h (95%) rename src/{mca/oob/tcp => rml/oob}/owner.txt (100%) diff --git a/src/mca/ess/base/ess_base_std_prted.c b/src/mca/ess/base/ess_base_std_prted.c index 4e48cce7d6..e130b0fa0b 100644 --- a/src/mca/ess/base/ess_base_std_prted.c +++ b/src/mca/ess/base/ess_base_std_prted.c @@ -53,7 +53,6 @@ #include "src/mca/grpcomm/grpcomm.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/prtereachable/base/base.h" #include "src/mca/rmaps/base/base.h" @@ -99,9 +98,8 @@ int prte_ess_base_prted_setup(void) int fd; char log_file[PATH_MAX]; char *error = NULL; - char *uri = NULL; - char *tmp; - prte_job_t *jdata; + char *tmp = NULL; + prte_job_t *jdata = NULL; prte_proc_t *proc; prte_app_context_t *app; hwloc_obj_t obj; @@ -109,7 +107,6 @@ int prte_ess_base_prted_setup(void) prte_topology_t *t; prte_ess_base_signal_t *sig; int idx; - pmix_value_t val; plm_in_use = false; @@ -245,7 +242,7 @@ int prte_ess_base_prted_setup(void) /* obviously, we have "reported" */ jdata->num_reported = 1; - /* setup my session directory here as the OOB may need it */ + /* setup my session directory here */ PMIX_OUTPUT_VERBOSE( (2, prte_ess_base_framework.framework_output, "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", @@ -316,61 +313,15 @@ int prte_ess_base_prted_setup(void) error = "prte_prtereachable_base_select"; goto error; } - if (PRTE_SUCCESS - != (ret = pmix_mca_base_framework_open(&prte_oob_base_framework, - PMIX_MCA_BASE_OPEN_DEFAULT))) { - PRTE_ERROR_LOG(ret); - error = "prte_oob_base_open"; - goto error; - } - if (PRTE_SUCCESS != (ret = prte_oob_base_select())) { + if (PRTE_SUCCESS != (ret = prte_rml_open())) { PRTE_ERROR_LOG(ret); - error = "prte_oob_base_select"; + 
error = "prte_rml_open"; goto error; } - prte_rml_open(); /* it is now safe to start the pmix server */ pmix_server_start(); - /* store our URI for later */ - prte_oob_base_get_addr(&uri); - PMIX_VALUE_LOAD(&val, uri, PMIX_STRING); - ret = PMIx_Store_internal(PRTE_PROC_MY_NAME, PMIX_PROC_URI, &val); - if (PMIX_SUCCESS != ret) { - PMIX_VALUE_DESTRUCT(&val); - error = "store MY URI"; - ret = PRTE_ERROR; - goto error; - } - PMIX_VALUE_DESTRUCT(&val); - free(uri); - - if (NULL != prte_process_info.my_hnp_uri) { - /* extract the HNP's name so we can update the routing table */ - ret = prte_rml_parse_uris(prte_process_info.my_hnp_uri, - PRTE_PROC_MY_HNP, - NULL); - if (PRTE_SUCCESS != ret) { - PRTE_ERROR_LOG(ret); - error = "prte_rml_parse_HNP"; - goto error; - } - /* Set the contact info in the RML - this won't actually establish - * the connection, but just tells the RML how to reach the HNP - * if/when we attempt to send to it - */ - PMIX_VALUE_LOAD(&val, prte_process_info.my_hnp_uri, PMIX_STRING); - ret = PMIx_Store_internal(PRTE_PROC_MY_HNP, PMIX_PROC_URI, &val); - if (PMIX_SUCCESS != ret) { - PMIX_VALUE_DESTRUCT(&val); - error = "store HNP URI"; - ret = PRTE_ERROR; - goto error; - } - PMIX_VALUE_DESTRUCT(&val); - } - /* select the errmgr */ if (PRTE_SUCCESS != (ret = prte_errmgr_base_select())) { PRTE_ERROR_LOG(ret); @@ -547,7 +498,6 @@ int prte_ess_base_prted_finalize(void) (void) pmix_mca_base_framework_close(&prte_odls_base_framework); (void) pmix_mca_base_framework_close(&prte_errmgr_base_framework); prte_rml_close(); - (void) pmix_mca_base_framework_close(&prte_oob_base_framework); (void) pmix_mca_base_framework_close(&prte_prtereachable_base_framework); (void) pmix_mca_base_framework_close(&prte_state_base_framework); diff --git a/src/mca/ess/hnp/ess_hnp_module.c b/src/mca/ess/hnp/ess_hnp_module.c index ca0185349f..14b0ad4e59 100644 --- a/src/mca/ess/hnp/ess_hnp_module.c +++ b/src/mca/ess/hnp/ess_hnp_module.c @@ -59,7 +59,6 @@ #include 
"src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/plm/plm.h" #include "src/mca/prtereachable/base/base.h" @@ -107,8 +106,6 @@ static int rte_init(int argc, char **argv) prte_app_context_t *app; int idx; prte_topology_t *t; - pmix_value_t pval; - pmix_status_t pret; PRTE_HIDE_UNUSED_PARAMS(argc); /* run the prolog */ @@ -234,7 +231,7 @@ static int rte_init(int argc, char **argv) jdata->num_reported = 1; jdata->num_daemons_reported = 1; - /* setup my session directory here as the OOB may need it */ + /* setup my session directory here */ PMIX_OUTPUT_VERBOSE((2, prte_debug_output, "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), @@ -268,38 +265,11 @@ static int rte_init(int argc, char **argv) error = "prte_prtereachable_base_select"; goto error; } - /* - * OOB Layer - */ - if (PRTE_SUCCESS - != (ret = pmix_mca_base_framework_open(&prte_oob_base_framework, - PMIX_MCA_BASE_OPEN_DEFAULT))) { - error = "prte_oob_base_open"; - goto error; - } - if (PRTE_SUCCESS != (ret = prte_oob_base_select())) { - error = "prte_oob_base_select"; - goto error; - } - - // set our RML address - prte_oob_base_get_addr(&proc->rml_uri); - prte_process_info.my_hnp_uri = strdup(proc->rml_uri); - /* store it in the local PMIx repo for later retrieval */ - PMIX_VALUE_LOAD(&pval, proc->rml_uri, PMIX_STRING); - if (PMIX_SUCCESS != (pret = PMIx_Store_internal(PRTE_PROC_MY_NAME, PMIX_PROC_URI, &pval))) { - PMIX_ERROR_LOG(pret); - ret = PRTE_ERROR; - PMIX_VALUE_DESTRUCT(&pval); - error = "store uri"; + if (PRTE_SUCCESS != (ret = prte_rml_open())) { + PRTE_ERROR_LOG(ret); + error = "prte_rml_open"; goto error; } - PMIX_VALUE_DESTRUCT(&pval); - - /* - * Runtime Messaging Layer - */ - prte_rml_open(); /* it is now safe to start the pmix server */ pmix_server_start(); @@ -471,7 +441,7 @@ static int rte_init(int argc, char 
**argv) static int rte_finalize(void) { /* first stage shutdown of the errmgr, deregister the handler but keep - * the required facilities until the rml and oob are offline */ + * the required facilities until the rml is offline */ prte_errmgr.finalize(); /* close frameworks */ @@ -486,7 +456,6 @@ static int rte_finalize(void) (void) pmix_mca_base_framework_close(&prte_rtc_base_framework); (void) pmix_mca_base_framework_close(&prte_odls_base_framework); prte_rml_close(); - (void) pmix_mca_base_framework_close(&prte_oob_base_framework); (void) pmix_mca_base_framework_close(&prte_prtereachable_base_framework); (void) pmix_mca_base_framework_close(&prte_errmgr_base_framework); (void) pmix_mca_base_framework_close(&prte_state_base_framework); diff --git a/src/mca/oob/Makefile.am b/src/mca/oob/Makefile.am deleted file mode 100644 index 1a97fbbe79..0000000000 --- a/src/mca/oob/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2019 Intel, Inc. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libprtemca_oob.la -libprtemca_oob_la_SOURCES = - -# pkgdata setup -dist_prtedata_DATA = - -# local files -headers = oob.h -libprtemca_oob_la_SOURCES += $(headers) - -# Conditionally install the header files -prtedir = $(prteincludedir)/$(subdir) -nobase_prte_HEADERS = $(headers) - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/src/mca/oob/base/Makefile.am b/src/mca/oob/base/Makefile.am deleted file mode 100644 index b3cdb6a3d1..0000000000 --- a/src/mca/oob/base/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. -# Copyright (c) 2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2022 Nanook Consulting. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_prtedata_DATA += base/help-oob-base.txt - -headers += \ - base/base.h - -libprtemca_oob_la_SOURCES += \ - base/oob_base_stubs.c \ - base/oob_base_frame.c \ - base/oob_base_select.c diff --git a/src/mca/oob/base/oob_base_frame.c b/src/mca/oob/base/oob_base_frame.c deleted file mode 100644 index 9102fa94d9..0000000000 --- a/src/mca/oob/base/oob_base_frame.c +++ /dev/null @@ -1,111 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" -#include "constants.h" - -#include "src/class/pmix_bitmap.h" -#include "src/mca/base/pmix_base.h" -#include "src/mca/mca.h" -#include "src/pmix/pmix-internal.h" -#include "src/runtime/prte_progress_threads.h" -#include "src/util/pmix_output.h" - -#include "src/mca/oob/base/base.h" -#include "src/rml/rml.h" - -/* - * The following file was created by configure. 
It contains extern - * statements and the definition of an array of pointers to each - * component's public pmix_mca_base_component_t struct. - */ - -#include "src/mca/oob/base/static-components.h" - -/* - * Global variables - */ -prte_oob_base_t prte_oob_base = {0}; - -static int prte_oob_base_close(void) -{ - prte_oob_base_component_t *component; - pmix_mca_base_component_list_item_t *cli; - - /* shutdown all active transports */ - while (NULL - != (cli = (pmix_mca_base_component_list_item_t *) pmix_list_remove_first( - &prte_oob_base.actives))) { - component = (prte_oob_base_component_t *) cli->cli_component; - if (NULL != component->shutdown) { - component->shutdown(); - } - PMIX_RELEASE(cli); - } - - /* destruct our internal lists */ - PMIX_DESTRUCT(&prte_oob_base.actives); - - /* release all peers from the list */ - PMIX_LIST_DESTRUCT(&prte_oob_base.peers); - - return pmix_mca_base_framework_components_close(&prte_oob_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. 
- */ -static int prte_oob_base_open(pmix_mca_base_open_flag_t flags) -{ - /* setup globals */ - prte_oob_base.max_uri_length = -1; - PMIX_CONSTRUCT(&prte_oob_base.peers, pmix_list_t); - PMIX_CONSTRUCT(&prte_oob_base.actives, pmix_list_t); - - /* Open up all available components */ - return pmix_mca_base_framework_components_open(&prte_oob_base_framework, flags); -} - -PMIX_MCA_BASE_FRAMEWORK_DECLARE(prte, oob, "Out-of-Band Messaging Subsystem", NULL, - prte_oob_base_open, prte_oob_base_close, - prte_oob_base_static_components, - PMIX_MCA_BASE_FRAMEWORK_FLAG_DEFAULT); - -PMIX_CLASS_INSTANCE(prte_oob_send_t, pmix_object_t, NULL, NULL); - -static void pr_cons(prte_oob_base_peer_t *ptr) -{ - PMIX_LOAD_PROCID(&ptr->name, NULL, PMIX_RANK_INVALID); - ptr->component = NULL; - PMIX_CONSTRUCT(&ptr->addressable, pmix_bitmap_t); - pmix_bitmap_init(&ptr->addressable, 8); -} -static void pr_des(prte_oob_base_peer_t *ptr) -{ - PMIX_DESTRUCT(&ptr->addressable); -} -PMIX_CLASS_INSTANCE(prte_oob_base_peer_t, pmix_list_item_t, pr_cons, pr_des); diff --git a/src/mca/oob/base/oob_base_select.c b/src/mca/oob/base/oob_base_select.c deleted file mode 100644 index 290a9589f1..0000000000 --- a/src/mca/oob/base/oob_base_select.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
- * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" -#include "constants.h" - -#include -#include - -#include "src/mca/base/pmix_base.h" -#include "src/mca/mca.h" -#include "src/util/pmix_output.h" - -#include "src/util/pmix_show_help.h" - -#include "src/mca/oob/base/base.h" -#include "src/mca/oob/oob.h" -#include "src/runtime/prte_globals.h" - -/** - * Function for selecting all runnable modules from those that are - * available. - * - * Call the init function on all available modules. - */ -int prte_oob_base_select(void) -{ - pmix_mca_base_component_list_item_t *cli, *cmp, *c2; - prte_oob_base_component_t *component, *c3; - bool added; - int i, rc; - - /* Query all available components and ask if their transport is available */ - PMIX_LIST_FOREACH(cli, &prte_oob_base_framework.framework_components, - pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: checking available component %s", - component->oob_base.pmix_mca_component_name); - - /* If there's no query function, skip it */ - if (NULL == component->available) { - pmix_output_verbose( - 5, prte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s]. 
It does not implement a query function", - component->oob_base.pmix_mca_component_name); - continue; - } - - /* Query the component */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Querying component [%s]", - component->oob_base.pmix_mca_component_name); - - rc = component->available(); - - /* If the component is not available, then skip it as - * it has no available interfaces - */ - if (PRTE_SUCCESS != rc && PRTE_ERR_FORCE_SELECT != rc) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s] - no available interfaces", - component->oob_base.pmix_mca_component_name); - continue; - } - - /* if it fails to startup, then skip it */ - if (PRTE_SUCCESS != component->startup()) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s] - failed to startup", - component->oob_base.pmix_mca_component_name); - continue; - } - - if (PRTE_ERR_FORCE_SELECT == rc) { - /* this component shall be the *only* component allowed - * for use, so shutdown and remove any prior ones */ - while (NULL - != (cmp = (pmix_mca_base_component_list_item_t *) pmix_list_remove_first( - &prte_oob_base.actives))) { - c3 = (prte_oob_base_component_t *) cmp->cli_component; - if (NULL != c3->shutdown) { - c3->shutdown(); - } - PMIX_RELEASE(cmp); - } - c2 = PMIX_NEW(pmix_mca_base_component_list_item_t); - c2->cli_component = (pmix_mca_base_component_t *) component; - pmix_list_append(&prte_oob_base.actives, &c2->super); - break; - } - - /* record it, but maintain priority order */ - added = false; - PMIX_LIST_FOREACH(cmp, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - c3 = (prte_oob_base_component_t *) cmp->cli_component; - if (c3->priority > component->priority) { - continue; - } - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Inserting component"); - c2 = 
PMIX_NEW(pmix_mca_base_component_list_item_t); - c2->cli_component = (pmix_mca_base_component_t *) component; - pmix_list_insert_pos(&prte_oob_base.actives, &cmp->super, &c2->super); - added = true; - break; - } - if (!added) { - /* add to end */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Adding component to end"); - c2 = PMIX_NEW(pmix_mca_base_component_list_item_t); - c2->cli_component = (pmix_mca_base_component_t *) component; - pmix_list_append(&prte_oob_base.actives, &c2->super); - } - } - - if (0 == pmix_list_get_size(&prte_oob_base.actives)) { - /* no support available means we really cannot run */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Init failed to return any available transports"); - pmix_show_help("help-oob-base.txt", "no-interfaces-avail", true); - return PRTE_ERR_SILENT; - } - - /* provide them an index so we can track their usability in a bitmap */ - i = 0; - PMIX_LIST_FOREACH(cmp, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - c3 = (prte_oob_base_component_t *) cmp->cli_component; - c3->idx = i++; - } - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Found %d active transports", - (int) pmix_list_get_size(&prte_oob_base.actives)); - return PRTE_SUCCESS; -} diff --git a/src/mca/oob/base/oob_base_stubs.c b/src/mca/oob/base/oob_base_stubs.c deleted file mode 100644 index 16f380a730..0000000000 --- a/src/mca/oob/base/oob_base_stubs.c +++ /dev/null @@ -1,382 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" -#include "constants.h" - -#include "src/pmix/pmix-internal.h" -#include "src/runtime/prte_globals.h" -#include "src/util/pmix_argv.h" -#include "src/util/pmix_output.h" -#include "src/util/pmix_printf.h" - -#include "src/mca/errmgr/errmgr.h" -#include "src/mca/oob/base/base.h" -#include "src/rml/rml.h" -#include "src/mca/state/state.h" -#include "src/threads/pmix_threads.h" - -static prte_oob_base_peer_t* process_uri(char *uri); - -void prte_oob_base_send_nb(int fd, short args, void *cbdata) -{ - prte_oob_send_t *cd = (prte_oob_send_t *) cbdata; - prte_rml_send_t *msg; - pmix_mca_base_component_list_item_t *cli; - prte_oob_base_peer_t *pr; - int rc; - bool msg_sent; - prte_oob_base_component_t *component; - bool reachable; - char *uri; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(cd); - - /* done with this. release it now */ - msg = cd->msg; - PMIX_RELEASE(cd); - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send to target %s - attempt %u", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst), - msg->retries); - - /* don't try forever - if we have exceeded the number of retries, - * then report this message as undeliverable even if someone continues - * to think they could reach it */ - if (prte_rml_base.max_retries <= msg->retries) { - msg->status = PRTE_ERR_NO_PATH_TO_TARGET; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - - /* check if we have this peer in our list */ - pr = prte_oob_base_get_peer(&msg->dst); - if (NULL == pr) { - /* if we are abnormally terminating, or terminating the DVM, then - * don't bother looking for it */ - if (prte_abnormal_term_ordered || prte_never_launched || prte_dvm_abort_ordered) { - return; - } - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send unknown peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - 
PRTE_NAME_PRINT(&msg->dst)); - /* for direct launched procs, the URI might be in the database, - * so check there next - if it is, the peer object will be added - * to our hash table. However, we don't want to chase up to the - * server after it, so indicate it is optional - */ - PRTE_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PROC_URI, &msg->dst, (char **) &uri, PMIX_STRING); - if (PRTE_SUCCESS == rc) { - if (NULL != uri) { - pr = process_uri(uri); - if (NULL == pr) { - /* that is just plain wrong */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send addressee unknown %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&msg->dst)); - PRTE_ERROR_LOG(PRTE_ERR_ADDRESSEE_UNKNOWN); - msg->status = PRTE_ERR_ADDRESSEE_UNKNOWN; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } else { - PRTE_ERROR_LOG(PRTE_ERR_ADDRESSEE_UNKNOWN); - msg->status = PRTE_ERR_ADDRESSEE_UNKNOWN; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } else { - /* even though we don't know about this peer yet, we still might - * be able to get to it via routing, so ask each component if - * it can reach it - */ - reachable = false; - pr = NULL; - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - if (NULL != component->is_reachable) { - if (component->is_reachable(&msg->dst)) { - /* there is a way to reach this peer - record it - * so we don't waste this time again - */ - if (NULL == pr) { - pr = PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&pr->name, &msg->dst); - pmix_list_append(&prte_oob_base.peers, &pr->super); - } - /* mark that this component can reach the peer */ - pmix_bitmap_set_bit(&pr->addressable, component->idx); - /* flag that at least one component can reach this peer */ - reachable = true; - } - } - } - /* if nobody could reach it, then that's an error */ - if (!reachable) { - /* if we are a daemon or HNP, then it could be that - * this is a 
local proc we just haven't heard from - * yet due to a race condition. Check that situation */ - if (PRTE_PROC_IS_DAEMON || PRTE_PROC_IS_MASTER) { - ++msg->retries; - if (msg->retries < prte_rml_base.max_retries) { - PRTE_OOB_SEND(msg); - return; - } - } - msg->status = PRTE_ERR_ADDRESSEE_UNKNOWN; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } - } - - /* if we already have a connection to this peer, use it */ - if (NULL != pr->component) { - /* post this msg for send by this transport - the component - * runs on our event base, so we can just call their function - */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send known transport for peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst)); - if (PRTE_SUCCESS == (rc = pr->component->send_nb(msg))) { - return; - } - } - - /* if we haven't identified a transport to this peer, - * loop across all available components in priority order until - * one replies that it has a module that can reach this peer. - * Let it try to make the connection - */ - msg_sent = false; - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - /* is this peer reachable via this component? */ - if (!component->is_reachable(&msg->dst)) { - continue; - } - /* it is addressable, so attempt to send via that transport */ - if (PRTE_SUCCESS == (rc = component->send_nb(msg))) { - /* the msg status will be set upon send completion/failure */ - msg_sent = true; - /* point to this transport for any future messages */ - pr->component = component; - break; - } else if (PRTE_ERR_TAKE_NEXT_OPTION != rc) { - /* components return "next option" if they can't connect - * to this peer. anything else is a true error. 
- */ - PRTE_ERROR_LOG(rc); - msg->status = rc; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } - - /* if no component can reach this peer, that's an error - post - * it back to the RML for handling - */ - if (!msg_sent) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send no path to target %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst)); - msg->status = PRTE_ERR_NO_PATH_TO_TARGET; - PRTE_RML_SEND_COMPLETE(msg); - } -} - -/** - * Obtain a uri for initial connection purposes - * - * During initial wireup, we can only transfer contact info on the daemon - * command line. This limits what we can send to a string representation of - * the actual contact info, which gets sent in a uri-like form. Not every - * oob module can support this transaction, so this function will loop - * across all oob components/modules, letting each add to the uri string if - * it supports bootstrap operations. An error will be returned in the cbfunc - * if NO component can successfully provide a contact. - * - * Note: since there is a limit to what an OS will allow on a cmd line, we - * impose a limit on the length of the resulting uri via an MCA param. The - * default value of -1 implies unlimited - however, users with large numbers - * of interfaces on their nodes may wish to restrict the size. 
- */ -void prte_oob_base_get_addr(char **uri) -{ - char *turi, *final = NULL, *tmp; - size_t len = 0; - bool one_added = false; - pmix_mca_base_component_list_item_t *cli; - prte_oob_base_component_t *component; - pmix_status_t rc; - - /* start with our process name */ - rc = prte_util_convert_process_name_to_string(&final, PRTE_PROC_MY_NAME); - if (PRTE_SUCCESS != rc) { - PRTE_ERROR_LOG(rc); - *uri = NULL; - return; - } - len = strlen(final); - - /* loop across all available modules to get their input - * up to the max length - */ - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - /* ask the component for its input, obtained when it - * opened its modules - */ - if (NULL == component->get_addr) { - /* doesn't support this ability */ - continue; - } - /* the components operate within our event base, so we - * can directly call their get_uri function to get the - * pointer to the uri - this is not a copy, so - * do NOT free it! 
- */ - turi = component->get_addr(); - if (NULL != turi) { - /* check overall length for limits */ - if (0 < prte_oob_base.max_uri_length - && prte_oob_base.max_uri_length < (int) (len + strlen(turi))) { - /* cannot accept the payload */ - continue; - } - /* add new value to final one */ - pmix_asprintf(&tmp, "%s;%s", final, turi); - free(turi); - free(final); - final = tmp; - len = strlen(final); - /* flag that at least one contributed */ - one_added = true; - } - } - - if (!one_added) { - /* nobody could contribute */ - if (NULL != final) { - free(final); - final = NULL; - } - } - - *uri = final; -} - -static prte_oob_base_peer_t* process_uri(char *uri) -{ - pmix_proc_t peer; - char *cptr; - pmix_mca_base_component_list_item_t *cli; - prte_oob_base_component_t *component; - char **uris = NULL; - prte_oob_base_peer_t *pr; - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s:set_addr processing uri %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uri); - - /* find the first semi-colon in the string */ - cptr = strchr(uri, ';'); - if (NULL == cptr) { - /* got a problem - there must be at least two fields, - * the first containing the process name of our peer - * and all others containing the OOB contact info - */ - PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); - return NULL; - } - *cptr = '\0'; - cptr++; - /* the first field is the process name, so convert it */ - prte_util_convert_string_to_process_name(&peer, uri); - - /* if the peer is us, no need to go further as we already - * know our own contact info - */ - if (PMIX_CHECK_PROCID(&peer, PRTE_PROC_MY_NAME)) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s:set_addr peer %s is me", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&peer)); - return NULL; - } - - /* split the rest of the uri into component parts */ - uris = PMIX_ARGV_SPLIT_COMPAT(cptr, ';'); - - /* get the peer object for this process */ - pr = prte_oob_base_get_peer(&peer); - if (NULL == pr) { - pr = 
PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&pr->name, &peer); - pmix_list_append(&prte_oob_base.peers, &pr->super); - } - - /* loop across all available components and let them extract - * whatever piece(s) of the uri they find relevant - they - * are all operating on our event base, so we can just - * directly call their functions - */ - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s:set_addr checking if peer %s is reachable via component %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer), - component->oob_base.pmix_mca_component_name); - if (NULL != component->set_addr) { - if (PRTE_SUCCESS == component->set_addr(&peer, uris)) { - /* this component found reachable addresses - * in the uris - */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s: peer %s is reachable via component %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer), - component->oob_base.pmix_mca_component_name); - pmix_bitmap_set_bit(&pr->addressable, component->idx); - } else { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s: peer %s is NOT reachable via component %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer), - component->oob_base.pmix_mca_component_name); - } - } - } - PMIX_ARGV_FREE_COMPAT(uris); - return pr; -} - -prte_oob_base_peer_t *prte_oob_base_get_peer(const pmix_proc_t *pr) -{ - prte_oob_base_peer_t *peer; - - PMIX_LIST_FOREACH(peer, &prte_oob_base.peers, prte_oob_base_peer_t) - { - if (PMIX_CHECK_PROCID(pr, &peer->name)) { - return peer; - } - } - return NULL; -} diff --git a/src/mca/oob/base/owner.txt b/src/mca/oob/base/owner.txt deleted file mode 100644 index 2d23c9be65..0000000000 --- a/src/mca/oob/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is 
responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: project -status: maintenance diff --git a/src/mca/oob/oob.h b/src/mca/oob/oob.h deleted file mode 100644 index 6c6e3294f3..0000000000 --- a/src/mca/oob/oob.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * Contains the internal functions and typedefs for the use of the oob - */ - -#ifndef MCA_OOB_H_ -#define MCA_OOB_H_ - -#include "prte_config.h" -#include "types.h" - -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_SYS_UIO_H -# include -#endif -#ifdef HAVE_NET_UIO_H -# include -#endif - -#include "src/class/pmix_list.h" -#include "src/class/pmix_pointer_array.h" -#include "src/mca/mca.h" -#include "src/pmix/pmix-internal.h" - -#include "src/rml/rml_types.h" - -BEGIN_C_DECLS - -typedef int (*mca_oob_base_component_avail_fn_t)(void); -typedef int (*mca_oob_base_component_startup_fn_t)(void); -typedef void (*mca_oob_base_component_shutdown_fn_t)(void); -typedef int (*mca_oob_base_component_send_fn_t)(prte_rml_send_t *msg); -typedef char *(*mca_oob_base_component_get_addr_fn_t)(void); -typedef int (*mca_oob_base_component_set_addr_fn_t)(pmix_proc_t *peer, char **uris); -typedef bool (*mca_oob_base_component_is_reachable_fn_t)(pmix_proc_t *peer); -typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata); - -typedef struct { - pmix_mca_base_component_t oob_base; - int idx; - int priority; - mca_oob_base_component_avail_fn_t available; - mca_oob_base_component_startup_fn_t startup; - mca_oob_base_component_shutdown_fn_t shutdown; - mca_oob_base_component_send_fn_t send_nb; - mca_oob_base_component_get_addr_fn_t get_addr; - mca_oob_base_component_set_addr_fn_t set_addr; - mca_oob_base_component_is_reachable_fn_t is_reachable; -} prte_oob_base_component_t; - -/** - * Macro for use in components that are of type oob - */ -#define PRTE_OOB_BASE_VERSION_2_0_0 PRTE_MCA_BASE_VERSION_3_0_0("oob", 2, 0, 0) - -END_C_DECLS - -#endif diff --git a/src/mca/oob/tcp/Makefile.am b/src/mca/oob/tcp/Makefile.am deleted file mode 100644 index 07e6c7bfbd..0000000000 --- a/src/mca/oob/tcp/Makefile.am +++ /dev/null @@ -1,63 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of 
Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2012-2013 Los Alamos National Security, LLC. -# All rights reserved -# Copyright (c) 2014-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_prtedata_DATA = help-oob-tcp.txt - -sources = \ - oob_tcp_component.h \ - oob_tcp.h \ - oob_tcp_listener.h \ - oob_tcp_common.h \ - oob_tcp_component.c \ - oob_tcp_connection.h \ - oob_tcp_sendrecv.h \ - oob_tcp_hdr.h \ - oob_tcp_peer.h \ - oob_tcp.c \ - oob_tcp_listener.c \ - oob_tcp_common.c \ - oob_tcp_connection.c \ - oob_tcp_sendrecv.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_prte_oob_tcp_DSO -component_noinst = -component_install = prte_mca_oob_tcp.la -else -component_noinst = libprtemca_oob_tcp.la -component_install = -endif - -mcacomponentdir = $(prtelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -prte_mca_oob_tcp_la_SOURCES = $(sources) -prte_mca_oob_tcp_la_LDFLAGS = -module -avoid-version -prte_mca_oob_tcp_la_LIBADD = $(top_builddir)/src/libprrte.la - -noinst_LTLIBRARIES = $(component_noinst) -libprtemca_oob_tcp_la_SOURCES = $(sources) -libprtemca_oob_tcp_la_LDFLAGS = -module -avoid-version diff --git a/src/mca/oob/tcp/configure.m4 b/src/mca/oob/tcp/configure.m4 deleted file mode 100644 index 25ea055125..0000000000 --- a/src/mca/oob/tcp/configure.m4 +++ /dev/null @@ -1,39 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2019 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_oob_tcp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_prte_oob_tcp_CONFIG],[ - AC_CONFIG_FILES([src/mca/oob/tcp/Makefile]) - - # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], - [oob_tcp_happy="yes"], - [oob_tcp_happy="no"], - [AC_INCLUDES_DEFAULT -#ifdef HAVE_NETINET_IN_H -#include -#endif]) - - AS_IF([test "$oob_tcp_happy" = "yes"], [$1], [$2]) -])dnl diff --git a/src/mca/oob/tcp/oob_tcp.c b/src/mca/oob/tcp/oob_tcp.c deleted file mode 100644 index a78ce60079..0000000000 --- a/src/mca/oob/tcp/oob_tcp.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2016-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "prte_config.h" -#include "types.h" - -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#include -#ifdef HAVE_NETINET_IN_H -# include -#endif -#ifdef HAVE_ARPA_INET_H -# include -#endif -#ifdef HAVE_NETDB_H -# include -#endif -#include - -#include "src/include/prte_socket_errno.h" -#include "src/runtime/prte_progress_threads.h" -#include "src/util/pmix_argv.h" -#include "src/util/error.h" -#include "src/util/pmix_if.h" -#include "src/util/pmix_net.h" -#include "src/util/pmix_output.h" -#include "src/util/pmix_show_help.h" - -#include "src/mca/errmgr/errmgr.h" -#include "src/mca/ess/ess.h" -#include "src/runtime/prte_globals.h" -#include "src/threads/pmix_threads.h" -#include "src/util/name_fns.h" -#include "src/util/pmix_parse_options.h" -#include "src/util/pmix_show_help.h" - -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp_sendrecv.h" - -static void accept_connection(const int accepted_fd, const struct sockaddr *addr); -static void ping(const pmix_proc_t *proc); -static void send_nb(prte_rml_send_t *msg); - -prte_oob_tcp_module_t prte_oob_tcp_module = {.accept_connection = accept_connection, - .ping = ping, - .send_nb = send_nb}; - -/* - * Local utility functions - */ -static void recv_handler(int sd, short flags, void *user); - -/* Called by prte_oob_tcp_accept() and connection_handler() on - * a socket that has been accepted. This call finishes processing the - * socket, including setting socket options and registering for the - * OOB-level connection handshake. Used in both the threaded and - * event listen modes. 
- */ -static void accept_connection(const int accepted_fd, const struct sockaddr *addr) -{ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s accept_connection: %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname(addr), pmix_net_get_port(addr)); - - /* setup socket options */ - prte_oob_tcp_set_socket_options(accepted_fd); - - /* use a one-time event to wait for receipt of peer's - * process ident message to complete this connection - */ - PRTE_ACTIVATE_TCP_ACCEPT_STATE(accepted_fd, addr, recv_handler); -} - -/* API functions */ -static void ping(const pmix_proc_t *proc) -{ - prte_oob_tcp_peer_t *peer; - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] processing ping to peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - __FILE__, __LINE__, PRTE_NAME_PRINT(proc)); - - /* do we know this peer? */ - if (NULL == (peer = prte_oob_tcp_peer_lookup(proc))) { - /* push this back to the component so it can try - * another module within this transport. 
If no - * module can be found, the component can push back - * to the framework so another component can try - */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] hop %s unknown", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - __FILE__, __LINE__, PRTE_NAME_PRINT(proc)); - PRTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, proc, prte_mca_oob_tcp_component_hop_unknown); - return; - } - - /* if we are already connected, there is nothing to do */ - if (MCA_OOB_TCP_CONNECTED == peer->state) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] already connected to peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(proc)); - return; - } - - /* if we are already connecting, there is nothing to do */ - if (MCA_OOB_TCP_CONNECTING == peer->state || MCA_OOB_TCP_CONNECT_ACK == peer->state) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] already connecting to peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(proc)); - return; - } - - /* attempt the connection */ - peer->state = MCA_OOB_TCP_CONNECTING; - PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); -} - -static void send_nb(prte_rml_send_t *msg) -{ - prte_oob_tcp_peer_t *peer; - pmix_proc_t hop; - - /* do we have a route to this peer (could be direct)? */ - PMIX_LOAD_NSPACE(hop.nspace, PRTE_PROC_MY_NAME->nspace); - hop.rank = prte_rml_get_route(msg->dst.rank); - /* do we know this hop? */ - if (NULL == (peer = prte_oob_tcp_peer_lookup(&hop))) { - /* if this message is going to the HNP, send it direct */ - if (PRTE_PROC_MY_HNP->rank == msg->dst.rank) { - hop.rank = PRTE_PROC_MY_HNP->rank; - peer = prte_oob_tcp_peer_lookup(&hop); - if (NULL != peer) { - goto send; - } - } - /* push this back to the component so it can try - * another module within this transport. 
If no - * module can be found, the component can push back - * to the framework so another component can try - */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d seq_num = %d hop %s unknown", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, - PRTE_NAME_PRINT(&hop)); - PRTE_ACTIVATE_TCP_NO_ROUTE(msg, &hop, prte_mca_oob_tcp_component_no_route); - return; - } - -send: - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d seq_num = %d via %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, - PRTE_NAME_PRINT(&peer->name)); - - /* add the msg to the hop's send queue */ - if (MCA_OOB_TCP_CONNECTED == peer->state) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s tcp:send_nb: already connected to %s - queueing for send", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); - MCA_OOB_TCP_QUEUE_SEND(msg, peer); - return; - } - - /* add the message to the queue for sending after the - * connection is formed - */ - MCA_OOB_TCP_QUEUE_PENDING(msg, peer); - - if (MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { - /* we have to initiate the connection - again, we do not - * want to block while the connection is created. - * So throw us into an event that will create - * the connection via a mini-state-machine :-) - */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s tcp:send_nb: initiating connection to %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); - peer->state = MCA_OOB_TCP_CONNECTING; - PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); - } -} - -/* - * Event callback when there is data available on the registered - * socket to recv. 
This is called for the listen sockets to accept an - * incoming connection, on new sockets trying to complete the software - * connection process, and for probes. Data on an established - * connection is handled elsewhere. - */ -static void recv_handler(int sd, short flg, void *cbdata) -{ - prte_oob_tcp_conn_op_t *op = (prte_oob_tcp_conn_op_t *) cbdata; - int flags; - prte_oob_tcp_hdr_t hdr; - prte_oob_tcp_peer_t *peer; - PRTE_HIDE_UNUSED_PARAMS(flg); - - PMIX_ACQUIRE_OBJECT(op); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s:tcp:recv:handler called", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - - /* get the handshake */ - if (PRTE_SUCCESS != prte_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) { - goto cleanup; - } - - /* finish processing ident */ - if (MCA_OOB_TCP_IDENT == hdr.type) { - if (NULL == (peer = prte_oob_tcp_peer_lookup(&hdr.origin))) { - /* should never happen */ - prte_oob_tcp_peer_close(peer); - goto cleanup; - } - /* set socket up to be non-blocking */ - if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), - prte_socket_errno); - } else { - flags |= O_NONBLOCK; - if (fcntl(sd, F_SETFL, flags) < 0) { - pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), - prte_socket_errno); - } - } - /* is the peer instance willing to accept this connection */ - peer->sd = sd; - if (prte_oob_tcp_peer_accept(peer) == false) { - if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { - pmix_output(0, - "%s-%s prte_oob_tcp_recv_connect: " - "rejected connection from %s connection state %d", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), - PRTE_NAME_PRINT(&(hdr.origin)), peer->state); - } - CLOSE_THE_SOCKET(sd); - } - } - -cleanup: 
- PMIX_RELEASE(op); -} diff --git a/src/mca/oob/tcp/oob_tcp_component.c b/src/mca/oob/tcp/oob_tcp_component.c deleted file mode 100644 index e915198f95..0000000000 --- a/src/mca/oob/tcp/oob_tcp_component.c +++ /dev/null @@ -1,1293 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * In windows, many of the socket functions return an EWOULDBLOCK - * instead of things like EAGAIN, EINPROGRESS, etc. 
It has been - * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments - */ - -#include "prte_config.h" -#include "types.h" - -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#include -#ifdef HAVE_NET_IF_H -# include -#endif -#ifdef HAVE_NETINET_IN_H -# include -#endif -#ifdef HAVE_ARPA_INET_H -# include -#endif -#ifdef HAVE_NETDB_H -# include -#endif -#include -#include -#include - -#ifndef MIN -# define MIN(a, b) ((a) < (b) ? (a) : (b)) -#endif - -#include "src/class/pmix_list.h" -#include "src/event/event-internal.h" -#include "src/include/prte_socket_errno.h" -#include "src/runtime/prte_progress_threads.h" -#include "src/util/pmix_argv.h" -#include "src/util/pmix_if.h" -#include "src/util/error.h" -#include "src/util/pmix_net.h" -#include "src/util/pmix_output.h" -#include "src/util/pmix_show_help.h" - -#include "src/mca/errmgr/errmgr.h" -#include "src/mca/ess/ess.h" -#include "src/rml/rml.h" -#include "src/mca/state/state.h" -#include "src/runtime/prte_globals.h" -#include "src/runtime/prte_wait.h" -#include "src/threads/pmix_threads.h" -#include "src/util/attr.h" -#include "src/util/name_fns.h" -#include "src/util/pmix_parse_options.h" -#include "src/util/pmix_show_help.h" - -#include "oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_listener.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" - -/* - * Local utility functions - */ - -static int tcp_component_register(void); -static int tcp_component_open(void); -static int tcp_component_close(void); - -static int component_available(void); -static int component_startup(void); -static void component_shutdown(void); -static int component_send(prte_rml_send_t *msg); -static char *component_get_addr(void); -static int 
component_set_addr(pmix_proc_t *peer, char **uris); -static bool component_is_reachable(pmix_proc_t *peer); - -/* - * Struct of function pointers and all that to let us be initialized - */ -prte_mca_oob_tcp_component_t prte_mca_oob_tcp_component = { - .super = { - .oob_base = { - PRTE_OOB_BASE_VERSION_2_0_0, - .pmix_mca_component_name = "tcp", - PMIX_MCA_BASE_MAKE_VERSION(component, - PRTE_MAJOR_VERSION, - PRTE_MINOR_VERSION, - PMIX_RELEASE_VERSION), - .pmix_mca_open_component = tcp_component_open, - .pmix_mca_close_component = tcp_component_close, - .pmix_mca_register_component_params = tcp_component_register, - }, - .priority = 30, // default priority of this transport - .available = component_available, - .startup = component_startup, - .shutdown = component_shutdown, - .send_nb = component_send, - .get_addr = component_get_addr, - .set_addr = component_set_addr, - .is_reachable = component_is_reachable, - } -}; - -/* - * Initialize global variables used w/in this module. - */ -static int tcp_component_open(void) -{ - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.peers, pmix_list_t); - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.listeners, pmix_list_t); - if (PRTE_PROC_IS_MASTER) { - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.listen_thread, pmix_thread_t); - prte_mca_oob_tcp_component.listen_thread_active = false; - prte_mca_oob_tcp_component.listen_thread_tv.tv_sec = 3600; - prte_mca_oob_tcp_component.listen_thread_tv.tv_usec = 0; - } - prte_mca_oob_tcp_component.addr_count = 0; - prte_mca_oob_tcp_component.ipv4conns = NULL; - prte_mca_oob_tcp_component.ipv4ports = NULL; - prte_mca_oob_tcp_component.ipv6conns = NULL; - prte_mca_oob_tcp_component.ipv6ports = NULL; - prte_mca_oob_tcp_component.if_masks = NULL; - - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.local_ifs, pmix_list_t); - return PRTE_SUCCESS; -} - -/* - * Cleanup of global variables used by this module. 
- */ -static int tcp_component_close(void) -{ - PMIX_LIST_DESTRUCT(&prte_mca_oob_tcp_component.local_ifs); - PMIX_LIST_DESTRUCT(&prte_mca_oob_tcp_component.peers); - - if (NULL != prte_mca_oob_tcp_component.ipv4conns) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv4conns); - } - if (NULL != prte_mca_oob_tcp_component.ipv4ports) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv4ports); - } - -#if PRTE_ENABLE_IPV6 - if (NULL != prte_mca_oob_tcp_component.ipv6conns) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv6conns); - } - if (NULL != prte_mca_oob_tcp_component.ipv6ports) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv6ports); - } -#endif - if (NULL != prte_mca_oob_tcp_component.if_masks) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.if_masks); - } - return PRTE_SUCCESS; -} -static char *static_port_string; -#if PRTE_ENABLE_IPV6 -static char *static_port_string6; -#endif // PRTE_ENABLE_IPV6 - -static char *dyn_port_string; -#if PRTE_ENABLE_IPV6 -static char *dyn_port_string6; -#endif - -static int tcp_component_register(void) -{ - pmix_mca_base_component_t *component = &prte_mca_oob_tcp_component.super.oob_base; - - /* register oob module parameters */ - prte_mca_oob_tcp_component.peer_limit = -1; - (void) pmix_mca_base_component_var_register(component, "peer_limit", - "Maximum number of peer connections to simultaneously maintain (-1 = infinite)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.peer_limit); - - prte_mca_oob_tcp_component.max_retries = 2; - (void) pmix_mca_base_component_var_register(component, "peer_retries", - "Number of times to try shutting down a connection before giving up", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.max_retries); - - prte_mca_oob_tcp_component.tcp_sndbuf = 0; - (void) pmix_mca_base_component_var_register(component, "sndbuf", - "TCP socket send buffering size (in bytes, 0 => leave system default)", - PMIX_MCA_BASE_VAR_TYPE_INT, - 
&prte_mca_oob_tcp_component.tcp_sndbuf); - - prte_mca_oob_tcp_component.tcp_rcvbuf = 0; - (void) pmix_mca_base_component_var_register(component, "rcvbuf", - "TCP socket receive buffering size (in bytes, 0 => leave system default)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.tcp_rcvbuf); - - - static_port_string = NULL; - (void) pmix_mca_base_component_var_register(component, "static_ipv4_ports", - "Static ports for daemons and procs (IPv4)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &static_port_string); - - /* if ports were provided, parse the provided range */ - if (NULL != static_port_string) { - pmix_util_parse_range_options(static_port_string, &prte_mca_oob_tcp_component.tcp_static_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp_static_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp_static_ports); - prte_mca_oob_tcp_component.tcp_static_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp_static_ports = NULL; - } - -#if PRTE_ENABLE_IPV6 - static_port_string6 = NULL; - (void) pmix_mca_base_component_var_register(component, "static_ipv6_ports", - "Static ports for daemons and procs (IPv6)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &static_port_string6); - - /* if ports were provided, parse the provided range */ - if (NULL != static_port_string6) { - pmix_util_parse_range_options(static_port_string6, - &prte_mca_oob_tcp_component.tcp6_static_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp6_static_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp6_static_ports); - prte_mca_oob_tcp_component.tcp6_static_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp6_static_ports = NULL; - } -#endif // PRTE_ENABLE_IPV6 - - if (NULL != prte_mca_oob_tcp_component.tcp_static_ports - || NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { - prte_static_ports = true; - } - - dyn_port_string = NULL; - (void) pmix_mca_base_component_var_register(component, "dynamic_ipv4_ports", - "Range of 
ports to be dynamically used by daemons and procs (IPv4)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &dyn_port_string); - /* if ports were provided, parse the provided range */ - if (NULL != dyn_port_string) { - /* can't have both static and dynamic ports! */ - if (prte_static_ports) { - char *err = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.tcp_static_ports, ','); - pmix_show_help("help-oob-tcp.txt", "static-and-dynamic", true, err, dyn_port_string); - free(err); - return PRTE_ERROR; - } - pmix_util_parse_range_options(dyn_port_string, &prte_mca_oob_tcp_component.tcp_dyn_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp_dyn_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp_dyn_ports); - prte_mca_oob_tcp_component.tcp_dyn_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp_dyn_ports = NULL; - } - -#if PRTE_ENABLE_IPV6 - dyn_port_string6 = NULL; - (void) pmix_mca_base_component_var_register(component, "dynamic_ipv6_ports", - "Range of ports to be dynamically used by daemons and procs (IPv6)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &dyn_port_string6); - /* if ports were provided, parse the provided range */ - if (NULL != dyn_port_string6) { - /* can't have both static and dynamic ports! */ - if (prte_static_ports) { - char *err4 = NULL, *err6 = NULL; - if (NULL != prte_mca_oob_tcp_component.tcp_static_ports) { - err4 = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.tcp_static_ports, ','); - } - if (NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { - err6 = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.tcp6_static_ports, ','); - } - pmix_show_help("help-oob-tcp.txt", "static-and-dynamic-ipv6", true, - (NULL == err4) ? "N/A" : err4, (NULL == err6) ? 
"N/A" : err6, - dyn_port_string6); - if (NULL != err4) { - free(err4); - } - if (NULL != err6) { - free(err6); - } - return PRTE_ERROR; - } - pmix_util_parse_range_options(dyn_port_string6, &prte_mca_oob_tcp_component.tcp6_dyn_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp6_dyn_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp6_dyn_ports); - prte_mca_oob_tcp_component.tcp6_dyn_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp6_dyn_ports = NULL; - } -#endif // PRTE_ENABLE_IPV6 - - prte_mca_oob_tcp_component.disable_ipv4_family = false; - (void) pmix_mca_base_component_var_register(component, "disable_ipv4_family", - "Disable the IPv4 interfaces", - PMIX_MCA_BASE_VAR_TYPE_BOOL, - &prte_mca_oob_tcp_component.disable_ipv4_family); - -#if PRTE_ENABLE_IPV6 - prte_mca_oob_tcp_component.disable_ipv6_family = false; - (void) pmix_mca_base_component_var_register(component, "disable_ipv6_family", - "Disable the IPv6 interfaces", - PMIX_MCA_BASE_VAR_TYPE_BOOL, - &prte_mca_oob_tcp_component.disable_ipv6_family); -#endif // PRTE_ENABLE_IPV6 - - // Wait for this amount of time before sending the first keepalive probe - prte_mca_oob_tcp_component.keepalive_time = 300; - (void) pmix_mca_base_component_var_register(component, "keepalive_time", - "Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables " - "keepalive functionality)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.keepalive_time); - - // Resend keepalive probe every INT seconds - prte_mca_oob_tcp_component.keepalive_intvl = 20; - (void) pmix_mca_base_component_var_register(component, "keepalive_intvl", - "Time between successive keepalive pings when peer has not responded, in seconds (ignored " - "if keepalive_time <= 0)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.keepalive_intvl); - - // After sending PR probes every INT seconds consider the connection dead - prte_mca_oob_tcp_component.keepalive_probes = 9; - 
(void) pmix_mca_base_component_var_register(component, "keepalive_probes", - "Number of keepalives that can be missed before " - "declaring error (ignored if keepalive_time <= 0)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.keepalive_probes); - - prte_mca_oob_tcp_component.retry_delay = 0; - (void) pmix_mca_base_component_var_register(component, "retry_delay", - "Time (in sec) to wait before trying to connect to peer again", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.retry_delay); - - prte_mca_oob_tcp_component.max_recon_attempts = 10; - (void) pmix_mca_base_component_var_register(component, "max_recon_attempts", - "Max number of times to attempt connection before giving up (-1 -> never give up)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.max_recon_attempts); - - return PRTE_SUCCESS; -} - -static char **split_and_resolve(char **orig_str, char *name); - -static int component_available(void) -{ - pmix_pif_t *copied_interface, *selected_interface; - struct sockaddr_storage my_ss; - /* Larger than necessary, used for copying mask */ - char string[50], **interfaces = NULL; - int kindex; - int i, rc; - bool keeploopback = false; - bool including = false; - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "oob:tcp: component_available called"); - - /* if interface include was given, construct a list - * of those interfaces which match the specifications - remember, - * the includes could be given as named interfaces, IP addrs, or - * subnet+mask - */ - if (NULL != prte_if_include) { - interfaces = split_and_resolve(&prte_if_include, - "include"); - including = true; - } else if (NULL != prte_if_exclude) { - interfaces = split_and_resolve(&prte_if_exclude, - "exclude"); - } - - /* if we are the master, then check the interfaces for loopbacks - * and keep loopbacks only if no non-loopback interface exists */ - if (PRTE_PROC_IS_MASTER) { - keeploopback = true; - PMIX_LIST_FOREACH(selected_interface, 
&pmix_if_list, pmix_pif_t) - { - if (!(selected_interface->if_flags & IFF_LOOPBACK)) { - keeploopback = false; - break; - } - } - } - - /* look at all available interfaces */ - PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) - { - if ((selected_interface->if_flags & IFF_LOOPBACK) && - !keeploopback) { - continue; - } - - - i = selected_interface->if_index; - kindex = selected_interface->if_kernel_index; - memcpy((struct sockaddr *) &my_ss, &selected_interface->if_addr, - MIN(sizeof(struct sockaddr_storage), sizeof(selected_interface->if_addr))); - - /* ignore non-ip4/6 interfaces */ - if (AF_INET != my_ss.ss_family -#if PRTE_ENABLE_IPV6 - && AF_INET6 != my_ss.ss_family -#endif - ) { - continue; - } - - /* ignore any virtual interfaces */ - if (0 == strncmp(selected_interface->if_name, "vir", 3)) { - continue; - } - - /* handle include/exclude directives */ - if (NULL != interfaces) { - /* check for match */ - rc = pmix_ifmatches(kindex, interfaces); - /* if one of the network specifications isn't parseable, then - * error out as we can't do what was requested - */ - if (PRTE_ERR_NETWORK_NOT_PARSEABLE == rc) { - pmix_show_help("help-oob-tcp.txt", "not-parseable", true); - PMIX_ARGV_FREE_COMPAT(interfaces); - return PRTE_ERR_BAD_PARAM; - } - /* if we are including, then ignore this if not present */ - if (including) { - if (PMIX_SUCCESS != rc) { - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s oob:tcp:init rejecting interface %s (not in include list)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name); - continue; - } - } else { - /* we are excluding, so ignore if present */ - if (PMIX_SUCCESS == rc) { - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s oob:tcp:init rejecting interface %s (in exclude list)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name); - continue; - } - } - } - - /* Refs ticket #3019 - * it would probably be worthwhile to print out a warning if 
PRRTE detects multiple - * IP interfaces that are "up" on the same subnet (because that's a Bad Idea). Note - * that we should only check for this after applying the relevant include/exclude - * list MCA params. If we detect redundant ports, we can also automatically ignore - * them so that applications won't hang. - */ - - /* add this address to our connections */ - if (AF_INET == my_ss.ss_family) { - pmix_output_verbose(10, prte_oob_base_framework.framework_output, - "%s oob:tcp:init adding %s to our list of %s connections", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname((struct sockaddr *) &my_ss), - (AF_INET == my_ss.ss_family) ? "V4" : "V6"); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv4conns, - pmix_net_get_hostname((struct sockaddr *) &my_ss)); - } else if (AF_INET6 == my_ss.ss_family) { -#if PRTE_ENABLE_IPV6 - pmix_output_verbose(10, prte_oob_base_framework.framework_output, - "%s oob:tcp:init adding %s to our list of %s connections", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname((struct sockaddr *) &my_ss), - (AF_INET == my_ss.ss_family) ? 
"V4" : "V6"); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv6conns, - pmix_net_get_hostname((struct sockaddr *) &my_ss)); -#endif // PRTE_ENABLE_IPV6 - } else { - pmix_output_verbose(10, prte_oob_base_framework.framework_output, - "%s oob:tcp:init ignoring %s from out list of connections", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname((struct sockaddr *) &my_ss)); - continue; - } - copied_interface = PMIX_NEW(pmix_pif_t); - if (NULL == copied_interface) { - return PRTE_ERR_OUT_OF_RESOURCE; - } - pmix_string_copy(copied_interface->if_name, selected_interface->if_name, PMIX_IF_NAMESIZE); - copied_interface->if_index = i; - copied_interface->if_kernel_index = kindex; - copied_interface->af_family = my_ss.ss_family; - copied_interface->if_flags = selected_interface->if_flags; - copied_interface->if_speed = selected_interface->if_speed; - memcpy(&copied_interface->if_addr, &selected_interface->if_addr, - sizeof(struct sockaddr_storage)); - copied_interface->if_mask = selected_interface->if_mask; - /* If bandwidth is not found, set to arbitrary non zero value */ - copied_interface->if_bandwidth = selected_interface->if_bandwidth > 0 - ? 
selected_interface->if_bandwidth - : 1; - memcpy(&copied_interface->if_mac, &selected_interface->if_mac, - sizeof(copied_interface->if_mac)); - copied_interface->ifmtu = selected_interface->ifmtu; - /* Add the if_mask to the list */ - sprintf(string, "%d", selected_interface->if_mask); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.if_masks, string); - pmix_list_append(&prte_mca_oob_tcp_component.local_ifs, &(copied_interface->super)); - } - - if (0 == PMIX_ARGV_COUNT_COMPAT(prte_mca_oob_tcp_component.ipv4conns) -#if PRTE_ENABLE_IPV6 - && 0 == PMIX_ARGV_COUNT_COMPAT(prte_mca_oob_tcp_component.ipv6conns) -#endif - ) { - return PRTE_ERR_NOT_AVAILABLE; - } - - return PRTE_SUCCESS; -} - -/* Start all modules */ -static int component_startup(void) -{ - int rc = PRTE_SUCCESS; - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s TCP STARTUP", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - - /* if we are a daemon/HNP, - * then it is possible that someone else may initiate a - * connection to us. In these cases, we need to start the - * listening thread/event. 
Otherwise, we will be the one - * initiating communication, and there is no need for - * a listener */ - if (PRTE_PROC_IS_MASTER || PRTE_PROC_IS_DAEMON) { - if (PRTE_SUCCESS != (rc = prte_oob_tcp_start_listening())) { - PRTE_ERROR_LOG(rc); - } - } - - return rc; -} - -static void component_shutdown(void) -{ - int i = 0, rc; - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s TCP SHUTDOWN", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - - if (PRTE_PROC_IS_MASTER && prte_mca_oob_tcp_component.listen_thread_active) { - prte_mca_oob_tcp_component.listen_thread_active = false; - /* tell the thread to exit */ - rc = write(prte_mca_oob_tcp_component.stop_thread[1], &i, sizeof(int)); - if (0 < rc) { - pmix_thread_join(&prte_mca_oob_tcp_component.listen_thread, NULL); - } - - close(prte_mca_oob_tcp_component.stop_thread[0]); - close(prte_mca_oob_tcp_component.stop_thread[1]); - - } else { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "no hnp or not active"); - } - - /* cleanup listen event list */ - PMIX_LIST_DESTRUCT(&prte_mca_oob_tcp_component.listeners); - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); -} - -static int component_send(prte_rml_send_t *msg) -{ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:tcp:send_nb to peer %s:%d seq = %d", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst), msg->tag, - msg->seq_num); - - /* The module will first see if it knows - * of a way to send the data to the target, and then - * attempt to send the data. 
It will call the cbfunc - * with the status upon completion - if it can't do it for - * some reason, it will pass the error to our fn below so - * it can do something about it - */ - prte_oob_tcp_module.send_nb(msg); - return PRTE_SUCCESS; -} - -static char *component_get_addr(void) -{ - char *cptr = NULL, *tmp, *tp, *tm; - - if (!prte_mca_oob_tcp_component.disable_ipv4_family && - NULL != prte_mca_oob_tcp_component.ipv4conns) { - tmp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv4conns, ','); - tp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv4ports, ','); - tm = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.if_masks, ','); - pmix_asprintf(&cptr, "tcp://%s:%s:%s", tmp, tp, tm); - free(tmp); - free(tp); - free(tm); - } -#if PRTE_ENABLE_IPV6 - if (!prte_mca_oob_tcp_component.disable_ipv6_family && NULL != prte_mca_oob_tcp_component.ipv6conns) { - char *tmp2; - - /* Fixes #2498 - * RFC 3986, section 3.2.2 - * The notation in that case is to encode the IPv6 IP number in square brackets: - * "http://[2001:db8:1f70::999:de8:7648:6e8]:100/" - * A host identified by an Internet Protocol literal address, version 6 [RFC3513] - * or later, is distinguished by enclosing the IP literal within square brackets. - * This is the only place where square bracket characters are allowed in the URI - * syntax. In anticipation of future, as-yet-undefined IP literal address formats, - * an implementation may use an optional version flag to indicate such a format - * explicitly rather than rely on heuristic determination. 
- */ - tmp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv6conns, ','); - tp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv6ports, ','); - tm = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.if_masks, ','); - if (NULL == cptr) { - /* no ipv4 stuff */ - pmix_asprintf(&cptr, "tcp6://[%s]:%s:%s", tmp, tp, tm); - } else { - pmix_asprintf(&tmp2, "%s;tcp6://[%s]:%s:%s", cptr, tmp, tp, tm); - free(cptr); - cptr = tmp2; - } - free(tmp); - free(tp); - free(tm); - } -#endif // PRTE_ENABLE_IPV6 - - /* return our uri */ - return cptr; -} - -/* the host in this case is always in "dot" notation, and - * thus we do not need to do a DNS lookup to convert it */ -static int parse_uri(const uint16_t af_family, const char *host, const char *port, - struct sockaddr_storage *inaddr) -{ - struct sockaddr_in *in; - - if (AF_INET == af_family) { - memset(inaddr, 0, sizeof(struct sockaddr_in)); - in = (struct sockaddr_in *) inaddr; - in->sin_family = AF_INET; - in->sin_addr.s_addr = inet_addr(host); - if (in->sin_addr.s_addr == INADDR_NONE) { - return PRTE_ERR_BAD_PARAM; - } - ((struct sockaddr_in *) inaddr)->sin_port = htons(atoi(port)); - } -#if PRTE_ENABLE_IPV6 - else if (AF_INET6 == af_family) { - struct sockaddr_in6 *in6; - memset(inaddr, 0, sizeof(struct sockaddr_in6)); - in6 = (struct sockaddr_in6 *) inaddr; - - if (0 == inet_pton(AF_INET6, host, (void *) &in6->sin6_addr)) { - pmix_output(0, "oob_tcp_parse_uri: Could not convert %s\n", host); - return PRTE_ERR_BAD_PARAM; - } - in6->sin6_family = AF_INET6; - in6->sin6_port = htons(atoi(port)); - } -#endif - else { - return PRTE_ERR_NOT_SUPPORTED; - } - return PRTE_SUCCESS; -} - -static int component_set_addr(pmix_proc_t *peer, char **uris) -{ - char **addrs, **masks, *hptr; - char *tcpuri = NULL, *host, *ports, *masks_string; - int i, j, rc; - uint16_t af_family = AF_UNSPEC; - uint64_t ui64; - bool found; - prte_oob_tcp_peer_t *pr; - prte_oob_tcp_addr_t *maddr; - - memcpy(&ui64, (char *) peer, sizeof(uint64_t)); - /* 
cycle across component parts and see if one belongs to us */ - found = false; - - for (i = 0; NULL != uris[i]; i++) { - tcpuri = strdup(uris[i]); - if (NULL == tcpuri) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: out of memory", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - continue; - } - if (0 == strncmp(uris[i], "tcp:", 4)) { - af_family = AF_INET; - host = tcpuri + strlen("tcp://"); - } else if (0 == strncmp(uris[i], "tcp6:", 5)) { -#if PRTE_ENABLE_IPV6 - af_family = AF_INET6; - host = tcpuri + strlen("tcp6://"); -#else // PRTE_ENABLE_IPV6 - /* we don't support this connection type */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: address %s not supported", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); - free(tcpuri); - continue; -#endif // PRTE_ENABLE_IPV6 - } else { - /* not one of ours */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: ignoring address %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); - free(tcpuri); - continue; - } - - /* this one is ours - record the peer */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: working peer %s address %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), uris[i]); - - /* separate the mask from the network addrs */ - masks_string = strrchr(tcpuri, ':'); - if (NULL == masks_string) { - PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); - free(tcpuri); - continue; - } - *masks_string = '\0'; - masks_string++; - masks = PMIX_ARGV_SPLIT_COMPAT(masks_string, ','); - - /* separate the ports from the network addrs */ - ports = strrchr(tcpuri, ':'); - if (NULL == ports) { - PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); - free(tcpuri); - continue; - } - *ports = '\0'; - ports++; - - /* split the addrs */ - /* if this is a tcp6 connection, the first one will have a '[' - * at the beginning of it, and the last will have a ']' at the - * end - we need to remove those extra characters - */ - hptr = 
host; -#if PRTE_ENABLE_IPV6 - if (AF_INET6 == af_family) { - if ('[' == host[0]) { - hptr = &host[1]; - } - if (']' == host[strlen(host) - 1]) { - host[strlen(host) - 1] = '\0'; - } - } -#endif // PRTE_ENABLE_IPV6 - addrs = PMIX_ARGV_SPLIT_COMPAT(hptr, ','); - - /* cycle across the provided addrs */ - for (j = 0; NULL != addrs[j]; j++) { - if (NULL == masks[j]) { - /* Missing mask information */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: uri missing mask information.", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - return PRTE_ERR_TAKE_NEXT_OPTION; - } - /* if they gave us "localhost", then just take the first conn on our list */ - if (0 == strcasecmp(addrs[j], "localhost")) { -#if PRTE_ENABLE_IPV6 - if (AF_INET6 == af_family) { - if (NULL == prte_mca_oob_tcp_component.ipv6conns - || NULL == prte_mca_oob_tcp_component.ipv6conns[0]) { - continue; - } - host = prte_mca_oob_tcp_component.ipv6conns[0]; - } else { -#endif // PRTE_ENABLE_IPV6 - if (NULL == prte_mca_oob_tcp_component.ipv4conns - || NULL == prte_mca_oob_tcp_component.ipv4conns[0]) { - continue; - } - host = prte_mca_oob_tcp_component.ipv4conns[0]; -#if PRTE_ENABLE_IPV6 - } -#endif - } else { - host = addrs[j]; - } - - if (NULL == (pr = prte_oob_tcp_peer_lookup(peer))) { - pr = PMIX_NEW(prte_oob_tcp_peer_t); - PMIX_XFER_PROCID(&pr->name, peer); - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s SET_PEER ADDING PEER %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer)); - pmix_list_append(&prte_mca_oob_tcp_component.peers, &pr->super); - } - - maddr = PMIX_NEW(prte_oob_tcp_addr_t); - ((struct sockaddr_storage *) &(maddr->addr))->ss_family = af_family; - if (PRTE_SUCCESS - != (rc = parse_uri(af_family, host, ports, - (struct sockaddr_storage *) &(maddr->addr)))) { - PRTE_ERROR_LOG(rc); - PMIX_RELEASE(maddr); - pmix_list_remove_item(&prte_mca_oob_tcp_component.peers, &pr->super); - PMIX_RELEASE(pr); - return PRTE_ERR_TAKE_NEXT_OPTION; - } - 
maddr->if_mask = atoi(masks[j]); - - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s set_peer: peer %s is listening on net %s port %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), - (NULL == host) ? "NULL" : host, (NULL == ports) ? "NULL" : ports); - pmix_list_append(&pr->addrs, &maddr->super); - - found = true; - } - PMIX_ARGV_FREE_COMPAT(addrs); - free(tcpuri); - } - if (found) { - /* indicate that this peer is addressable by this component */ - return PRTE_SUCCESS; - } - - /* otherwise indicate that it is not addressable by us */ - return PRTE_ERR_TAKE_NEXT_OPTION; -} - -static bool component_is_reachable(pmix_proc_t *peer) -{ - PRTE_HIDE_UNUSED_PARAMS(peer); - - /* assume we can reach the hop - the module will tell us if it can't - * when we try to send the first time, and then we'll correct it */ - return true; -} - -void prte_mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) -{ - prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; - prte_oob_base_peer_t *bpr; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(pop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:set_module called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&pop->peer)); - - /* make sure the OOB knows that we can reach this peer - we - * are in the same event base as the OOB base, so we can - * directly access its storage - */ - bpr = prte_oob_base_get_peer(&pop->peer); - if (NULL == bpr) { - bpr = PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&bpr->name, &pop->peer); - pmix_list_append(&prte_oob_base.peers, &bpr->super); - } - pmix_bitmap_set_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - bpr->component = &prte_mca_oob_tcp_component.super; - - PMIX_RELEASE(pop); -} - -void prte_mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) -{ - prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; 
- prte_oob_base_peer_t *bpr; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(pop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:lost connection called for peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); - - /* Mark that we no longer support this peer */ - bpr = prte_oob_base_get_peer(&pop->peer); - if (NULL != bpr) { - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - pmix_list_remove_item(&prte_oob_base.peers, &bpr->super); - PMIX_RELEASE(bpr); - } - - if (!prte_finalizing) { - /* activate the proc state */ - if (PRTE_SUCCESS != prte_rml_route_lost(pop->peer.rank)) { - PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_LIFELINE_LOST); - } else { - PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_COMM_FAILED); - } - } - PMIX_RELEASE(pop); -} - -void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata) -{ - prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata; - prte_oob_base_peer_t *bpr; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(mop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:no route called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&mop->hop)); - - /* mark that we cannot reach this hop */ - bpr = prte_oob_base_get_peer(&mop->hop); - if (NULL == bpr) { - bpr = PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&bpr->name, &mop->hop); - } - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - - /* report the error back to the OOB and let it try other components - * or declare a problem - */ - mop->rmsg->retries++; - /* activate the OOB send state */ - PRTE_OOB_SEND(mop->rmsg); - - PMIX_RELEASE(mop); -} - -void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) -{ - prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata; - prte_rml_send_t 
*snd; - prte_oob_base_peer_t *bpr; - pmix_status_t rc; - pmix_byte_object_t bo; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(mop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:unknown hop called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&mop->hop)); - - if (prte_finalizing || prte_abnormal_term_ordered) { - /* just ignore the problem */ - PMIX_RELEASE(mop); - return; - } - - /* mark that this component cannot reach this hop */ - bpr = prte_oob_base_get_peer(&mop->hop); - if (NULL == bpr) { - /* the overall OOB has no knowledge of this hop. Only - * way this could happen is if the peer contacted us - * via this component, and it wasn't entered into the - * OOB framework hash table. We have no way of knowing - * what to do next, so just output an error message and - * abort */ - pmix_output(0, - "%s ERROR: message to %s requires routing and the OOB has no knowledge of the " - "reqd hop %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&mop->snd->hdr.dst), - PRTE_NAME_PRINT(&mop->hop)); - PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); - PMIX_RELEASE(mop); - return; - } - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - - /* mark that this component cannot reach this destination either */ - bpr = prte_oob_base_get_peer(&mop->snd->hdr.dst); - if (NULL == bpr) { - pmix_output( - 0, - "%s ERROR: message to %s requires routing and the OOB has no knowledge of this process", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&mop->snd->hdr.dst)); - PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); - PMIX_RELEASE(mop); - return; - } - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - - /* post the message to the OOB so it can see - * if another component can transfer it - */ - MCA_OOB_TCP_HDR_NTOH(&mop->snd->hdr); - snd = PMIX_NEW(prte_rml_send_t); - 
snd->retries = mop->rmsg->retries + 1; - PMIX_XFER_PROCID(&snd->dst, &mop->snd->hdr.dst); - PMIX_XFER_PROCID(&snd->origin, &mop->snd->hdr.origin); - snd->tag = mop->snd->hdr.tag; - snd->seq_num = mop->snd->hdr.seq_num; - bo.bytes = mop->snd->data; - bo.size = mop->snd->hdr.nbytes; - PMIX_DATA_BUFFER_CREATE(snd->dbuf); - rc = PMIx_Data_load(snd->dbuf, &bo); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - } - snd->cbfunc = NULL; - snd->cbdata = NULL; - /* activate the OOB send state */ - PRTE_OOB_SEND(snd); - /* protect the data */ - mop->snd->data = NULL; - - PMIX_RELEASE(mop); -} - -void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata) -{ - prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(pop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:failed_to_connect called for peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); - - /* if we are terminating, then don't attempt to reconnect */ - if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { - PMIX_RELEASE(pop); - return; - } - - /* activate the proc state */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:failed_to_connect unable to reach peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); - - PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_FAILED_TO_CONNECT); - PMIX_RELEASE(pop); -} - - -/* - * Go through a list of argv; if there are any subnet specifications - * (a.b.c.d/e), resolve them to an interface name (Currently only - * supporting IPv4). If unresolvable, warn and remove. 
- */ -static char **split_and_resolve(char **orig_str, char *name) -{ - pmix_pif_t *selected_interface; - int i, n, ret, match_count, interface_count; - char **argv, **interfaces, *str, *tmp; - char if_name[IF_NAMESIZE]; - struct sockaddr_storage argv_inaddr, if_inaddr; - uint32_t argv_prefix; - - /* Sanity check */ - if (NULL == orig_str || NULL == *orig_str) { - return NULL; - } - - argv = PMIX_ARGV_SPLIT_COMPAT(*orig_str, ','); - if (NULL == argv) { - return NULL; - } - interface_count = 0; - interfaces = NULL; - for (i = 0; NULL != argv[i]; ++i) { - if (isalpha(argv[i][0])) { - /* This is an interface name. If not already in the interfaces array, add it */ - for (n = 0; n < interface_count; n++) { - if (0 == strcmp(argv[i], interfaces[n])) { - break; - } - } - if (n == interface_count) { - pmix_output_verbose(20, - prte_oob_base_framework.framework_output, - "oob:tcp: Using interface: %s ", argv[i]); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, argv[i]); - ++interface_count; - } - continue; - } - - /* Found a subnet notation. Convert it to an IP - address/netmask. Get the prefix first. 
*/ - argv_prefix = 0; - tmp = strdup(argv[i]); - str = strchr(argv[i], '/'); - if (NULL == str) { - pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, prte_process_info.nodename, - tmp, "Invalid specification (missing \"/\")"); - free(argv[i]); - free(tmp); - continue; - } - *str = '\0'; - argv_prefix = atoi(str + 1); - - /* Now convert the IPv4 address */ - ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET; - ret = inet_pton(AF_INET, argv[i], - &((struct sockaddr_in*) &argv_inaddr)->sin_addr); - free(argv[i]); - - if (1 != ret) { - pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, prte_process_info.nodename, tmp, - "Invalid specification (inet_pton() failed)"); - free(tmp); - continue; - } - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s oob:tcp: Searching for %s address+prefix: %s / %u", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - name, - pmix_net_get_hostname((struct sockaddr*) &argv_inaddr), - argv_prefix); - - /* Go through all interfaces and see if we can find a match */ - match_count = 0; - PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) { - pmix_ifindextoaddr(selected_interface->if_kernel_index, - (struct sockaddr*) &if_inaddr, - sizeof(if_inaddr)); - if (pmix_net_samenetwork((struct sockaddr_storage*) &argv_inaddr, - (struct sockaddr_storage*) &if_inaddr, - argv_prefix)) { - /* We found a match. If it's not already in the interfaces array, - add it. 
If it's already in the array, treat it as a match */ - match_count = match_count + 1; - pmix_ifindextoname(selected_interface->if_kernel_index, if_name, sizeof(if_name)); - for (n = 0; n < interface_count; n++) { - if (0 == strcmp(if_name, interfaces[n])) { - break; - } - } - if (n == interface_count) { - pmix_output_verbose(20, - prte_oob_base_framework.framework_output, - "oob:tcp: Found match: %s (%s)", - pmix_net_get_hostname((struct sockaddr*) &if_inaddr), - if_name); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, if_name); - ++interface_count; - } - } - } - /* If we didn't find a match, keep trying */ - if (0 == match_count) { - pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, prte_process_info.nodename, tmp, - "Did not find interface matching this subnet"); - free(tmp); - continue; - } - - free(tmp); - } - - /* Mark the end of the interface name array with NULL */ - if (NULL != interfaces) { - interfaces[interface_count] = NULL; - } - free(argv); - free(*orig_str); - *orig_str = PMIX_ARGV_JOIN_COMPAT(interfaces, ','); - return interfaces; -} - -/* OOB TCP Class instances */ - -static void peer_cons(prte_oob_tcp_peer_t *peer) -{ - peer->auth_method = NULL; - peer->sd = -1; - PMIX_CONSTRUCT(&peer->addrs, pmix_list_t); - peer->active_addr = NULL; - peer->state = MCA_OOB_TCP_UNCONNECTED; - peer->num_retries = 0; - PMIX_CONSTRUCT(&peer->send_queue, pmix_list_t); - peer->send_msg = NULL; - peer->recv_msg = NULL; - peer->send_ev_active = false; - peer->recv_ev_active = false; - peer->timer_ev_active = false; -} -static void peer_des(prte_oob_tcp_peer_t *peer) -{ - if (NULL != peer->auth_method) { - free(peer->auth_method); - } - if (peer->send_ev_active) { - prte_event_del(&peer->send_event); - } - if (peer->recv_ev_active) { - prte_event_del(&peer->recv_event); - } - if (peer->timer_ev_active) { - prte_event_del(&peer->timer_event); - } - if (0 <= peer->sd) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s CLOSING 
SOCKET %d", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), peer->sd); - CLOSE_THE_SOCKET(peer->sd); - } - PMIX_LIST_DESTRUCT(&peer->addrs); - PMIX_LIST_DESTRUCT(&peer->send_queue); -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_t, pmix_list_item_t, peer_cons, peer_des); - -static void padd_cons(prte_oob_tcp_addr_t *ptr) -{ - memset(&ptr->addr, 0, sizeof(ptr->addr)); - ptr->retries = 0; - ptr->state = MCA_OOB_TCP_UNCONNECTED; -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_addr_t, pmix_list_item_t, padd_cons, NULL); - -static void pop_cons(prte_oob_tcp_peer_op_t *pop) -{ - pop->net = NULL; - pop->port = NULL; -} -static void pop_des(prte_oob_tcp_peer_op_t *pop) -{ - if (NULL != pop->net) { - free(pop->net); - } - if (NULL != pop->port) { - free(pop->port); - } -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_op_t, pmix_object_t, pop_cons, pop_des); - -PMIX_CLASS_INSTANCE(prte_oob_tcp_msg_op_t, pmix_object_t, NULL, NULL); - -PMIX_CLASS_INSTANCE(prte_oob_tcp_conn_op_t, pmix_object_t, NULL, NULL); - -static void nicaddr_cons(prte_oob_tcp_nicaddr_t *ptr) -{ - ptr->af_family = PF_UNSPEC; - memset(&ptr->addr, 0, sizeof(ptr->addr)); -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_nicaddr_t, pmix_list_item_t, nicaddr_cons, NULL); diff --git a/src/mca/oob/tcp/oob_tcp_component.h b/src/mca/oob/tcp/oob_tcp_component.h deleted file mode 100644 index 9131e100fb..0000000000 --- a/src/mca/oob/tcp/oob_tcp_component.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. 
- * All rights reserved. - * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2023 Triad National Security, LLC. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_OOB_TCP_COMPONENT_H_ -#define _MCA_OOB_TCP_COMPONENT_H_ - -#include "prte_config.h" - -#ifdef HAVE_SYS_TIME_H -# include -#endif - -#include "src/include/prte_stdatomic.h" -#include "src/class/pmix_bitmap.h" -#include "src/class/pmix_list.h" -#include "src/class/pmix_pointer_array.h" -#include "src/event/event-internal.h" - -#include "oob_tcp.h" -#include "src/mca/oob/oob.h" - -/** - * OOB TCP Component - */ -typedef struct { - prte_oob_base_component_t super; /**< base OOB component */ - uint32_t addr_count; /**< total number of addresses */ - int num_links; /**< number of logical links per physical device */ - int max_retries; /**< max number of retries before declaring peer gone */ - pmix_list_t events; /**< events for monitoring connections */ - int peer_limit; /**< max size of tcp peer cache */ - pmix_list_t peers; // connection addresses for peers - - /* Port specifications */ - int tcp_sndbuf; /**< socket send buffer size */ - int tcp_rcvbuf; /**< socket recv buffer size */ - - /* IPv4 support */ - bool disable_ipv4_family; /**< disable this AF */ - char **tcp_static_ports; /**< Static ports - IPV4 */ - char **tcp_dyn_ports; /**< Dynamic ports - IPV4 */ - char **ipv4conns; - char **ipv4ports; - - /* IPv6 support */ - bool disable_ipv6_family; /**< disable this AF */ - char **tcp6_static_ports; /**< Static ports - IPV6 */ - char **tcp6_dyn_ports; /**< Dynamic ports - IPV6 */ - 
char **ipv6conns; - char **ipv6ports; - - /* connection support */ - pmix_list_t local_ifs; /**< prte list of local pmix_pif_t interfaces */ - char **if_masks; - char *my_uri; /**< uri for connecting to the TCP module */ - int num_hnp_ports; /**< number of ports the HNP should listen on */ - pmix_list_t listeners; /**< List of sockets being monitored by event or thread */ - pmix_thread_t listen_thread; /**< handle to the listening thread */ - prte_atomic_bool_t listen_thread_active; - struct timeval listen_thread_tv; /**< Timeout when using listen thread */ - int stop_thread[2]; /**< pipe used to exit the listen thread */ - int keepalive_probes; /**< number of keepalives that can be missed before declaring error */ - int keepalive_time; /**< idle time in seconds before starting to send keepalives */ - int keepalive_intvl; /**< time between keepalives, in seconds */ - int retry_delay; /**< time to wait before retrying connection */ - int max_recon_attempts; /**< maximum number of times to attempt connect before giving up (-1 for - never) */ -} prte_mca_oob_tcp_component_t; - -PRTE_MODULE_EXPORT extern prte_mca_oob_tcp_component_t prte_mca_oob_tcp_component; - -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_set_module(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata); - -#endif /* _MCA_OOB_TCP_COMPONENT_H_ */ diff --git a/src/mca/plm/ssh/plm_ssh_module.c b/src/mca/plm/ssh/plm_ssh_module.c index 3608ae64a7..b14e38a21d 100644 --- a/src/mca/plm/ssh/plm_ssh_module.c +++ b/src/mca/plm/ssh/plm_ssh_module.c @@ -17,7 +17,7 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. 
* Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -88,7 +88,6 @@ #include "src/mca/ess/base/base.h" #include "src/mca/ess/ess.h" #include "src/mca/grpcomm/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/rmaps/rmaps.h" #include "src/rml/rml_contact.h" #include "src/rml/rml.h" @@ -652,11 +651,9 @@ static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node * uri of their parent (me) */ if (!prte_mca_plm_ssh_component.no_tree_spawn) { pmix_argv_append(&argc, &argv, "--tree-spawn"); - prte_oob_base_get_addr(¶m); pmix_argv_append(&argc, &argv, "--prtemca"); pmix_argv_append(&argc, &argv, "prte_parent_uri"); - pmix_argv_append(&argc, &argv, param); - free(param); + pmix_argv_append(&argc, &argv, prte_process_info.my_uri); } /* protect the params */ diff --git a/src/prted/prted_comm.c b/src/prted/prted_comm.c index c78fa51d88..054740afc8 100644 --- a/src/prted/prted_comm.c +++ b/src/prted/prted_comm.c @@ -65,7 +65,6 @@ #include "src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/plm/plm.h" #include "src/mca/rmaps/rmaps_types.h" diff --git a/src/rml/Makefile.am b/src/rml/Makefile.am index 5cc9f3ca0e..c864b2c2e4 100644 --- a/src/rml/Makefile.am +++ b/src/rml/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. +# Copyright (c) 2022-2024 Nanook Consulting All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -32,3 +32,5 @@ libprrte_la_SOURCES += \ rml/rml_base_contact.c \ rml/rml_base_msg_handlers.c \ rml/routed_radix.c + +include rml/oob/Makefile.am diff --git a/src/rml/oob/Makefile.am b/src/rml/oob/Makefile.am new file mode 100644 index 0000000000..25704f9b3a --- /dev/null +++ b/src/rml/oob/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2012-2013 Los Alamos National Security, LLC. +# All rights reserved +# Copyright (c) 2014-2020 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2022-2024 Nanook Consulting All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_prtedata_DATA += \ + rml/oob/help-oob-base.txt \ + rml/oob/help-oob-tcp.txt + +headers += \ + rml/oob/oob.h \ + rml/oob/oob_tcp.h \ + rml/oob/oob_tcp_listener.h \ + rml/oob/oob_tcp_common.h \ + rml/oob/oob_tcp_connection.h \ + rml/oob/oob_tcp_sendrecv.h \ + rml/oob/oob_tcp_hdr.h \ + rml/oob/oob_tcp_peer.h + +libprrte_la_SOURCES += \ + rml/oob/oob_tcp_component.c \ + rml/oob/oob_tcp.c \ + rml/oob/oob_tcp_listener.c \ + rml/oob/oob_tcp_common.c \ + rml/oob/oob_tcp_connection.c \ + rml/oob/oob_tcp_sendrecv.c \ + rml/oob/oob_base_stubs.c diff --git a/src/mca/oob/base/help-oob-base.txt b/src/rml/oob/help-oob-base.txt similarity index 93% rename from src/mca/oob/base/help-oob-base.txt rename to src/rml/oob/help-oob-base.txt index 41ae1761d7..009fcf0e0e 100644 --- a/src/mca/oob/base/help-oob-base.txt +++ b/src/rml/oob/help-oob-base.txt @@ -11,6 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2024 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/src/mca/oob/tcp/help-oob-tcp.txt b/src/rml/oob/help-oob-tcp.txt similarity index 98% rename from src/mca/oob/tcp/help-oob-tcp.txt rename to src/rml/oob/help-oob-tcp.txt index edbce3ef98..950599d810 100644 --- a/src/mca/oob/tcp/help-oob-tcp.txt +++ b/src/rml/oob/help-oob-tcp.txt @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2014-2020 Intel, Inc. All rights reserved. # Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2024 Nanook Consulting All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/src/mca/oob/base/base.h b/src/rml/oob/oob.h similarity index 65% rename from src/mca/oob/base/base.h rename to src/rml/oob/oob.h index c3f1f04142..b901997b4b 100644 --- a/src/mca/oob/base/base.h +++ b/src/rml/oob/oob.h @@ -15,7 +15,7 @@ * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,12 +46,11 @@ #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" #include "src/event/event-internal.h" +#include "src/include/prte_stdatomic.h" #include "src/util/pmix_printf.h" - -#include "src/mca/mca.h" #include "src/threads/pmix_threads.h" -#include "src/mca/oob/oob.h" +#include "src/rml/rml_types.h" BEGIN_C_DECLS @@ -59,26 +58,55 @@ BEGIN_C_DECLS * Convenience Typedef */ typedef struct { - char *include; - char *exclude; - pmix_list_t components; - pmix_list_t actives; + int output; + uint32_t addr_count; /**< total number of addresses */ + int num_links; /**< number of logical links per physical device */ + int max_retries; /**< max number of retries before declaring peer gone */ int max_uri_length; - pmix_list_t peers; + pmix_list_t events; /**< events for monitoring connections */ + int peer_limit; /**< max size of tcp peer cache */ + pmix_list_t peers; // connection addresses for peers + + /* Port specifications */ + int tcp_sndbuf; /**< socket send buffer size */ + int tcp_rcvbuf; /**< socket recv buffer size */ + + /* IPv4 support */ + bool disable_ipv4_family; /**< disable this AF */ + char **tcp_static_ports; /**< Static ports - IPV4 */ + char **tcp_dyn_ports; /**< Dynamic ports - IPV4 */ + char **ipv4conns; + char **ipv4ports; + + /* IPv6 support */ + bool disable_ipv6_family; 
/**< disable this AF */ + char **tcp6_static_ports; /**< Static ports - IPV6 */ + char **tcp6_dyn_ports; /**< Dynamic ports - IPV6 */ + char **ipv6conns; + char **ipv6ports; + + /* connection support */ + pmix_list_t local_ifs; /**< prte list of local pmix_pif_t interfaces */ + char **if_masks; + int num_hnp_ports; /**< number of ports the HNP should listen on */ + pmix_list_t listeners; /**< List of sockets being monitored by event or thread */ + pmix_thread_t listen_thread; /**< handle to the listening thread */ + prte_atomic_bool_t listen_thread_active; + struct timeval listen_thread_tv; /**< Timeout when using listen thread */ + int stop_thread[2]; /**< pipe used to exit the listen thread */ + int keepalive_probes; /**< number of keepalives that can be missed before declaring error */ + int keepalive_time; /**< idle time in seconds before starting to send keepalives */ + int keepalive_intvl; /**< time between keepalives, in seconds */ + int retry_delay; /**< time to wait before retrying connection */ + int max_recon_attempts; /**< maximum number of times to attempt connect before giving up (-1 for + never) */ } prte_oob_base_t; PRTE_EXPORT extern prte_oob_base_t prte_oob_base; -typedef struct { - pmix_list_item_t super; - pmix_proc_t name; - prte_oob_base_component_t *component; - pmix_bitmap_t addressable; -} prte_oob_base_peer_t; -PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_oob_base_peer_t); - /* MCA framework */ -PRTE_EXPORT extern pmix_mca_base_framework_t prte_oob_base_framework; -PRTE_EXPORT int prte_oob_base_select(void); +PRTE_EXPORT int prte_oob_open(void); +PRTE_EXPORT void prte_oob_close(void); +PRTE_EXPORT int prte_oob_register(void); /* Access the OOB internal functions via set of event-based macros * for inserting messages and other commands into the @@ -112,15 +140,13 @@ PRTE_EXPORT void prte_oob_base_send_nb(int fd, short args, void *cbdata); #define PRTE_OOB_SEND(m) \ do { \ prte_oob_send_t *prte_oob_send_cd; \ - pmix_output_verbose(1, 
prte_oob_base_framework.framework_output, "%s OOB_SEND: %s:%d", \ + pmix_output_verbose(1, prte_oob_base.output, "%s OOB_SEND: %s:%d", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__); \ prte_oob_send_cd = PMIX_NEW(prte_oob_send_t); \ prte_oob_send_cd->msg = (m); \ PRTE_PMIX_THREADSHIFT(prte_oob_send_cd, prte_event_base, prte_oob_base_send_nb); \ } while (0) -PRTE_EXPORT prte_oob_base_peer_t *prte_oob_base_get_peer(const pmix_proc_t *pr); - /* During initial wireup, we can only transfer contact info on the daemon * command line. This limits what we can send to a string representation of * the actual contact info, which gets sent in a uri-like form. Not every diff --git a/src/rml/oob/oob_base_stubs.c b/src/rml/oob/oob_base_stubs.c new file mode 100644 index 0000000000..31b2edf409 --- /dev/null +++ b/src/rml/oob/oob_base_stubs.c @@ -0,0 +1,494 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "prte_config.h" +#include "constants.h" + +#include "src/pmix/pmix-internal.h" +#include "src/runtime/prte_globals.h" +#include "src/util/pmix_argv.h" +#include "src/util/pmix_output.h" +#include "src/util/pmix_printf.h" +#include "src/mca/errmgr/errmgr.h" +#include "src/rml/rml.h" +#include "src/mca/state/state.h" +#include "src/threads/pmix_threads.h" + +#include "src/rml/oob/oob.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_peer.h" + +static prte_oob_tcp_peer_t* process_uri(char *uri); + +void prte_oob_base_send_nb(int fd, short args, void *cbdata) +{ + prte_oob_send_t *cd = (prte_oob_send_t *) cbdata; + prte_rml_send_t *msg; + prte_oob_tcp_peer_t *peer; + pmix_proc_t hop; + int rc; + char *uri = NULL; + PRTE_HIDE_UNUSED_PARAMS(fd, args); + + PMIX_ACQUIRE_OBJECT(cd); + + /* done with this. release it now */ + msg = cd->msg; + PMIX_RELEASE(cd); + + pmix_output_verbose(5, prte_oob_base.output, + "%s oob:base:send to target %s - attempt %u", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst), + msg->retries); + + /* don't try forever - if we have exceeded the number of retries, + * then report this message as undeliverable even if someone continues + * to think they could reach it */ + if (prte_rml_base.max_retries <= msg->retries) { + msg->status = PRTE_ERR_NO_PATH_TO_TARGET; + PRTE_RML_SEND_COMPLETE(msg); + return; + } + + /* do we have a route to this peer (could be direct)? */ + PMIX_LOAD_NSPACE(hop.nspace, PRTE_PROC_MY_NAME->nspace); + hop.rank = prte_rml_get_route(msg->dst.rank); + /* do we know this hop? 
*/ + if (NULL == (peer = prte_oob_tcp_peer_lookup(&hop))) { + /* if this message is going to the HNP, send it direct */ + if (PRTE_PROC_MY_HNP->rank == msg->dst.rank) { + hop.rank = PRTE_PROC_MY_HNP->rank; + peer = prte_oob_tcp_peer_lookup(&hop); + if (NULL != peer) { + goto send; + } + } + // see if we know the contact info for it + PRTE_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PROC_URI, &hop, (char **) &uri, PMIX_STRING); + if (PRTE_SUCCESS == rc && NULL != uri) { + peer = process_uri(uri); + if (NULL == peer) { + /* that is just plain wrong */ + pmix_output_verbose(5, prte_oob_base.output, + "%s oob:base:send addressee unknown %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + PRTE_NAME_PRINT(&msg->dst)); + + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + /* just ignore the problem */ + PMIX_RELEASE(msg); + return; + } + PRTE_ACTIVATE_PROC_STATE(&hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); + PMIX_RELEASE(msg); + return; + } + } else { + // unable to send it + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + /* just ignore the problem */ + PMIX_RELEASE(msg); + return; + } + PRTE_ACTIVATE_PROC_STATE(&hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); + PMIX_RELEASE(msg); + return; + } + } + +send: + pmix_output_verbose(2, prte_oob_base.output, + "%s:[%s:%d] processing send to peer %s:%d seq_num = %d via %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, + PRTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, + PRTE_NAME_PRINT(&peer->name)); + + /* add the msg to the hop's send queue */ + if (MCA_OOB_TCP_CONNECTED == peer->state) { + pmix_output_verbose(2, prte_oob_base.output, + "%s tcp:send_nb: already connected to %s - queueing for send", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); + MCA_OOB_TCP_QUEUE_SEND(msg, peer); + return; + } + + /* add the message to the queue for sending after the + * connection is formed + */ + MCA_OOB_TCP_QUEUE_PENDING(msg, peer); + + if 
(MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { + /* we have to initiate the connection - again, we do not + * want to block while the connection is created. + * So throw us into an event that will create + * the connection via a mini-state-machine :-) + */ + pmix_output_verbose(2, prte_oob_base.output, + "%s tcp:send_nb: initiating connection to %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); + peer->state = MCA_OOB_TCP_CONNECTING; + PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); + } +} + +/** + * Obtain a uri for initial connection purposes + * + * During initial wireup, we can only transfer contact info on the daemon + * command line. This limits what we can send to a string representation of + * the actual contact info, which gets sent in a uri-like form. Not every + * oob module can support this transaction, so this function will loop + * across all oob components/modules, letting each add to the uri string if + * it supports bootstrap operations. An error will be returned in the cbfunc + * if NO component can successfully provide a contact. + * + * Note: since there is a limit to what an OS will allow on a cmd line, we + * impose a limit on the length of the resulting uri via an MCA param. The + * default value of -1 implies unlimited - however, users with large numbers + * of interfaces on their nodes may wish to restrict the size. 
+ */ +void prte_oob_base_get_addr(char **uri) +{ + char *final = NULL, *tmp; + char *cptr = NULL, *tp, *tm; + size_t len = 0; + pmix_status_t rc; + + /* start with our process name */ + rc = prte_util_convert_process_name_to_string(&final, PRTE_PROC_MY_NAME); + if (PRTE_SUCCESS != rc) { + PRTE_ERROR_LOG(rc); + *uri = NULL; + return; + } + len = strlen(final); + + if (!prte_oob_base.disable_ipv4_family && + NULL != prte_oob_base.ipv4conns) { + tmp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv4conns, ','); + tp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv4ports, ','); + tm = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.if_masks, ','); + pmix_asprintf(&cptr, "tcp://%s:%s:%s", tmp, tp, tm); + free(tmp); + free(tp); + free(tm); + } +#if PRTE_ENABLE_IPV6 + if (!prte_oob_base.disable_ipv6_family && + NULL != prte_oob_base.ipv6conns) { + char *tmp2; + + /* Fixes #2498 + * RFC 3986, section 3.2.2 + * The notation in that case is to encode the IPv6 IP number in square brackets: + * "http://[2001:db8:1f70::999:de8:7648:6e8]:100/" + * A host identified by an Internet Protocol literal address, version 6 [RFC3513] + * or later, is distinguished by enclosing the IP literal within square brackets. + * This is the only place where square bracket characters are allowed in the URI + * syntax. In anticipation of future, as-yet-undefined IP literal address formats, + * an implementation may use an optional version flag to indicate such a format + * explicitly rather than rely on heuristic determination. 
+ */ + tmp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv6conns, ','); + tp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv6ports, ','); + tm = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.if_masks, ','); + if (NULL == cptr) { + /* no ipv4 stuff */ + pmix_asprintf(&cptr, "tcp6://[%s]:%s:%s", tmp, tp, tm); + } else { + pmix_asprintf(&tmp2, "%s;tcp6://[%s]:%s:%s", cptr, tmp, tp, tm); + free(cptr); + cptr = tmp2; + } + free(tmp); + free(tp); + free(tm); + } +#endif // PRTE_ENABLE_IPV6 + + /* check overall length for limits */ + if (0 < prte_oob_base.max_uri_length + && prte_oob_base.max_uri_length < (int) (len + strlen(cptr))) { + /* cannot accept the payload */ + free(final); + free(cptr); + *uri = NULL; + return; + } + /* add new value to final one */ + pmix_asprintf(&tmp, "%s;%s", final, cptr); + free(cptr); + free(final); + final = tmp; + + *uri = final; +} + +/* the host in this case is always in "dot" notation, and + * thus we do not need to do a DNS lookup to convert it */ +static int parse_uri(const uint16_t af_family, const char *host, const char *port, + struct sockaddr_storage *inaddr) +{ + struct sockaddr_in *in; + + if (AF_INET == af_family) { + memset(inaddr, 0, sizeof(struct sockaddr_in)); + in = (struct sockaddr_in *) inaddr; + in->sin_family = AF_INET; + in->sin_addr.s_addr = inet_addr(host); + if (in->sin_addr.s_addr == INADDR_NONE) { + return PRTE_ERR_BAD_PARAM; + } + ((struct sockaddr_in *) inaddr)->sin_port = htons(atoi(port)); + } +#if PRTE_ENABLE_IPV6 + else if (AF_INET6 == af_family) { + struct sockaddr_in6 *in6; + memset(inaddr, 0, sizeof(struct sockaddr_in6)); + in6 = (struct sockaddr_in6 *) inaddr; + + if (0 == inet_pton(AF_INET6, host, (void *) &in6->sin6_addr)) { + pmix_output(0, "oob_tcp_parse_uri: Could not convert %s\n", host); + return PRTE_ERR_BAD_PARAM; + } + in6->sin6_family = AF_INET6; + in6->sin6_port = htons(atoi(port)); + } +#endif + else { + return PRTE_ERR_NOT_SUPPORTED; + } + return PRTE_SUCCESS; +} + +static void set_addr(pmix_proc_t *peer, 
char **uris) +{ + char **addrs, **masks, *hptr; + char *tcpuri = NULL, *host, *ports, *masks_string; + int i, j, rc; + uint16_t af_family = AF_UNSPEC; + uint64_t ui64; + prte_oob_tcp_peer_t *pr; + prte_oob_tcp_addr_t *maddr; + + memcpy(&ui64, (char *) peer, sizeof(uint64_t)); + + for (i = 0; NULL != uris[i]; i++) { + tcpuri = strdup(uris[i]); + if (NULL == tcpuri) { + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: out of memory", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); + continue; + } + if (0 == strncmp(uris[i], "tcp:", 4)) { + af_family = AF_INET; + host = tcpuri + strlen("tcp://"); + } else if (0 == strncmp(uris[i], "tcp6:", 5)) { +#if PRTE_ENABLE_IPV6 + af_family = AF_INET6; + host = tcpuri + strlen("tcp6://"); +#else // PRTE_ENABLE_IPV6 + /* we don't support this connection type */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: address %s not supported", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); + free(tcpuri); + continue; +#endif // PRTE_ENABLE_IPV6 + } else { + /* not one of ours */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: ignoring address %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); + free(tcpuri); + continue; + } + + /* this one is ours - record the peer */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: working peer %s address %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), uris[i]); + + /* separate the mask from the network addrs */ + masks_string = strrchr(tcpuri, ':'); + if (NULL == masks_string) { + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + free(tcpuri); + continue; + } + *masks_string = '\0'; + masks_string++; + masks = PMIX_ARGV_SPLIT_COMPAT(masks_string, ','); + + /* separate the ports from the network addrs */ + ports = strrchr(tcpuri, ':'); + if (NULL == ports) { + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + free(tcpuri); + continue; + } + *ports = '\0'; + ports++; + + /* split the addrs */ + /* if this is a tcp6 connection, the first one will have a '[' + * at the 
beginning of it, and the last will have a ']' at the + * end - we need to remove those extra characters + */ + hptr = host; +#if PRTE_ENABLE_IPV6 + if (AF_INET6 == af_family) { + if ('[' == host[0]) { + hptr = &host[1]; + } + if (']' == host[strlen(host) - 1]) { + host[strlen(host) - 1] = '\0'; + } + } +#endif // PRTE_ENABLE_IPV6 + addrs = PMIX_ARGV_SPLIT_COMPAT(hptr, ','); + + /* cycle across the provided addrs */ + for (j = 0; NULL != addrs[j]; j++) { + if (NULL == masks[j]) { + /* Missing mask information */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: uri missing mask information.", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); + return; + } + /* if they gave us "localhost", then just take the first conn on our list */ + if (0 == strcasecmp(addrs[j], "localhost")) { +#if PRTE_ENABLE_IPV6 + if (AF_INET6 == af_family) { + if (NULL == prte_oob_base.ipv6conns + || NULL == prte_oob_base.ipv6conns[0]) { + continue; + } + host = prte_oob_base.ipv6conns[0]; + } else { +#endif // PRTE_ENABLE_IPV6 + if (NULL == prte_oob_base.ipv4conns + || NULL == prte_oob_base.ipv4conns[0]) { + continue; + } + host = prte_oob_base.ipv4conns[0]; +#if PRTE_ENABLE_IPV6 + } +#endif + } else { + host = addrs[j]; + } + + if (NULL == (pr = prte_oob_tcp_peer_lookup(peer))) { + pr = PMIX_NEW(prte_oob_tcp_peer_t); + PMIX_XFER_PROCID(&pr->name, peer); + pmix_output_verbose(20, prte_oob_base.output, + "%s SET_PEER ADDING PEER %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer)); + pmix_list_append(&prte_oob_base.peers, &pr->super); + } + + maddr = PMIX_NEW(prte_oob_tcp_addr_t); + ((struct sockaddr_storage *) &(maddr->addr))->ss_family = af_family; + if (PRTE_SUCCESS + != (rc = parse_uri(af_family, host, ports, + (struct sockaddr_storage *) &(maddr->addr)))) { + PRTE_ERROR_LOG(rc); + PMIX_RELEASE(maddr); + pmix_list_remove_item(&prte_oob_base.peers, &pr->super); + PMIX_RELEASE(pr); + return; + } + maddr->if_mask = atoi(masks[j]); + + pmix_output_verbose(20, 
prte_oob_base.output, + "%s set_peer: peer %s is listening on net %s port %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), + (NULL == host) ? "NULL" : host, (NULL == ports) ? "NULL" : ports); + pmix_list_append(&pr->addrs, &maddr->super); + } + PMIX_ARGV_FREE_COMPAT(addrs); + free(tcpuri); + } +} + +static prte_oob_tcp_peer_t *get_peer(const pmix_proc_t *pr); + +static prte_oob_tcp_peer_t* process_uri(char *uri) +{ + pmix_proc_t peer; + char *cptr; + char **uris = NULL; + prte_oob_tcp_peer_t *pr; + + pmix_output_verbose(5, prte_oob_base.output, + "%s:set_addr processing uri %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uri); + + /* find the first semi-colon in the string */ + cptr = strchr(uri, ';'); + if (NULL == cptr) { + /* got a problem - there must be at least two fields, + * the first containing the process name of our peer + * and all others containing the OOB contact info + */ + PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); + return NULL; + } + *cptr = '\0'; + cptr++; + /* the first field is the process name, so convert it */ + prte_util_convert_string_to_process_name(&peer, uri); + + /* if the peer is us, no need to go further as we already + * know our own contact info + */ + if (PMIX_CHECK_PROCID(&peer, PRTE_PROC_MY_NAME)) { + pmix_output_verbose(5, prte_oob_base.output, + "%s:set_addr peer %s is me", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + PRTE_NAME_PRINT(&peer)); + return NULL; + } + + /* split the rest of the uri into component parts */ + uris = PMIX_ARGV_SPLIT_COMPAT(cptr, ';'); + + /* get the peer object for this process */ + pr = get_peer(&peer); + if (NULL == pr) { + pr = PMIX_NEW(prte_oob_tcp_peer_t); + PMIX_XFER_PROCID(&pr->name, &peer); + pmix_list_append(&prte_oob_base.peers, &pr->super); + } + + set_addr(&pr->name, uris); + PMIX_ARGV_FREE_COMPAT(uris); + return pr; +} + +static prte_oob_tcp_peer_t *get_peer(const pmix_proc_t *pr) +{ + prte_oob_tcp_peer_t *peer; + + PMIX_LIST_FOREACH(peer, &prte_oob_base.peers, prte_oob_tcp_peer_t) + { + 
if (PMIX_CHECK_PROCID(pr, &peer->name)) { + return peer; + } + } + return NULL; +} diff --git a/src/rml/oob/oob_tcp.c b/src/rml/oob/oob_tcp.c new file mode 100644 index 0000000000..0c01b47852 --- /dev/null +++ b/src/rml/oob/oob_tcp.c @@ -0,0 +1,813 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "prte_config.h" +#include "types.h" + +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#include +#ifdef HAVE_NET_IF_H +# include +#endif +#ifdef HAVE_NETINET_IN_H +# include +#endif +#ifdef HAVE_ARPA_INET_H +# include +#endif +#ifdef HAVE_NETDB_H +# include +#endif +#include + +#include "src/include/prte_socket_errno.h" +#include "src/runtime/prte_progress_threads.h" +#include "src/util/pmix_argv.h" +#include "src/util/error.h" +#include "src/util/pmix_if.h" +#include "src/util/pmix_net.h" +#include "src/util/pmix_output.h" +#include "src/util/pmix_show_help.h" + +#include "src/mca/errmgr/errmgr.h" +#include "src/mca/ess/ess.h" +#include "src/runtime/prte_globals.h" +#include "src/threads/pmix_threads.h" +#include "src/util/name_fns.h" +#include "src/util/pmix_parse_options.h" +#include "src/util/pmix_show_help.h" + +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_listener.h" +#include "src/rml/oob/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp_sendrecv.h" + +prte_oob_base_t prte_oob_base = { + .output = -1, + .addr_count = 0, + .num_links = 0, + .max_retries = 0, + .max_uri_length = -1, + .events = PMIX_LIST_STATIC_INIT, + .peer_limit = 0, + .peers = PMIX_LIST_STATIC_INIT, + + .tcp_sndbuf = 0, + .tcp_rcvbuf = 0, + + .disable_ipv4_family = false, + .tcp_static_ports = NULL, + .tcp_dyn_ports = NULL, + .ipv4conns = NULL, + .ipv4ports = NULL, + + .disable_ipv6_family = true, + .tcp6_static_ports = NULL, + .tcp6_dyn_ports = NULL, + .ipv6conns = NULL, + .ipv6ports = NULL, + + .local_ifs = PMIX_LIST_STATIC_INIT, + .if_masks = NULL, + .num_hnp_ports = 1, + .listeners = PMIX_LIST_STATIC_INIT, + .listen_thread_active = false, + .listen_thread_tv = {3600, 0}, + .stop_thread = {-1, -1}, + .keepalive_probes = 0, + .keepalive_time = 0, + 
.keepalive_intvl = 0, + .retry_delay = 0, + .max_recon_attempts = 0 +}; + +static char **split_and_resolve(char **orig_str, char *name); + +int prte_oob_open(void) +{ + pmix_pif_t *copied_interface, *selected_interface; + struct sockaddr_storage my_ss; + /* Larger than necessary, used for copying mask */ + char string[50], **interfaces = NULL; + int kindex; + int i, rc; + bool keeploopback = false; + bool including = false; + + pmix_output_verbose(5, prte_oob_base.output, + "oob:tcp: component_available called"); + + PMIX_CONSTRUCT(&prte_oob_base.listeners, pmix_list_t); + if (PRTE_PROC_IS_MASTER) { + PMIX_CONSTRUCT(&prte_oob_base.listen_thread, pmix_thread_t); + prte_oob_base.listen_thread_active = false; + prte_oob_base.listen_thread_tv.tv_sec = 3600; + prte_oob_base.listen_thread_tv.tv_usec = 0; + } + prte_oob_base.addr_count = 0; + prte_oob_base.ipv4conns = NULL; + prte_oob_base.ipv4ports = NULL; + prte_oob_base.ipv6conns = NULL; + prte_oob_base.ipv6ports = NULL; + prte_oob_base.if_masks = NULL; + + PMIX_CONSTRUCT(&prte_oob_base.local_ifs, pmix_list_t); + PMIX_CONSTRUCT(&prte_oob_base.peers, pmix_list_t); + + /* if interface include was given, construct a list + * of those interfaces which match the specifications - remember, + * the includes could be given as named interfaces, IP addrs, or + * subnet+mask + */ + if (NULL != prte_if_include) { + interfaces = split_and_resolve(&prte_if_include, + "include"); + including = true; + } else if (NULL != prte_if_exclude) { + interfaces = split_and_resolve(&prte_if_exclude, + "exclude"); + } + + /* if we are the master, then check the interfaces for loopbacks + * and keep loopbacks only if no non-loopback interface exists */ + if (PRTE_PROC_IS_MASTER) { + keeploopback = true; + PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) + { + if (!(selected_interface->if_flags & IFF_LOOPBACK)) { + keeploopback = false; + break; + } + } + } + + /* look at all available interfaces */ + 
PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) + { + if ((selected_interface->if_flags & IFF_LOOPBACK) && + !keeploopback) { + continue; + } + + + i = selected_interface->if_index; + kindex = selected_interface->if_kernel_index; + memcpy((struct sockaddr *) &my_ss, &selected_interface->if_addr, + MIN(sizeof(struct sockaddr_storage), sizeof(selected_interface->if_addr))); + + /* ignore non-ip4/6 interfaces */ + if (AF_INET != my_ss.ss_family +#if PRTE_ENABLE_IPV6 + && AF_INET6 != my_ss.ss_family +#endif + ) { + continue; + } + + /* ignore any virtual interfaces */ + if (0 == strncmp(selected_interface->if_name, "vir", 3)) { + continue; + } + + /* handle include/exclude directives */ + if (NULL != interfaces) { + /* check for match */ + rc = pmix_ifmatches(kindex, interfaces); + /* if one of the network specifications isn't parseable, then + * error out as we can't do what was requested + */ + if (PRTE_ERR_NETWORK_NOT_PARSEABLE == rc) { + pmix_show_help("help-oob-tcp.txt", "not-parseable", true); + PMIX_ARGV_FREE_COMPAT(interfaces); + return PRTE_ERR_BAD_PARAM; + } + /* if we are including, then ignore this if not present */ + if (including) { + if (PMIX_SUCCESS != rc) { + pmix_output_verbose(20, prte_oob_base.output, + "%s oob:tcp:init rejecting interface %s (not in include list)", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name); + continue; + } + } else { + /* we are excluding, so ignore if present */ + if (PMIX_SUCCESS == rc) { + pmix_output_verbose(20, prte_oob_base.output, + "%s oob:tcp:init rejecting interface %s (in exclude list)", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name); + continue; + } + } + } + + /* Refs ticket #3019 + * it would probably be worthwhile to print out a warning if PRRTE detects multiple + * IP interfaces that are "up" on the same subnet (because that's a Bad Idea). Note + * that we should only check for this after applying the relevant include/exclude + * list MCA params. 
If we detect redundant ports, we can also automatically ignore + * them so that applications won't hang. + */ + + /* add this address to our connections */ + if (AF_INET == my_ss.ss_family) { + pmix_output_verbose(10, prte_oob_base.output, + "%s oob:tcp:init adding %s to our list of %s connections", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + pmix_net_get_hostname((struct sockaddr *) &my_ss), + (AF_INET == my_ss.ss_family) ? "V4" : "V6"); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv4conns, + pmix_net_get_hostname((struct sockaddr *) &my_ss)); + } else if (AF_INET6 == my_ss.ss_family) { +#if PRTE_ENABLE_IPV6 + pmix_output_verbose(10, prte_oob_base.output, + "%s oob:tcp:init adding %s to our list of %s connections", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + pmix_net_get_hostname((struct sockaddr *) &my_ss), + (AF_INET == my_ss.ss_family) ? "V4" : "V6"); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv6conns, + pmix_net_get_hostname((struct sockaddr *) &my_ss)); +#endif // PRTE_ENABLE_IPV6 + } else { + pmix_output_verbose(10, prte_oob_base.output, + "%s oob:tcp:init ignoring %s from out list of connections", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + pmix_net_get_hostname((struct sockaddr *) &my_ss)); + continue; + } + copied_interface = PMIX_NEW(pmix_pif_t); + if (NULL == copied_interface) { + return PRTE_ERR_OUT_OF_RESOURCE; + } + pmix_string_copy(copied_interface->if_name, selected_interface->if_name, PMIX_IF_NAMESIZE); + copied_interface->if_index = i; + copied_interface->if_kernel_index = kindex; + copied_interface->af_family = my_ss.ss_family; + copied_interface->if_flags = selected_interface->if_flags; + copied_interface->if_speed = selected_interface->if_speed; + memcpy(&copied_interface->if_addr, &selected_interface->if_addr, + sizeof(struct sockaddr_storage)); + copied_interface->if_mask = selected_interface->if_mask; + /* If bandwidth is not found, set to arbitrary non zero value */ + copied_interface->if_bandwidth = selected_interface->if_bandwidth > 0 + 
? selected_interface->if_bandwidth + : 1; + memcpy(&copied_interface->if_mac, &selected_interface->if_mac, + sizeof(copied_interface->if_mac)); + copied_interface->ifmtu = selected_interface->ifmtu; + /* Add the if_mask to the list */ + sprintf(string, "%d", selected_interface->if_mask); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.if_masks, string); + pmix_list_append(&prte_oob_base.local_ifs, &(copied_interface->super)); + } + + if (0 == PMIX_ARGV_COUNT_COMPAT(prte_oob_base.ipv4conns) +#if PRTE_ENABLE_IPV6 + && 0 == PMIX_ARGV_COUNT_COMPAT(prte_oob_base.ipv6conns) +#endif + ) { + return PRTE_ERR_NOT_AVAILABLE; + } + + // start the listeners + if (PRTE_SUCCESS != (rc = prte_oob_tcp_start_listening())) { + PRTE_ERROR_LOG(rc); + } + return rc; +} + +void prte_oob_close(void) +{ + int i = 0, rc; + + if (PRTE_PROC_IS_MASTER && prte_oob_base.listen_thread_active) { + prte_oob_base.listen_thread_active = false; + /* tell the thread to exit */ + rc = write(prte_oob_base.stop_thread[1], &i, sizeof(int)); + if (0 < rc) { + pmix_thread_join(&prte_oob_base.listen_thread, NULL); + } + + close(prte_oob_base.stop_thread[0]); + close(prte_oob_base.stop_thread[1]); + + } + + PMIX_LIST_DESTRUCT(&prte_oob_base.local_ifs); + PMIX_LIST_DESTRUCT(&prte_oob_base.peers); + + if (NULL != prte_oob_base.ipv4conns) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv4conns); + } + if (NULL != prte_oob_base.ipv4ports) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv4ports); + } + +#if PRTE_ENABLE_IPV6 + if (NULL != prte_oob_base.ipv6conns) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv6conns); + } + if (NULL != prte_oob_base.ipv6ports) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv6ports); + } +#endif + if (NULL != prte_oob_base.if_masks) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.if_masks); + } + + if (0 <= prte_oob_base.output) { + pmix_output_close(prte_oob_base.output); + } +} + +static char *static_port_string; +#if PRTE_ENABLE_IPV6 +static char *static_port_string6; +#endif // PRTE_ENABLE_IPV6 + +static 
char *dyn_port_string; +#if PRTE_ENABLE_IPV6 +static char *dyn_port_string6; +#endif + +int prte_oob_register(void) +{ + prte_oob_base.peer_limit = -1; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "peer_limit", + "Maximum number of peer connections to simultaneously maintain (-1 = infinite)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.peer_limit); + + prte_oob_base.max_retries = 2; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "peer_retries", + "Number of times to try shutting down a connection before giving up", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.max_retries); + + prte_oob_base.tcp_sndbuf = 0; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "sndbuf", + "TCP socket send buffering size (in bytes, 0 => leave system default)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.tcp_sndbuf); + + prte_oob_base.tcp_rcvbuf = 0; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "rcvbuf", + "TCP socket receive buffering size (in bytes, 0 => leave system default)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.tcp_rcvbuf); + + + static_port_string = NULL; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "static_ipv4_ports", + "Static ports for daemons and procs (IPv4)", + PMIX_MCA_BASE_VAR_TYPE_STRING, + &static_port_string); + + /* if ports were provided, parse the provided range */ + if (NULL != static_port_string) { + pmix_util_parse_range_options(static_port_string, &prte_oob_base.tcp_static_ports); + if (0 == strcmp(prte_oob_base.tcp_static_ports[0], "-1")) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.tcp_static_ports); + prte_oob_base.tcp_static_ports = NULL; + } + } else { + prte_oob_base.tcp_static_ports = NULL; + } + +#if PRTE_ENABLE_IPV6 + static_port_string6 = NULL; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "static_ipv6_ports", + "Static ports for daemons and procs (IPv6)", + PMIX_MCA_BASE_VAR_TYPE_STRING, + &static_port_string6); + + /* if ports were provided, parse the provided range */ + 
if (NULL != static_port_string6) { + pmix_util_parse_range_options(static_port_string6, + &prte_oob_base.tcp6_static_ports); + if (0 == strcmp(prte_oob_base.tcp6_static_ports[0], "-1")) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.tcp6_static_ports); + prte_oob_base.tcp6_static_ports = NULL; + } + } else { + prte_oob_base.tcp6_static_ports = NULL; + } +#endif // PRTE_ENABLE_IPV6 + + if (NULL != prte_oob_base.tcp_static_ports + || NULL != prte_oob_base.tcp6_static_ports) { + prte_static_ports = true; + } + + dyn_port_string = NULL; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "dynamic_ipv4_ports", + "Range of ports to be dynamically used by daemons and procs (IPv4)", + PMIX_MCA_BASE_VAR_TYPE_STRING, + &dyn_port_string); + /* if ports were provided, parse the provided range */ + if (NULL != dyn_port_string) { + /* can't have both static and dynamic ports! */ + if (prte_static_ports) { + char *err = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.tcp_static_ports, ','); + pmix_show_help("help-oob-tcp.txt", "static-and-dynamic", true, err, dyn_port_string); + free(err); + return PRTE_ERROR; + } + pmix_util_parse_range_options(dyn_port_string, &prte_oob_base.tcp_dyn_ports); + if (0 == strcmp(prte_oob_base.tcp_dyn_ports[0], "-1")) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.tcp_dyn_ports); + prte_oob_base.tcp_dyn_ports = NULL; + } + } else { + prte_oob_base.tcp_dyn_ports = NULL; + } + +#if PRTE_ENABLE_IPV6 + dyn_port_string6 = NULL; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "dynamic_ipv6_ports", + "Range of ports to be dynamically used by daemons and procs (IPv6)", + PMIX_MCA_BASE_VAR_TYPE_STRING, + &dyn_port_string6); + /* if ports were provided, parse the provided range */ + if (NULL != dyn_port_string6) { + /* can't have both static and dynamic ports! 
*/ + if (prte_static_ports) { + char *err4 = NULL, *err6 = NULL; + if (NULL != prte_oob_base.tcp_static_ports) { + err4 = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.tcp_static_ports, ','); + } + if (NULL != prte_oob_base.tcp6_static_ports) { + err6 = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.tcp6_static_ports, ','); + } + pmix_show_help("help-oob-tcp.txt", "static-and-dynamic-ipv6", true, + (NULL == err4) ? "N/A" : err4, (NULL == err6) ? "N/A" : err6, + dyn_port_string6); + if (NULL != err4) { + free(err4); + } + if (NULL != err6) { + free(err6); + } + return PRTE_ERROR; + } + pmix_util_parse_range_options(dyn_port_string6, &prte_oob_base.tcp6_dyn_ports); + if (0 == strcmp(prte_oob_base.tcp6_dyn_ports[0], "-1")) { + PMIX_ARGV_FREE_COMPAT(prte_oob_base.tcp6_dyn_ports); + prte_oob_base.tcp6_dyn_ports = NULL; + } + } else { + prte_oob_base.tcp6_dyn_ports = NULL; + } +#endif // PRTE_ENABLE_IPV6 + + prte_oob_base.disable_ipv4_family = false; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "disable_ipv4_family", + "Disable the IPv4 interfaces", + PMIX_MCA_BASE_VAR_TYPE_BOOL, + &prte_oob_base.disable_ipv4_family); + +#if PRTE_ENABLE_IPV6 + prte_oob_base.disable_ipv6_family = false; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "disable_ipv6_family", + "Disable the IPv6 interfaces", + PMIX_MCA_BASE_VAR_TYPE_BOOL, + &prte_oob_base.disable_ipv6_family); +#endif // PRTE_ENABLE_IPV6 + + // Wait for this amount of time before sending the first keepalive probe + prte_oob_base.keepalive_time = 300; + (void)pmix_mca_base_var_register("prte", "prte", NULL, "keepalive_time", + "Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables " + "keepalive functionality)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.keepalive_time); + + // Resend keepalive probe every INT seconds + prte_oob_base.keepalive_intvl = 20; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "keepalive_intvl", + "Time between successive keepalive pings when peer has 
not responded, in seconds (ignored " + "if keepalive_time <= 0)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.keepalive_intvl); + + // After sending PR probes every INT seconds consider the connection dead + prte_oob_base.keepalive_probes = 9; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "keepalive_probes", + "Number of keepalives that can be missed before " + "declaring error (ignored if keepalive_time <= 0)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.keepalive_probes); + + prte_oob_base.retry_delay = 0; + (void) pmix_mca_base_var_register("prte","prte", NULL, "retry_delay", + "Time (in sec) to wait before trying to connect to peer again", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.retry_delay); + + prte_oob_base.max_recon_attempts = 10; + (void) pmix_mca_base_var_register("prte", "prte", NULL, "max_recon_attempts", + "Max number of times to attempt connection before giving up (-1 -> never give up)", + PMIX_MCA_BASE_VAR_TYPE_INT, + &prte_oob_base.max_recon_attempts); + return PRTE_SUCCESS; +} + +/* + * Local utility functions + */ +static void recv_handler(int sd, short flags, void *user); + +/* Called by prte_oob_tcp_accept() and connection_handler() on + * a socket that has been accepted. This call finishes processing the + * socket, including setting socket options and registering for the + * OOB-level connection handshake. Used in both the threaded and + * event listen modes. 
+ */ +void prte_oob_accept_connection(const int accepted_fd, const struct sockaddr *addr) +{ + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s accept_connection: %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + pmix_net_get_hostname(addr), pmix_net_get_port(addr)); + + /* setup socket options */ + prte_oob_tcp_set_socket_options(accepted_fd); + + /* use a one-time event to wait for receipt of peer's + * process ident message to complete this connection + */ + PRTE_ACTIVATE_TCP_ACCEPT_STATE(accepted_fd, addr, recv_handler); +} + +/* API functions */ +void prte_oob_ping(const pmix_proc_t *proc) +{ + prte_oob_tcp_peer_t *peer; + + pmix_output_verbose(2, prte_oob_base.output, + "%s:[%s:%d] processing ping to peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + __FILE__, __LINE__, PRTE_NAME_PRINT(proc)); + + /* do we know this peer? */ + if (NULL == (peer = prte_oob_tcp_peer_lookup(proc))) { + /* push this back to the component so it can try + * another module within this transport. 
If no + * module can be found, the component can push back + * to the framework so another component can try + */ + pmix_output_verbose(2, prte_oob_base.output, + "%s:[%s:%d] hop %s unknown", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + __FILE__, __LINE__, PRTE_NAME_PRINT(proc)); + PRTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, proc, prte_mca_oob_tcp_component_hop_unknown); + return; + } + + /* if we are already connected, there is nothing to do */ + if (MCA_OOB_TCP_CONNECTED == peer->state) { + pmix_output_verbose(2, prte_oob_base.output, + "%s:[%s:%d] already connected to peer %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, + PRTE_NAME_PRINT(proc)); + return; + } + + /* if we are already connecting, there is nothing to do */ + if (MCA_OOB_TCP_CONNECTING == peer->state || MCA_OOB_TCP_CONNECT_ACK == peer->state) { + pmix_output_verbose(2, prte_oob_base.output, + "%s:[%s:%d] already connecting to peer %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, + PRTE_NAME_PRINT(proc)); + return; + } + + /* attempt the connection */ + peer->state = MCA_OOB_TCP_CONNECTING; + PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); +} + +/* + * Event callback when there is data available on the registered + * socket to recv. This is called for the listen sockets to accept an + * incoming connection, on new sockets trying to complete the software + * connection process, and for probes. Data on an established + * connection is handled elsewhere. 
+ */ +static void recv_handler(int sd, short flg, void *cbdata) +{ + prte_oob_tcp_conn_op_t *op = (prte_oob_tcp_conn_op_t *) cbdata; + int flags; + prte_oob_tcp_hdr_t hdr; + prte_oob_tcp_peer_t *peer; + PRTE_HIDE_UNUSED_PARAMS(flg); + + PMIX_ACQUIRE_OBJECT(op); + + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s:tcp:recv:handler called", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); + + /* get the handshake */ + if (PRTE_SUCCESS != prte_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) { + goto cleanup; + } + + /* finish processing ident */ + if (MCA_OOB_TCP_IDENT == hdr.type) { + if (NULL == (peer = prte_oob_tcp_peer_lookup(&hdr.origin))) { + /* should never happen */ + prte_oob_tcp_peer_close(peer); + goto cleanup; + } + /* set socket up to be non-blocking */ + if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { + pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), + prte_socket_errno); + } else { + flags |= O_NONBLOCK; + if (fcntl(sd, F_SETFL, flags) < 0) { + pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), + prte_socket_errno); + } + } + /* is the peer instance willing to accept this connection */ + peer->sd = sd; + if (prte_oob_tcp_peer_accept(peer) == false) { + if (OOB_TCP_DEBUG_CONNECT + <= pmix_output_get_verbosity(prte_oob_base.output)) { + pmix_output(0, + "%s-%s prte_oob_tcp_recv_connect: " + "rejected connection from %s connection state %d", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), + PRTE_NAME_PRINT(&(hdr.origin)), peer->state); + } + CLOSE_THE_SOCKET(sd); + } + } + +cleanup: + PMIX_RELEASE(op); +} + +/* + * Go through a list of argv; if there are any subnet specifications + * (a.b.c.d/e), resolve them to an interface name (Currently only + * supporting IPv4). If unresolvable, warn and remove. 
+ */ +static char **split_and_resolve(char **orig_str, char *name) +{ + pmix_pif_t *selected_interface; + int i, n, ret, match_count, interface_count; + char **argv, **interfaces, *str, *tmp; + char if_name[IF_NAMESIZE]; + struct sockaddr_storage argv_inaddr, if_inaddr; + uint32_t argv_prefix; + + /* Sanity check */ + if (NULL == orig_str || NULL == *orig_str) { + return NULL; + } + + argv = PMIX_ARGV_SPLIT_COMPAT(*orig_str, ','); + if (NULL == argv) { + return NULL; + } + interface_count = 0; + interfaces = NULL; + for (i = 0; NULL != argv[i]; ++i) { + if (isalpha(argv[i][0])) { + /* This is an interface name. If not already in the interfaces array, add it */ + for (n = 0; n < interface_count; n++) { + if (0 == strcmp(argv[i], interfaces[n])) { + break; + } + } + if (n == interface_count) { + pmix_output_verbose(20, + prte_oob_base.output, + "oob:tcp: Using interface: %s ", argv[i]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, argv[i]); + ++interface_count; + } + continue; + } + + /* Found a subnet notation. Convert it to an IP + address/netmask. Get the prefix first. 
*/ + argv_prefix = 0; + tmp = strdup(argv[i]); + str = strchr(argv[i], '/'); + if (NULL == str) { + pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", + true, name, prte_process_info.nodename, + tmp, "Invalid specification (missing \"/\")"); + free(argv[i]); + free(tmp); + continue; + } + *str = '\0'; + argv_prefix = atoi(str + 1); + + /* Now convert the IPv4 address */ + ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET; + ret = inet_pton(AF_INET, argv[i], + &((struct sockaddr_in*) &argv_inaddr)->sin_addr); + free(argv[i]); + + if (1 != ret) { + pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", + true, name, prte_process_info.nodename, tmp, + "Invalid specification (inet_pton() failed)"); + free(tmp); + continue; + } + pmix_output_verbose(20, prte_oob_base.output, + "%s oob:tcp: Searching for %s address+prefix: %s / %u", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + name, + pmix_net_get_hostname((struct sockaddr*) &argv_inaddr), + argv_prefix); + + /* Go through all interfaces and see if we can find a match */ + match_count = 0; + PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) { + pmix_ifindextoaddr(selected_interface->if_kernel_index, + (struct sockaddr*) &if_inaddr, + sizeof(if_inaddr)); + if (pmix_net_samenetwork((struct sockaddr_storage*) &argv_inaddr, + (struct sockaddr_storage*) &if_inaddr, + argv_prefix)) { + /* We found a match. If it's not already in the interfaces array, + add it. 
If it's already in the array, treat it as a match */ + match_count = match_count + 1; + pmix_ifindextoname(selected_interface->if_kernel_index, if_name, sizeof(if_name)); + for (n = 0; n < interface_count; n++) { + if (0 == strcmp(if_name, interfaces[n])) { + break; + } + } + if (n == interface_count) { + pmix_output_verbose(20, + prte_oob_base.output, + "oob:tcp: Found match: %s (%s)", + pmix_net_get_hostname((struct sockaddr*) &if_inaddr), + if_name); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, if_name); + ++interface_count; + } + } + } + /* If we didn't find a match, keep trying */ + if (0 == match_count) { + pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", + true, name, prte_process_info.nodename, tmp, + "Did not find interface matching this subnet"); + free(tmp); + continue; + } + + free(tmp); + } + + /* Mark the end of the interface name array with NULL */ + if (NULL != interfaces) { + interfaces[interface_count] = NULL; + } + free(argv); + free(*orig_str); + *orig_str = PMIX_ARGV_JOIN_COMPAT(interfaces, ','); + return interfaces; +} + +PMIX_CLASS_INSTANCE(prte_oob_send_t, + pmix_object_t, + NULL, NULL); diff --git a/src/mca/oob/tcp/oob_tcp.h b/src/rml/oob/oob_tcp.h similarity index 64% rename from src/mca/oob/tcp/oob_tcp.h rename to src/rml/oob/oob_tcp.h index 41bfaba28f..e23586dbd1 100644 --- a/src/mca/oob/tcp/oob_tcp.h +++ b/src/rml/oob/oob_tcp.h @@ -15,7 +15,7 @@ * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,8 +33,7 @@ #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" -#include "src/mca/oob/base/base.h" -#include "src/mca/oob/oob.h" +#include "src/rml/oob/oob.h" BEGIN_C_DECLS @@ -42,10 +41,6 @@ BEGIN_C_DECLS #define OOB_TCP_DEBUG_FAIL 2 #define OOB_TCP_DEBUG_CONNECT 7 -/* forward declare a couple of structures */ -struct prte_oob_tcp_module_t; -struct prte_oob_tcp_msg_error_t; - /* define a struct for tracking NIC addresses */ typedef struct { pmix_list_item_t super; @@ -54,19 +49,6 @@ typedef struct { } prte_oob_tcp_nicaddr_t; PMIX_CLASS_DECLARATION(prte_oob_tcp_nicaddr_t); -/* Module definition */ -typedef void (*prte_oob_tcp_module_accept_connection_fn_t)(const int accepted_fd, - const struct sockaddr *addr); -typedef void (*prte_oob_tcp_module_ping_fn_t)(const pmix_proc_t *proc); -typedef void (*prte_oob_tcp_module_send_nb_fn_t)(prte_rml_send_t *msg); - -typedef struct { - prte_oob_tcp_module_accept_connection_fn_t accept_connection; - prte_oob_tcp_module_ping_fn_t ping; - prte_oob_tcp_module_send_nb_fn_t send_nb; -} prte_oob_tcp_module_t; -PRTE_MODULE_EXPORT extern prte_oob_tcp_module_t prte_oob_tcp_module; - /** * the state of the connection */ @@ -82,10 +64,15 @@ typedef enum { } prte_oob_tcp_state_t; /* module-level shared functions */ -PRTE_MODULE_EXPORT void prte_oob_tcp_send_handler(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_oob_tcp_recv_handler(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_oob_tcp_queue_msg(int sd, short args, void *cbdata); - +PRTE_EXPORT void prte_oob_tcp_send_handler(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_tcp_recv_handler(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_tcp_queue_msg(int sd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_accept_connection(const int accepted_fd, const struct sockaddr *addr); +PRTE_EXPORT void prte_mca_oob_tcp_component_lost_connection(int fd, 
short args, void *cbdata); +PRTE_EXPORT void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_ping(const pmix_proc_t *proc); END_C_DECLS #endif /* MCA_OOB_TCP_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_common.c b/src/rml/oob/oob_tcp_common.c similarity index 77% rename from src/mca/oob/tcp/oob_tcp_common.c rename to src/rml/oob/oob_tcp_common.c index 9671ee254f..928dff0056 100644 --- a/src/mca/oob/tcp/oob_tcp_common.c +++ b/src/rml/oob/oob_tcp_common.c @@ -16,7 +16,7 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -63,10 +63,9 @@ #include "src/util/pmix_net.h" #include "src/util/pmix_output.h" -#include "oob_tcp_common.h" -#include "oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" /** * Set socket buffering @@ -87,27 +86,27 @@ static void set_keepalive(int sd) /* Set the option active */ option = 1; if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; } # if defined(TCP_KEEPALIVE) /* set the idle time */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE, &prte_mca_oob_tcp_component.keepalive_time, - sizeof(prte_mca_oob_tcp_component.keepalive_time)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE, &prte_oob_base.keepalive_time, + sizeof(prte_oob_base.keepalive_time)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; } # elif defined(TCP_KEEPIDLE) /* set the idle time */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE, &prte_mca_oob_tcp_component.keepalive_time, - sizeof(prte_mca_oob_tcp_component.keepalive_time)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE, &prte_oob_base.keepalive_time, + sizeof(prte_oob_base.keepalive_time)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; @@ -115,10 +114,10 @@ static void 
set_keepalive(int sd) # endif // TCP_KEEPIDLE # if defined(TCP_KEEPINTVL) /* set the keepalive interval */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL, &prte_mca_oob_tcp_component.keepalive_intvl, - sizeof(prte_mca_oob_tcp_component.keepalive_intvl)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL, &prte_oob_base.keepalive_intvl, + sizeof(prte_oob_base.keepalive_intvl)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; @@ -126,10 +125,10 @@ static void set_keepalive(int sd) # endif // TCP_KEEPINTVL # if defined(TCP_KEEPCNT) /* set the miss rate */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT, &prte_mca_oob_tcp_component.keepalive_probes, - sizeof(prte_mca_oob_tcp_component.keepalive_probes)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT, &prte_oob_base.keepalive_probes, + sizeof(prte_oob_base.keepalive_probes)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } @@ -144,33 +143,33 @@ void prte_oob_tcp_set_socket_options(int sd) optval = 1; if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *) &optval, sizeof(optval)) < 0) { prte_backtrace_print(stderr, NULL, 1); - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } #endif #if defined(SO_SNDBUF) - if (prte_mca_oob_tcp_component.tcp_sndbuf > 0 - && setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *) &prte_mca_oob_tcp_component.tcp_sndbuf, + if (prte_oob_base.tcp_sndbuf > 0 + && setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *) &prte_oob_base.tcp_sndbuf, 
sizeof(int)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } #endif #if defined(SO_RCVBUF) - if (prte_mca_oob_tcp_component.tcp_rcvbuf > 0 - && setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *) &prte_mca_oob_tcp_component.tcp_rcvbuf, + if (prte_oob_base.tcp_rcvbuf > 0 + && setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *) &prte_oob_base.tcp_rcvbuf, sizeof(int)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } #endif - if (0 < prte_mca_oob_tcp_component.keepalive_time) { + if (0 < prte_oob_base.keepalive_time) { set_keepalive(sd); } } @@ -179,7 +178,7 @@ prte_oob_tcp_peer_t *prte_oob_tcp_peer_lookup(const pmix_proc_t *name) { prte_oob_tcp_peer_t *peer; - PMIX_LIST_FOREACH(peer, &prte_mca_oob_tcp_component.peers, prte_oob_tcp_peer_t) + PMIX_LIST_FOREACH(peer, &prte_oob_base.peers, prte_oob_tcp_peer_t) { if (PMIX_CHECK_PROCID(name, &peer->name)) { return peer; diff --git a/src/mca/oob/tcp/oob_tcp_common.h b/src/rml/oob/oob_tcp_common.h similarity index 77% rename from src/mca/oob/tcp/oob_tcp_common.h rename to src/rml/oob/oob_tcp_common.h index 4e2bfe5043..26e1408208 100644 --- a/src/mca/oob/tcp/oob_tcp_common.h +++ b/src/rml/oob/oob_tcp_common.h @@ -15,7 +15,7 @@ * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,10 +28,10 @@ #include "prte_config.h" -#include "oob_tcp.h" -#include "oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_peer.h" -PRTE_MODULE_EXPORT void prte_oob_tcp_set_socket_options(int sd); -PRTE_MODULE_EXPORT char *prte_oob_tcp_state_print(prte_oob_tcp_state_t state); -PRTE_MODULE_EXPORT prte_oob_tcp_peer_t *prte_oob_tcp_peer_lookup(const pmix_proc_t *name); +PRTE_EXPORT void prte_oob_tcp_set_socket_options(int sd); +PRTE_EXPORT char *prte_oob_tcp_state_print(prte_oob_tcp_state_t state); +PRTE_EXPORT prte_oob_tcp_peer_t *prte_oob_tcp_peer_lookup(const pmix_proc_t *name); #endif /* _MCA_OOB_TCP_COMMON_H_ */ diff --git a/src/rml/oob/oob_tcp_component.c b/src/rml/oob/oob_tcp_component.c new file mode 100644 index 0000000000..734b3eb1c3 --- /dev/null +++ b/src/rml/oob/oob_tcp_component.c @@ -0,0 +1,266 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. 
All rights reserved. + * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights + * reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * In windows, many of the socket functions return an EWOULDBLOCK + * instead of things like EAGAIN, EINPROGRESS, etc. It has been + * verified that this will not conflict with other error codes that + * are returned by these functions under UNIX/Linux environments + */ + +#include "prte_config.h" +#include "types.h" + +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#include +#ifdef HAVE_NET_IF_H +# include +#endif +#ifdef HAVE_NETINET_IN_H +# include +#endif +#ifdef HAVE_ARPA_INET_H +# include +#endif +#ifdef HAVE_NETDB_H +# include +#endif +#include +#include + +#ifndef MIN +# define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#include "src/class/pmix_list.h" +#include "src/event/event-internal.h" +#include "src/include/prte_socket_errno.h" +#include "src/runtime/prte_progress_threads.h" +#include "src/util/pmix_argv.h" +#include "src/util/pmix_if.h" +#include "src/util/error.h" +#include "src/util/pmix_net.h" +#include "src/util/pmix_output.h" +#include "src/util/pmix_show_help.h" + +#include "src/mca/errmgr/errmgr.h" +#include "src/mca/ess/ess.h" +#include "src/rml/rml.h" +#include "src/mca/state/state.h" +#include "src/runtime/prte_globals.h" +#include "src/runtime/prte_wait.h" +#include "src/threads/pmix_threads.h" +#include "src/util/attr.h" +#include "src/util/name_fns.h" +#include "src/util/pmix_parse_options.h" +#include "src/util/pmix_show_help.h" + +#include "src/rml/oob/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_listener.h" +#include "src/rml/oob/oob_tcp_peer.h" + +void prte_mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) 
+{ + prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; + PRTE_HIDE_UNUSED_PARAMS(fd, args); + + PMIX_ACQUIRE_OBJECT(pop); + + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s tcp:lost connection called for peer %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); + + if (!prte_finalizing) { + /* activate the proc state */ + if (PRTE_SUCCESS != prte_rml_route_lost(pop->peer.rank)) { + PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_LIFELINE_LOST); + } else { + PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_COMM_FAILED); + } + } + PMIX_RELEASE(pop); +} + +void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata) +{ + prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata; + PRTE_HIDE_UNUSED_PARAMS(fd, args); + + PMIX_ACQUIRE_OBJECT(mop); + + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s tcp:no route called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + PRTE_NAME_PRINT(&mop->hop)); + + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + /* just ignore the problem */ + PMIX_RELEASE(mop); + return; + } + + /* report the error */ + PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); + + PMIX_RELEASE(mop); +} + +void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) +{ + prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata; + PRTE_HIDE_UNUSED_PARAMS(fd, args); + + PMIX_ACQUIRE_OBJECT(mop); + + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s tcp:unknown hop called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + PRTE_NAME_PRINT(&mop->hop)); + + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + /* just ignore the problem */ + PMIX_RELEASE(mop); + return; + } + + /* post the error */ + PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); + + PMIX_RELEASE(mop); +} + +void 
prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata) +{ + prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; + PRTE_HIDE_UNUSED_PARAMS(fd, args); + + PMIX_ACQUIRE_OBJECT(pop); + + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s tcp:failed_to_connect called for peer %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); + + /* if we are terminating, then don't attempt to reconnect */ + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + PMIX_RELEASE(pop); + return; + } + + /* activate the proc state */ + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, + "%s tcp:failed_to_connect unable to reach peer %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); + + PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_FAILED_TO_CONNECT); + PMIX_RELEASE(pop); +} + + +/* OOB TCP Class instances */ + +static void peer_cons(prte_oob_tcp_peer_t *peer) +{ + peer->auth_method = NULL; + peer->sd = -1; + PMIX_CONSTRUCT(&peer->addrs, pmix_list_t); + peer->active_addr = NULL; + peer->state = MCA_OOB_TCP_UNCONNECTED; + peer->num_retries = 0; + PMIX_CONSTRUCT(&peer->send_queue, pmix_list_t); + peer->send_msg = NULL; + peer->recv_msg = NULL; + peer->send_ev_active = false; + peer->recv_ev_active = false; + peer->timer_ev_active = false; +} +static void peer_des(prte_oob_tcp_peer_t *peer) +{ + if (NULL != peer->auth_method) { + free(peer->auth_method); + } + if (peer->send_ev_active) { + prte_event_del(&peer->send_event); + } + if (peer->recv_ev_active) { + prte_event_del(&peer->recv_event); + } + if (peer->timer_ev_active) { + prte_event_del(&peer->timer_event); + } + if (0 <= peer->sd) { + pmix_output_verbose(2, prte_oob_base.output, + "%s CLOSING SOCKET %d", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), peer->sd); + CLOSE_THE_SOCKET(peer->sd); + } + PMIX_LIST_DESTRUCT(&peer->addrs); + PMIX_LIST_DESTRUCT(&peer->send_queue); +} 
+PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_t, pmix_list_item_t, peer_cons, peer_des); + +static void padd_cons(prte_oob_tcp_addr_t *ptr) +{ + memset(&ptr->addr, 0, sizeof(ptr->addr)); + ptr->retries = 0; + ptr->state = MCA_OOB_TCP_UNCONNECTED; +} +PMIX_CLASS_INSTANCE(prte_oob_tcp_addr_t, pmix_list_item_t, padd_cons, NULL); + +static void pop_cons(prte_oob_tcp_peer_op_t *pop) +{ + pop->net = NULL; + pop->port = NULL; +} +static void pop_des(prte_oob_tcp_peer_op_t *pop) +{ + if (NULL != pop->net) { + free(pop->net); + } + if (NULL != pop->port) { + free(pop->port); + } +} +PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_op_t, pmix_object_t, pop_cons, pop_des); + +PMIX_CLASS_INSTANCE(prte_oob_tcp_msg_op_t, pmix_object_t, NULL, NULL); + +PMIX_CLASS_INSTANCE(prte_oob_tcp_conn_op_t, pmix_object_t, NULL, NULL); + +static void nicaddr_cons(prte_oob_tcp_nicaddr_t *ptr) +{ + ptr->af_family = PF_UNSPEC; + memset(&ptr->addr, 0, sizeof(ptr->addr)); +} +PMIX_CLASS_INSTANCE(prte_oob_tcp_nicaddr_t, pmix_list_item_t, nicaddr_cons, NULL); diff --git a/src/mca/oob/tcp/oob_tcp_connection.c b/src/rml/oob/oob_tcp_connection.c similarity index 92% rename from src/mca/oob/tcp/oob_tcp_connection.c rename to src/rml/oob/oob_tcp_connection.c index d77bf2de2e..4218f26431 100644 --- a/src/mca/oob/tcp/oob_tcp_connection.c +++ b/src/rml/oob/oob_tcp_connection.c @@ -19,7 +19,7 @@ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights * reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -76,14 +76,10 @@ #include "src/util/name_fns.h" #include "src/util/pmix_show_help.h" -#include "oob_tcp.h" -#include "oob_tcp_common.h" -#include "oob_tcp_connection.h" -#include "oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_peer.h" static void tcp_peer_event_init(prte_oob_tcp_peer_t *peer); static int tcp_peer_send_connect_ack(prte_oob_tcp_peer_t *peer); @@ -100,7 +96,7 @@ static int tcp_peer_create_socket(prte_oob_tcp_peer_t *peer, sa_family_t family) return PRTE_SUCCESS; } - PMIX_OUTPUT_VERBOSE((1, prte_oob_base_framework.framework_output, + PMIX_OUTPUT_VERBOSE((1, prte_oob_base.output, "%s oob:tcp:peer creating socket to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)))); peer->sd = socket(family, SOCK_STREAM, 0); @@ -149,7 +145,7 @@ static int tcp_peer_create_socket(prte_oob_tcp_peer_t *peer, sa_family_t family) */ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) { - pmix_list_t *local_list = &prte_mca_oob_tcp_component.local_ifs, *remote_list; + pmix_list_t *local_list = &prte_oob_base.local_ifs, *remote_list; int rc, i, j, local_if_count, remote_if_count, best, best_i = 0, best_j = 0; prte_oob_tcp_conn_op_t *op = (prte_oob_tcp_conn_op_t *) cbdata; prte_reachable_t *results = NULL; @@ -198,12 +194,12 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) results = prte_reachable.reachable(local_list, remote_list); /* Find match, bind socket. 
If connect attempt failed, move to next */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -246,27 +242,27 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) peer->active_addr = (prte_oob_tcp_addr_t *) ptr; addr = peer->active_addr; /* Grab the local address we are using to bind the socket with */ - ptr = prte_mca_oob_tcp_component.local_ifs.pmix_list_sentinel.pmix_list_next; + ptr = prte_oob_base.local_ifs.pmix_list_sentinel.pmix_list_next; for (i = 0; i < best_i; i++) { ptr = ptr->pmix_list_next; } intf = (pmix_pif_t *) ptr; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s on %s:%d - %d retries", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), pmix_net_get_hostname((struct sockaddr *) &addr->addr), pmix_net_get_port((struct sockaddr *) &addr->addr), addr->retries); if (MCA_OOB_TCP_FAILED == addr->state) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: %s:%d is down", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), pmix_net_get_hostname((struct sockaddr *) &addr->addr), pmix_net_get_port((struct sockaddr *) &addr->addr)); continue; } - if (prte_mca_oob_tcp_component.max_retries < 
addr->retries) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + if (prte_oob_base.max_retries < addr->retries) { + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: %s:%d retries exceeded", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), pmix_net_get_hostname((struct sockaddr *) &addr->addr), @@ -333,7 +329,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) /* non-blocking so wait for completion */ if (prte_socket_errno == EINPROGRESS || prte_socket_errno == EWOULDBLOCK) { pmix_output_verbose( - OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s waiting for connect completion to %s - activating send event", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); /* just ensure the send_event is active */ @@ -352,9 +348,9 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) * way by trying twice before giving up */ if (ECONNABORTED == prte_socket_errno) { - if (addr->retries < prte_mca_oob_tcp_component.max_retries) { + if (addr->retries < prte_oob_base.max_retries) { pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s connection aborted by OS to %s - retrying", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -381,9 +377,9 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) /* it could be that the intended recipient just hasn't * started yet. 
if requested, wait awhile and try again * unless/until we hit the maximum number of retries */ - if (0 < prte_mca_oob_tcp_component.retry_delay) { - if (prte_mca_oob_tcp_component.max_recon_attempts < 0 - || peer->num_retries < prte_mca_oob_tcp_component.max_recon_attempts) { + if (0 < prte_oob_base.retry_delay) { + if (prte_oob_base.max_recon_attempts < 0 + || peer->num_retries < prte_oob_base.max_recon_attempts) { struct timeval tv; /* close the current socket */ CLOSE_THE_SOCKET(peer->sd); @@ -394,7 +390,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) addr->retries = 0; } /* give it awhile and try again */ - tv.tv_sec = prte_mca_oob_tcp_component.retry_delay; + tv.tv_sec = prte_oob_base.retry_delay; tv.tv_usec = 0; ++peer->num_retries; PRTE_RETRY_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect, &tv); @@ -439,7 +435,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) goto cleanup; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "Connection to proc %s succeeded", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -502,7 +498,7 @@ static int tcp_peer_send_connect_ack(prte_oob_tcp_peer_t *peer) uint16_t ack_flag = htons(1); size_t sdsize, offset = 0; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s SEND CONNECT ACK", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* load the header */ @@ -557,7 +553,7 @@ static int tcp_peer_send_connect_nack(int sd, pmix_proc_t *name) int rc = PRTE_SUCCESS; size_t sdsize, offset = 0; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s SEND CONNECT NACK", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* load the header */ @@ -630,7 
+626,7 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) int so_error = 0; prte_socklen_t so_length = sizeof(so_error); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:complete_connect called for peer %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), peer->sd); @@ -645,12 +641,12 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) } if (so_error == EINPROGRESS) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:send:handler still in progress", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); return; } else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_complete_connect: connection failed: %s (%d)", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), strerror(so_error), so_error); @@ -660,7 +656,7 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) /* No need to worry about the return code here - we return regardless at this point, and if an error did occur a message has already been printed for the user */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_complete_connect: " "connection failed with error %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -669,14 +665,14 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) return; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp_peer_complete_connect: " 
"sending ack to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); if (tcp_peer_send_connect_ack(peer) == PRTE_SUCCESS) { peer->state = MCA_OOB_TCP_CONNECT_ACK; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp_peer_complete_connect: " "setting read event on connection to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); @@ -706,7 +702,7 @@ static int tcp_peer_send_blocking(int sd, void *data, size_t size) PMIX_ACQUIRE_OBJECT(ptr); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s send blocking of %" PRIsize_t " bytes to socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), size, sd); @@ -725,7 +721,7 @@ static int tcp_peer_send_blocking(int sd, void *data, size_t size) cnt += retval; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s blocking send complete to socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), sd); @@ -741,7 +737,7 @@ static bool retry(prte_oob_tcp_peer_t *peer, int sd, bool fatal) { int cmpval; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s SIMUL CONNECTION WITH %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); cmpval = prte_util_compare_name_fields(PRTE_NS_CMP_ALL, &peer->name, PRTE_PROC_MY_NAME); @@ -800,7 +796,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob uint16_t ack_flag; bool is_new = (NULL == pr); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s RECV CONNECT ACK FROM %s ON SOCKET %d", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == pr) ? "UNKNOWN" : PRTE_NAME_PRINT(&pr->name), sd); @@ -823,14 +819,14 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } } else { /* unable to complete the recv */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s unable to complete recv of connect-ack from %s ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&peer->name), sd); return PRTE_ERR_UNREACH; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect-ack recvd from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&peer->name)); @@ -867,13 +863,13 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob if (NULL == peer) { peer = prte_oob_tcp_peer_lookup(&hdr.origin); if (NULL == peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_oob_tcp_recv_connect: connection from new peer", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); peer = PMIX_NEW(prte_oob_tcp_peer_t); PMIX_XFER_PROCID(&peer->name, &hdr.origin); peer->state = MCA_OOB_TCP_ACCEPTING; - pmix_list_append(&prte_mca_oob_tcp_component.peers, &peer->super); + pmix_list_append(&prte_oob_base.peers, &peer->super); } } else { /* compare the peers name to the expected value */ @@ -889,7 +885,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect-ack header from %s is okay", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -901,7 
+897,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } if (!tcp_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) { /* unable to complete the recv but should never happen */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s unable to complete recv of connect-ack from %s ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), peer->sd); @@ -986,7 +982,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } free(msg); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect-ack version from %s matches ours", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -997,15 +993,10 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob return PRTE_SUCCESS; } - /* set the peer into the component and OOB-level peer tables to indicate - * that we know this peer and we will be handling him - */ - PRTE_ACTIVATE_TCP_CMP_OP(peer, prte_mca_oob_tcp_component_set_module); - /* connected */ tcp_peer_connected(peer); if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { prte_oob_tcp_peer_dump(peer, "connected"); } return PRTE_SUCCESS; @@ -1017,7 +1008,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob */ static void tcp_peer_connected(prte_oob_tcp_peer_t *peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_connected on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), peer->sd); @@ -1048,7 +1039,7 @@ static void 
tcp_peer_connected(prte_oob_tcp_peer_t *peer) */ void prte_oob_tcp_peer_close(prte_oob_tcp_peer_t *peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp_peer_close for %s sd %d state %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), peer->sd, prte_oob_tcp_state_print(peer->state)); @@ -1115,7 +1106,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data unsigned char *ptr = (unsigned char *) data; size_t cnt = 0; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s waiting for connect ack from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&(peer->name))); @@ -1124,7 +1115,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data /* remote closed connection */ if (retval == 0) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_recv_blocking: " "peer closed connection: peer state %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), @@ -1162,7 +1153,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data recv_connect_ack, who will try to establish the connection again */ pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s connect ack received error %s from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), @@ -1184,7 +1175,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data cnt += retval; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect ack received from %s", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&(peer->name))); return true; @@ -1263,7 +1254,7 @@ void prte_oob_tcp_peer_dump(prte_oob_tcp_peer_t *peer, const char *msg) bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:peer_accept called for peer %s in state %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), prte_oob_tcp_state_print(peer->state), peer->sd); @@ -1282,11 +1273,6 @@ bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer) return false; } - /* set the peer into the component and OOB-level peer tables to indicate - * that we know this peer and we will be handling him - */ - PRTE_ACTIVATE_TCP_CMP_OP(peer, prte_mca_oob_tcp_component_set_module); - tcp_peer_connected(peer); if (!peer->recv_ev_active) { peer->recv_ev_active = true; @@ -1294,13 +1280,13 @@ bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer) prte_event_add(&peer->recv_event, 0); } if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { prte_oob_tcp_peer_dump(peer, "accepted"); } return true; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:peer_accept ignored for peer %s in state %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), prte_oob_tcp_state_print(peer->state), peer->sd); diff --git a/src/mca/oob/tcp/oob_tcp_connection.h b/src/rml/oob/oob_tcp_connection.h similarity index 83% rename from src/mca/oob/tcp/oob_tcp_connection.h rename to src/rml/oob/oob_tcp_connection.h index db2272959c..731cfec6d1 100644 --- a/src/mca/oob/tcp/oob_tcp_connection.h +++ b/src/rml/oob/oob_tcp_connection.h @@ -15,7 +15,7 @@ 
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,8 +35,8 @@ # include #endif -#include "oob_tcp.h" -#include "oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_peer.h" #include "src/threads/pmix_threads.h" /* State machine for connection operations */ @@ -56,7 +56,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_conn_op_t); #define PRTE_ACTIVATE_TCP_CONN_STATE(p, cbfunc) \ do { \ prte_oob_tcp_conn_op_t *cop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] connect to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((&(p)->name))); \ cop = PMIX_NEW(prte_oob_tcp_conn_op_t); \ @@ -76,7 +76,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_conn_op_t); #define PRTE_RETRY_TCP_CONN_STATE(p, cbfunc, tv) \ do { \ prte_oob_tcp_conn_op_t *cop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] retry connect to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((&(p)->name))); \ cop = PMIX_NEW(prte_oob_tcp_conn_op_t); \ @@ -86,12 +86,12 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_conn_op_t); prte_event_evtimer_add(&cop->ev, (tv)); \ } while (0); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_dump(prte_oob_tcp_peer_t *peer, const char *msg); -PRTE_MODULE_EXPORT bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer); -PRTE_MODULE_EXPORT int 
prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *peer, int sd, +PRTE_EXPORT void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_tcp_peer_dump(prte_oob_tcp_peer_t *peer, const char *msg); +PRTE_EXPORT bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer); +PRTE_EXPORT void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer); +PRTE_EXPORT int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *peer, int sd, prte_oob_tcp_hdr_t *dhdr); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_close(prte_oob_tcp_peer_t *peer); +PRTE_EXPORT void prte_oob_tcp_peer_close(prte_oob_tcp_peer_t *peer); #endif /* _MCA_OOB_TCP_CONNECTION_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_hdr.h b/src/rml/oob/oob_tcp_hdr.h similarity index 97% rename from src/mca/oob/tcp/oob_tcp_hdr.h rename to src/rml/oob/oob_tcp_hdr.h index e014ccdd37..05977156be 100644 --- a/src/mca/oob/tcp/oob_tcp_hdr.h +++ b/src/rml/oob/oob_tcp_hdr.h @@ -16,7 +16,7 @@ * Copyright (c) 2017-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/src/mca/oob/tcp/oob_tcp_listener.c b/src/rml/oob/oob_tcp_listener.c similarity index 81% rename from src/mca/oob/tcp/oob_tcp_listener.c rename to src/rml/oob/oob_tcp_listener.c index 90cf611878..0c11764bae 100644 --- a/src/mca/oob/tcp/oob_tcp_listener.c +++ b/src/rml/oob/oob_tcp_listener.c @@ -68,12 +68,11 @@ #include "src/util/pmix_parse_options.h" #include "src/util/pmix_show_help.h" -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_listener.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_listener.h" +#include "src/rml/oob/oob_tcp_peer.h" static void connection_event_handler(int incoming_sd, short flags, void *cbdata); static void *listen_thread(pmix_object_t *obj); @@ -101,9 +100,9 @@ int prte_oob_tcp_start_listening(void) prte_oob_tcp_listener_t *listener; /* if we don't have any TCP interfaces, we shouldn't be here */ - if (NULL == prte_mca_oob_tcp_component.ipv4conns + if (NULL == prte_oob_base.ipv4conns #if PRTE_ENABLE_IPV6 - && NULL == prte_mca_oob_tcp_component.ipv6conns + && NULL == prte_oob_base.ipv6conns #endif ) { PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); @@ -128,25 +127,25 @@ int prte_oob_tcp_start_listening(void) * harvest connection requests as rapidly as possible */ if (PRTE_PROC_IS_MASTER) { - if (0 > pipe(prte_mca_oob_tcp_component.stop_thread)) { + if (0 > pipe(prte_oob_base.stop_thread)) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* Make sure the pipe FDs are set to close-on-exec so that they don't leak into children */ - if (pmix_fd_set_cloexec(prte_mca_oob_tcp_component.stop_thread[0]) != PRTE_SUCCESS - || 
pmix_fd_set_cloexec(prte_mca_oob_tcp_component.stop_thread[1]) != PRTE_SUCCESS) { - close(prte_mca_oob_tcp_component.stop_thread[0]); - close(prte_mca_oob_tcp_component.stop_thread[1]); + if (pmix_fd_set_cloexec(prte_oob_base.stop_thread[0]) != PRTE_SUCCESS + || pmix_fd_set_cloexec(prte_oob_base.stop_thread[1]) != PRTE_SUCCESS) { + close(prte_oob_base.stop_thread[0]); + close(prte_oob_base.stop_thread[1]); PRTE_ERROR_LOG(PRTE_ERR_IN_ERRNO); return PRTE_ERR_IN_ERRNO; } - prte_mca_oob_tcp_component.listen_thread_active = true; - prte_mca_oob_tcp_component.listen_thread.t_run = listen_thread; - prte_mca_oob_tcp_component.listen_thread.t_arg = NULL; - if (PRTE_SUCCESS != (rc = pmix_thread_start(&prte_mca_oob_tcp_component.listen_thread))) { + prte_oob_base.listen_thread_active = true; + prte_oob_base.listen_thread.t_run = listen_thread; + prte_oob_base.listen_thread.t_arg = NULL; + if (PRTE_SUCCESS != (rc = pmix_thread_start(&prte_oob_base.listen_thread))) { PRTE_ERROR_LOG(rc); pmix_output(0, "%s Unable to start listen thread", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); } @@ -155,7 +154,7 @@ int prte_oob_tcp_start_listening(void) /* otherwise, setup to listen via the event lib */ - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) + PMIX_LIST_FOREACH(listener, &prte_oob_base.listeners, prte_oob_tcp_listener_t) { listener->ev_active = true; prte_event_set(prte_event_base, &listener->event, listener->sd, @@ -191,16 +190,16 @@ static int create_listen(void) * port in the range. 
Otherwise, tcp_port_min will be 0, which * means "pick any port" */ - if (NULL != prte_mca_oob_tcp_component.tcp_static_ports) { + if (NULL != prte_oob_base.tcp_static_ports) { /* if static ports were provided, take the * first entry in the list */ - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_mca_oob_tcp_component.tcp_static_ports[0]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_oob_base.tcp_static_ports[0]); /* flag that we are using static ports */ prte_static_ports = true; - } else if (NULL != prte_mca_oob_tcp_component.tcp_dyn_ports) { + } else if (NULL != prte_oob_base.tcp_dyn_ports) { /* take the entire range */ - ports = PMIX_ARGV_COPY_COMPAT(prte_mca_oob_tcp_component.tcp_dyn_ports); + ports = PMIX_ARGV_COPY_COMPAT(prte_oob_base.tcp_dyn_ports); prte_static_ports = false; } else { /* flag the system to dynamically take any available port */ @@ -225,7 +224,7 @@ static int create_listen(void) * sockets to support more flexible wireup protocols */ for (i = 0; i < PMIX_ARGV_COUNT_COMPAT(ports); i++) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "%s attempting to bind to IPv4 port %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ports[i]); /* get the port number */ @@ -239,7 +238,7 @@ static int create_listen(void) sd = socket(AF_INET, SOCK_STREAM, 0); if (sd < 0) { if (EAFNOSUPPORT != prte_socket_errno) { - pmix_output(0, "prte_mca_oob_tcp_component_init: socket() failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen: socket() failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); } PMIX_ARGV_FREE_COMPAT(ports); @@ -254,7 +253,7 @@ static int create_listen(void) } if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, (const char *) &flags, sizeof(flags)) < 0) { pmix_output(0, - "prte_oob_tcp_create_listen: unable to set the " + "prte_oob_create_listen: unable to set the " "SO_REUSEADDR option (%s:%d)\n", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); @@ -266,7 +265,7 
@@ static int create_listen(void) this FD */ if (pmix_fd_set_cloexec(sd) != PRTE_SUCCESS) { pmix_output(0, - "prte_oob_tcp_create_listen: unable to set the " + "prte_oob_create_listen: unable to set the " "listening socket to CLOEXEC (%s:%d)\n", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); @@ -287,7 +286,7 @@ static int create_listen(void) } /* resolve assigned port */ if (getsockname(sd, (struct sockaddr *) &inaddr, &addrlen) < 0) { - pmix_output(0, "prte_oob_tcp_create_listen: getsockname(): %s (%d)", + pmix_output(0, "prte_oob_create_listen: getsockname(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -296,7 +295,7 @@ static int create_listen(void) /* setup listen backlog to maximum allowed by kernel */ if (listen(sd, SOMAXCONN) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: listen(): %s (%d)", + pmix_output(0, "prte_oob_create_listen: listen(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -305,7 +304,7 @@ static int create_listen(void) /* set socket up to be non-blocking, otherwise accept could block */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen init: fcntl(F_GETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -313,7 +312,7 @@ static int create_listen(void) } flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen init: fcntl(F_SETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -328,13 +327,13 @@ static int create_listen(void) /* save the first one */ prte_process_info.my_port = conn->port; 
} - pmix_list_append(&prte_mca_oob_tcp_component.listeners, &conn->item); + pmix_list_append(&prte_oob_base.listeners, &conn->item); /* and to our ports */ pmix_asprintf(&tconn, "%d", ntohs(((struct sockaddr_in *) &inaddr)->sin_port)); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv4ports, tconn); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv4ports, tconn); free(tconn); if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { port = ntohs(((struct sockaddr_in *) &inaddr)->sin_port); pmix_output(0, "%s assigned IPv4 port %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), port); } @@ -347,7 +346,7 @@ static int create_listen(void) /* done with this, so release it */ PMIX_ARGV_FREE_COMPAT(ports); - if (0 == pmix_list_get_size(&prte_mca_oob_tcp_component.listeners)) { + if (0 == pmix_list_get_size(&prte_oob_base.listeners)) { /* cleanup */ if (0 <= sd) { CLOSE_THE_SOCKET(sd); @@ -384,16 +383,16 @@ static int create_listen6(void) * means "pick any port" */ if (PRTE_PROC_IS_DAEMON) { - if (NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { + if (NULL != prte_oob_base.tcp6_static_ports) { /* if static ports were provided, take the * first entry in the list */ - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_mca_oob_tcp_component.tcp6_static_ports[0]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_oob_base.tcp6_static_ports[0]); /* flag that we are using static ports */ prte_static_ports = true; - } else if (NULL != prte_mca_oob_tcp_component.tcp6_dyn_ports) { + } else if (NULL != prte_oob_base.tcp6_dyn_ports) { /* take the entire range */ - ports = PMIX_ARGV_COPY_COMPAT(prte_mca_oob_tcp_component.tcp6_dyn_ports); + ports = PMIX_ARGV_COPY_COMPAT(prte_oob_base.tcp6_dyn_ports); prte_static_ports = false; } else { /* flag the system to dynamically take any available port */ @@ -401,16 +400,16 @@ static int create_listen6(void) prte_static_ports = false; } } else 
{ - if (NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { + if (NULL != prte_oob_base.tcp6_static_ports) { /* if static ports were provided, take the * first entry in the list */ - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_mca_oob_tcp_component.tcp6_static_ports[0]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_oob_base.tcp6_static_ports[0]); /* flag that we are using static ports */ prte_static_ports = true; - } else if (NULL != prte_mca_oob_tcp_component.tcp6_dyn_ports) { + } else if (NULL != prte_oob_base.tcp6_dyn_ports) { /* take the entire range */ - ports = PMIX_ARGV_COPY_COMPAT(prte_mca_oob_tcp_component.tcp6_dyn_ports); + ports = PMIX_ARGV_COPY_COMPAT(prte_oob_base.tcp6_dyn_ports); prte_static_ports = false; } else { /* flag the system to dynamically take any available port */ @@ -436,7 +435,7 @@ static int create_listen6(void) * sockets to support more flexible wireup protocols */ for (i = 0; i < PMIX_ARGV_COUNT_COMPAT(ports); i++) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "%s attempting to bind to IPv6 port %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ports[i]); /* get the port number */ @@ -450,7 +449,7 @@ static int create_listen6(void) sd = socket(AF_INET6, SOCK_STREAM, 0); if (sd < 0) { if (EAFNOSUPPORT != prte_socket_errno) { - pmix_output(0, "prte_mca_oob_tcp_component_init: socket() failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen6: socket() failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); } return PRTE_ERR_IN_ERRNO; @@ -459,7 +458,7 @@ static int create_listen6(void) this FD */ if (pmix_fd_set_cloexec(sd) != PRTE_SUCCESS) { pmix_output(0, - "prte_oob_tcp_create_listen6: unable to set the " + "prte_oob_create_listen6: unable to set the " "listening socket to CLOEXEC (%s:%d)\n", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); @@ -496,7 +495,7 @@ static int create_listen6(void) } /* resolve assigned port */ if 
(getsockname(sd, (struct sockaddr *) &inaddr, &addrlen) < 0) { - pmix_output(0, "prte_oob_tcp_create_listen: getsockname(): %s (%d)", + pmix_output(0, "prte_oob_create_listen6: getsockname(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); return PRTE_ERROR; @@ -504,20 +503,20 @@ static int create_listen6(void) /* setup listen backlog to maximum allowed by kernel */ if (listen(sd, SOMAXCONN) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: listen(): %s (%d)", + pmix_output(0, "prte_oob_create_listen6: listen(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); return PRTE_ERROR; } /* set socket up to be non-blocking, otherwise accept could block */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen6: fcntl(F_GETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); return PRTE_ERROR; } flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen6: fcntl(F_SETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); return PRTE_ERROR; } @@ -527,13 +526,13 @@ static int create_listen6(void) conn->tcp6 = true; conn->sd = sd; conn->port = ntohs(((struct sockaddr_in6 *) &inaddr)->sin6_port); - pmix_list_append(&prte_mca_oob_tcp_component.listeners, &conn->item); + pmix_list_append(&prte_oob_base.listeners, &conn->item); /* and to our ports */ pmix_asprintf(&tconn, "%d", ntohs(((struct sockaddr_in6 *) &inaddr)->sin6_port)); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv6ports, tconn); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv6ports, tconn); free(tconn); if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { pmix_output(0, "%s 
assigned IPv6 port %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (int) ntohs(((struct sockaddr_in6 *) &inaddr)->sin6_port)); } @@ -543,7 +542,7 @@ static int create_listen6(void) break; } } - if (0 == pmix_list_get_size(&prte_mca_oob_tcp_component.listeners)) { + if (0 == pmix_list_get_size(&prte_oob_base.listeners)) { /* cleanup */ CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -579,28 +578,27 @@ static void *listen_thread(pmix_object_t *obj) * to the event method for handling any further connections * so as to minimize overhead */ - while (prte_mca_oob_tcp_component.listen_thread_active) { + while (prte_oob_base.listen_thread_active) { FD_ZERO(&readfds); max = -1; - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) + PMIX_LIST_FOREACH(listener, &prte_oob_base.listeners, prte_oob_tcp_listener_t) { FD_SET(listener->sd, &readfds); max = (listener->sd > max) ? listener->sd : max; } /* add the stop_thread fd */ - FD_SET(prte_mca_oob_tcp_component.stop_thread[0], &readfds); - max = (prte_mca_oob_tcp_component.stop_thread[0] > max) ? prte_mca_oob_tcp_component.stop_thread[0] - : max; + FD_SET(prte_oob_base.stop_thread[0], &readfds); + max = (prte_oob_base.stop_thread[0] > max) ? prte_oob_base.stop_thread[0] : max; /* set timeout interval */ - timeout.tv_sec = prte_mca_oob_tcp_component.listen_thread_tv.tv_sec; - timeout.tv_usec = prte_mca_oob_tcp_component.listen_thread_tv.tv_usec; + timeout.tv_sec = prte_oob_base.listen_thread_tv.tv_sec; + timeout.tv_usec = prte_oob_base.listen_thread_tv.tv_usec; /* Block in a select to avoid hammering the cpu. If a connection * comes in, we'll get woken up right away. 
*/ rc = select(max + 1, &readfds, NULL, NULL, &timeout); - if (!prte_mca_oob_tcp_component.listen_thread_active) { + if (!prte_oob_base.listen_thread_active) { /* we've been asked to terminate */ return NULL; } @@ -617,7 +615,7 @@ static void *listen_thread(pmix_object_t *obj) */ do { accepted_connections = 0; - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) + PMIX_LIST_FOREACH(listener, &prte_oob_base.listeners, prte_oob_tcp_listener_t) { sd = listener->sd; @@ -677,7 +675,7 @@ static void *listen_thread(pmix_object_t *obj) } } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_oob_tcp_listen_thread: incoming connection: " "(%d, %d) %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), pending_connection->fd, @@ -716,28 +714,6 @@ static void *listen_thread(pmix_object_t *obj) } done: -#if 0 - /* once we complete the initial launch, the "flood" of connections - * will end - only connection requests from local procs, connect/accept - * operations across mpirun instances, or the occasional tool will need - * to be serviced. As these are relatively small events, we can easily - * handle them in the context of the event library and no longer require - * a separate connection harvesting thread. 
So switch over to the event - * lib handler now - */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s prte_oob_tcp_listen_thread: switching to event lib", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - /* setup to listen via event library */ - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) { - prte_event_set(prte_event_base, listener->event, - listener->sd, - PRTE_EV_READ|PRTE_EV_PERSIST, - connection_event_handler, - 0); - prte_event_add(listener->event, 0); - } -#endif return NULL; } @@ -753,7 +729,7 @@ static void connection_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(new_connection); - pmix_output_verbose(4, prte_oob_base_framework.framework_output, + pmix_output_verbose(4, prte_oob_base.output, "%s connection_handler: working connection " "(%d, %d) %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), new_connection->fd, prte_socket_errno, @@ -761,8 +737,8 @@ static void connection_handler(int sd, short flags, void *cbdata) pmix_net_get_port((struct sockaddr *) &new_connection->addr)); /* process the connection */ - prte_oob_tcp_module.accept_connection(new_connection->fd, - (struct sockaddr *) &(new_connection->addr)); + prte_oob_accept_connection(new_connection->fd, (struct sockaddr *) &(new_connection->addr)); + /* cleanup */ PMIX_RELEASE(new_connection); } @@ -778,7 +754,7 @@ static void connection_event_handler(int incoming_sd, short flags, void *cbdata) PRTE_HIDE_UNUSED_PARAMS(flags, cbdata); sd = accept(incoming_sd, (struct sockaddr *) &addr, &addrlen); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connection_event_handler: working connection " "(%d, %d) %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), sd, prte_socket_errno, @@ -815,7 +791,7 @@ static void connection_event_handler(int incoming_sd, short flags, void *cbdata) } /* process the connection */ 
- prte_oob_tcp_module.accept_connection(sd, &addr); + prte_oob_accept_connection(sd, &addr); } static void tcp_ev_cons(prte_oob_tcp_listener_t *event) diff --git a/src/mca/oob/tcp/oob_tcp_listener.h b/src/rml/oob/oob_tcp_listener.h similarity index 93% rename from src/mca/oob/tcp/oob_tcp_listener.h rename to src/rml/oob/oob_tcp_listener.h index a109b15493..bfb4b984ae 100644 --- a/src/mca/oob/tcp/oob_tcp_listener.h +++ b/src/rml/oob/oob_tcp_listener.h @@ -15,7 +15,7 @@ * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +60,6 @@ typedef struct { } prte_oob_tcp_pending_connection_t; PMIX_CLASS_DECLARATION(prte_oob_tcp_pending_connection_t); -PRTE_MODULE_EXPORT int prte_oob_tcp_start_listening(void); +PRTE_EXPORT int prte_oob_tcp_start_listening(void); #endif /* _MCA_OOB_TCP_LISTENER_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_peer.h b/src/rml/oob/oob_tcp_peer.h similarity index 96% rename from src/mca/oob/tcp/oob_tcp_peer.h rename to src/rml/oob/oob_tcp_peer.h index a5753538be..04cac5c3cd 100644 --- a/src/mca/oob/tcp/oob_tcp_peer.h +++ b/src/rml/oob/oob_tcp_peer.h @@ -17,7 +17,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights * reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,8 +32,8 @@ #include "src/event/event-internal.h" -#include "oob_tcp.h" -#include "oob_tcp_sendrecv.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_sendrecv.h" #include "src/threads/pmix_threads.h" typedef struct { diff --git a/src/mca/oob/tcp/oob_tcp_sendrecv.c b/src/rml/oob/oob_tcp_sendrecv.c similarity index 94% rename from src/mca/oob/tcp/oob_tcp_sendrecv.c rename to src/rml/oob/oob_tcp_sendrecv.c index 8b2b627fc0..6d473cb1b6 100644 --- a/src/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/src/rml/oob/oob_tcp_sendrecv.c @@ -16,7 +16,7 @@ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,11 +71,10 @@ #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" -#include "oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_peer.h" #define OOB_SEND_MAX_RETRIES 3 @@ -205,14 +204,14 @@ void prte_oob_tcp_send_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); msg = peer->send_msg; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:send_handler called to send to peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); switch (peer->state) { case MCA_OOB_TCP_CONNECTING: case MCA_OOB_TCP_CLOSED: - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, 
prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:send_handler %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), prte_oob_tcp_state_print(peer->state)); prte_oob_tcp_peer_complete_connect(peer); @@ -225,17 +224,17 @@ void prte_oob_tcp_send_handler(int sd, short flags, void *cbdata) } break; case MCA_OOB_TCP_CONNECTED: - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:send_handler SENDING TO %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer->send_msg) ? "NULL" : PRTE_NAME_PRINT(&peer->name)); if (NULL != msg) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, + pmix_output_verbose(2, prte_oob_base.output, "oob:tcp:send_handler SENDING MSG"); if (PRTE_SUCCESS == (rc = send_msg(peer, msg))) { /* this msg is complete */ if (NULL != msg->data || NULL == msg->msg) { /* the relay is complete - release the data */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, + pmix_output_verbose(2, prte_oob_base.output, "%s MESSAGE RELAY COMPLETE TO %s OF %d BYTES ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -244,7 +243,7 @@ void prte_oob_tcp_send_handler(int sd, short flags, void *cbdata) peer->send_msg = NULL; } else { /* we are done - notify the RML */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, + pmix_output_verbose(2, prte_oob_base.output, "%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -327,7 +326,7 @@ static int read_bytes(prte_oob_tcp_peer_t *peer) * the error back to the RML and let the caller know * to abort this message */ - pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base.output, "%s-%s prte_oob_tcp_msg_recv: readv failed: %s (%d)", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), strerror(prte_socket_errno), prte_socket_errno); @@ -340,7 +339,7 @@ static int read_bytes(prte_oob_tcp_peer_t *peer) /* the remote peer closed the connection - report that condition * and let the caller know */ - pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base.output, "%s-%s prte_oob_tcp_msg_recv: peer closed connection", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); /* stop all events */ @@ -390,14 +389,14 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); switch (peer->state) { case MCA_OOB_TCP_CONNECT_ACK: if (PRTE_SUCCESS == (rc = prte_oob_tcp_peer_recv_connect_ack(peer, peer->sd, NULL))) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler starting send/recv events", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* we connected! 
Start the send/recv events */ @@ -425,7 +424,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) /* we get an unreachable error returned if a connection * completes but is rejected - otherwise, we don't want * to terminate as we might be retrying the connection */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s UNABLE TO COMPLETE CONNECT ACK WITH %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); prte_event_del(&peer->recv_event); @@ -434,11 +433,11 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) } break; case MCA_OOB_TCP_CONNECTED: - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler CONNECTED", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* allocate a new message and setup for recv */ if (NULL == peer->recv_msg) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler allocate new recv msg", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); peer->recv_msg = PMIX_NEW(prte_oob_tcp_recv_t); @@ -454,7 +453,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) } /* if the header hasn't been completely read, read it */ if (!peer->recv_msg->hdr_recvd) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler read hdr", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); if (PRTE_SUCCESS == (rc = read_bytes(peer))) { /* completed reading the header */ @@ -464,14 +463,14 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) /* if this is a zero-byte message, then we are done */ if (0 == peer->recv_msg->hdr.nbytes) { 
pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag); peer->recv_msg->data = NULL; // make sure } else { pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s:tcp:recv:handler allocate data region of size %lu", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (unsigned long) peer->recv_msg->hdr.nbytes); @@ -487,7 +486,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) return; } else { /* close the connection */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler error reading bytes - closing connection", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); prte_oob_tcp_peer_close(peer); @@ -503,7 +502,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) if (PRTE_SUCCESS == (rc = read_bytes(peer))) { /* we recvd all of the message */ pmix_output_verbose( - OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), PRTE_NAME_PRINT(&peer->recv_msg->hdr.origin), (int) peer->recv_msg->hdr.nbytes, @@ -513,7 +512,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) if (PMIX_CHECK_PROCID(&peer->recv_msg->hdr.dst, PRTE_PROC_MY_NAME)) { /* yes - post it to the RML for delivery */ pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s DELIVERING TO RML tag = %d seq_num = %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), peer->recv_msg->hdr.tag, peer->recv_msg->hdr.seq_num); @@ -525,7 +524,7 @@ void prte_oob_tcp_recv_handler(int 
sd, short flags, void *cbdata) /* promote this to the OOB as some other transport might * be the next best hop */ pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s TCP PROMOTING ROUTED MESSAGE FOR %s TO OOB", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->recv_msg->hdr.dst)); diff --git a/src/mca/oob/tcp/oob_tcp_sendrecv.h b/src/rml/oob/oob_tcp_sendrecv.h similarity index 95% rename from src/mca/oob/tcp/oob_tcp_sendrecv.h rename to src/rml/oob/oob_tcp_sendrecv.h index 973f0d0136..c654bc8abe 100644 --- a/src/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/src/rml/oob/oob_tcp_sendrecv.h @@ -15,7 +15,7 @@ * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,8 +31,8 @@ #include "src/class/pmix_list.h" #include "src/util/pmix_string_copy.h" -#include "oob_tcp.h" -#include "oob_tcp_hdr.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_hdr.h" #include "src/rml/rml.h" #include "src/threads/pmix_threads.h" @@ -98,7 +98,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_recv_t); #define MCA_OOB_TCP_QUEUE_SEND(m, p) \ do { \ prte_oob_tcp_send_t *_s; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] queue send to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((m)->dst))); \ _s = PMIX_NEW(prte_oob_tcp_send_t); \ @@ -130,7 +130,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_recv_t); #define MCA_OOB_TCP_QUEUE_PENDING(m, p) \ do { \ prte_oob_tcp_send_t *_s; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] queue 
pending to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((m)->dst))); \ _s = PMIX_NEW(prte_oob_tcp_send_t); \ @@ -162,7 +162,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_recv_t); #define MCA_OOB_TCP_QUEUE_RELAY(m, p) \ do { \ prte_oob_tcp_send_t *_s; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] queue relay to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((p)->name))); \ _s = PMIX_NEW(prte_oob_tcp_send_t); \ @@ -196,7 +196,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_msg_op_t); #define PRTE_ACTIVATE_TCP_POST_SEND(ms, cbfunc) \ do { \ prte_oob_tcp_msg_op_t *mop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] post send to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((ms)->dst))); \ mop = PMIX_NEW(prte_oob_tcp_msg_op_t); \ @@ -218,7 +218,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_msg_error_t); prte_oob_tcp_msg_error_t *mop; \ prte_oob_tcp_send_t *snd; \ prte_oob_tcp_recv_t *proxy; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] post msg error to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((h))); \ mop = PMIX_NEW(prte_oob_tcp_msg_error_t); \ @@ -249,7 +249,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_msg_error_t); #define PRTE_ACTIVATE_TCP_NO_ROUTE(r, h, c) \ do { \ prte_oob_tcp_msg_error_t *mop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] post no route to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((h))); \ mop = PMIX_NEW(prte_oob_tcp_msg_error_t); \ diff --git a/src/mca/oob/tcp/owner.txt b/src/rml/oob/owner.txt similarity index 100% rename from src/mca/oob/tcp/owner.txt
rename to src/rml/oob/owner.txt diff --git a/src/rml/rml.c b/src/rml/rml.c index f8be31be90..be965841b0 100644 --- a/src/rml/rml.c +++ b/src/rml/rml.c @@ -24,12 +24,14 @@ #include "src/mca/mca.h" #include "src/util/pmix_output.h" -#include "src/mca/errmgr/errmgr.h" -#include "src/rml/rml.h" #include "src/mca/state/state.h" #include "src/runtime/prte_wait.h" #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" +#include "src/mca/errmgr/errmgr.h" +#include "src/rml/rml.h" +#include "src/rml/rml_contact.h" +#include "src/rml/oob/oob.h" prte_rml_base_t prte_rml_base = { .rml_output = -1, @@ -82,10 +84,22 @@ void prte_rml_register(void) pmix_mca_base_var_register_synonym(ret, "prte", "routed", "radix", NULL, PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + prte_oob_register(); + + verbosity = 0; + pmix_mca_base_var_register("prte", "oob", "base", "verbose", + "Debug verbosity of the out-of-band subsystem", + PMIX_MCA_BASE_VAR_TYPE_INT, + &verbosity); + if (0 < verbosity) { + prte_oob_base.output = pmix_output_open(NULL); + pmix_output_set_verbosity(prte_oob_base.output, verbosity); + } } void prte_rml_close(void) { + prte_oob_close(); PMIX_LIST_DESTRUCT(&prte_rml_base.posted_recvs); PMIX_LIST_DESTRUCT(&prte_rml_base.unmatched_msgs); PMIX_LIST_DESTRUCT(&prte_rml_base.children); @@ -94,8 +108,12 @@ void prte_rml_close(void) } } -void prte_rml_open(void) +int prte_rml_open(void) { + char *uri = NULL; + pmix_value_t val; + int ret; + /* construct object for holding the active plugin modules */ PMIX_CONSTRUCT(&prte_rml_base.posted_recvs, pmix_list_t); PMIX_CONSTRUCT(&prte_rml_base.unmatched_msgs, pmix_list_t); @@ -106,6 +124,54 @@ void prte_rml_open(void) prte_rml_compute_routing_tree(); prte_rml_base.lifeline = PRTE_PROC_MY_PARENT->rank; + + prte_oob_open(); + + /* store our URI for later */ + prte_oob_base_get_addr(&uri); + PMIX_VALUE_LOAD(&val, uri, PMIX_STRING); + ret = PMIx_Store_internal(PRTE_PROC_MY_NAME, PMIX_PROC_URI, &val); + if (PMIX_SUCCESS != ret) { + 
PRTE_ERROR_LOG(PRTE_ERROR); + PMIX_VALUE_DESTRUCT(&val); + return PRTE_ERROR; + } + PMIX_VALUE_DESTRUCT(&val); + // add it to our local info + prte_process_info.my_uri = strdup(uri); + + if (PRTE_PROC_IS_MASTER) { + prte_process_info.my_hnp_uri = uri; + } else { + free(uri); + if (NULL == prte_process_info.my_hnp_uri) { + // this is an error + PRTE_ERROR_LOG(PRTE_ERROR); + return PRTE_ERROR; + } + /* extract the HNP's name so we can update the routing table */ + ret = prte_rml_parse_uris(prte_process_info.my_hnp_uri, + PRTE_PROC_MY_HNP, + NULL); + if (PRTE_SUCCESS != ret) { + PRTE_ERROR_LOG(ret); + return ret; + } + /* Set the contact info in the RML - this won't actually establish + * the connection, but just tells the RML how to reach the HNP + * if/when we attempt to send to it + */ + PMIX_VALUE_LOAD(&val, prte_process_info.my_hnp_uri, PMIX_STRING); + ret = PMIx_Store_internal(PRTE_PROC_MY_HNP, PMIX_PROC_URI, &val); + if (PMIX_SUCCESS != ret) { + PRTE_ERROR_LOG(ret); + PMIX_VALUE_DESTRUCT(&val); + return ret; + } + PMIX_VALUE_DESTRUCT(&val); + } + + return PRTE_SUCCESS; } void prte_rml_send_callback(int status, pmix_proc_t *peer, diff --git a/src/rml/rml.h b/src/rml/rml.h index dff3bc801e..7cb46aaed1 100644 --- a/src/rml/rml.h +++ b/src/rml/rml.h @@ -17,7 +17,7 @@ * and Technology (RIST). All rights reserved. * * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -145,6 +145,7 @@ PRTE_EXPORT void prte_rml_recv_cancel(pmix_proc_t *peer, prte_rml_tag_t tag); typedef struct { int rml_output; int routed_output; + int oob_output; int max_retries; pmix_list_t posted_recvs; pmix_list_t unmatched_msgs; @@ -158,7 +159,7 @@ PRTE_EXPORT extern prte_rml_base_t prte_rml_base; PRTE_EXPORT void prte_rml_register(void); PRTE_EXPORT void prte_rml_close(void); -PRTE_EXPORT void prte_rml_open(void); +PRTE_EXPORT int prte_rml_open(void); /* common implementations */ PRTE_EXPORT void prte_rml_base_post_recv(int sd, short args, void *cbdata); PRTE_EXPORT void prte_rml_base_process_msg(int fd, short flags, void *cbdata); diff --git a/src/rml/rml_send.c b/src/rml/rml_send.c index a429650476..8a81f143a4 100644 --- a/src/rml/rml_send.c +++ b/src/rml/rml_send.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,11 +31,11 @@ #include "src/util/pmix_name_fns.h" #include "src/mca/errmgr/errmgr.h" -#include "src/mca/oob/base/base.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "src/rml/rml.h" +#include "src/rml/oob/oob.h" int prte_rml_send_buffer_nb(pmix_rank_t rank, pmix_data_buffer_t *buffer, diff --git a/src/runtime/prte_init.c b/src/runtime/prte_init.c index 5e7a234f63..6e79131572 100644 --- a/src/runtime/prte_init.c +++ b/src/runtime/prte_init.c @@ -72,7 +72,6 @@ #include "src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/pmdl/base/base.h" #include "src/mca/prtebacktrace/base/base.h" diff --git a/src/util/proc_info.c b/src/util/proc_info.c index 0923b962eb..8e153fb388 100644 --- a/src/util/proc_info.c +++ b/src/util/proc_info.c @@ -64,6 +64,7 @@ PRTE_EXPORT prte_process_info_t prte_process_info = { .aliases = NULL, .pid = 0, .proc_type = PRTE_PROC_TYPE_NONE, + .my_uri = NULL, .my_port = 0, .tmpdir_base = NULL, .top_session_dir = NULL, diff --git a/src/util/proc_info.h b/src/util/proc_info.h index 85e53ba5af..fe970e5ce7 100644 --- a/src/util/proc_info.h +++ b/src/util/proc_info.h @@ -75,6 +75,7 @@ typedef struct prte_process_info_t { char **aliases; /**< aliases for this node */ pid_t pid; /**< Local process ID for this process */ prte_proc_type_t proc_type; /**< Type of process */ + char *my_uri; /**< My contact info */ uint16_t my_port; /**< TCP port for out-of-band comm */ /* The session directory has the form * ///, where the prefix