diff --git a/src/mca/ess/base/ess_base_std_prted.c b/src/mca/ess/base/ess_base_std_prted.c index 4e48cce7d6..e130b0fa0b 100644 --- a/src/mca/ess/base/ess_base_std_prted.c +++ b/src/mca/ess/base/ess_base_std_prted.c @@ -53,7 +53,6 @@ #include "src/mca/grpcomm/grpcomm.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/prtereachable/base/base.h" #include "src/mca/rmaps/base/base.h" @@ -99,9 +98,8 @@ int prte_ess_base_prted_setup(void) int fd; char log_file[PATH_MAX]; char *error = NULL; - char *uri = NULL; - char *tmp; - prte_job_t *jdata; + char *tmp = NULL; + prte_job_t *jdata = NULL; prte_proc_t *proc; prte_app_context_t *app; hwloc_obj_t obj; @@ -109,7 +107,6 @@ int prte_ess_base_prted_setup(void) prte_topology_t *t; prte_ess_base_signal_t *sig; int idx; - pmix_value_t val; plm_in_use = false; @@ -245,7 +242,7 @@ int prte_ess_base_prted_setup(void) /* obviously, we have "reported" */ jdata->num_reported = 1; - /* setup my session directory here as the OOB may need it */ + /* setup my session directory here */ PMIX_OUTPUT_VERBOSE( (2, prte_ess_base_framework.framework_output, "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", @@ -316,61 +313,15 @@ int prte_ess_base_prted_setup(void) error = "prte_prtereachable_base_select"; goto error; } - if (PRTE_SUCCESS - != (ret = pmix_mca_base_framework_open(&prte_oob_base_framework, - PMIX_MCA_BASE_OPEN_DEFAULT))) { - PRTE_ERROR_LOG(ret); - error = "prte_oob_base_open"; - goto error; - } - if (PRTE_SUCCESS != (ret = prte_oob_base_select())) { + if (PRTE_SUCCESS != (ret = prte_rml_open())) { PRTE_ERROR_LOG(ret); - error = "prte_oob_base_select"; + error = "prte_rml_open"; goto error; } - prte_rml_open(); /* it is now safe to start the pmix server */ pmix_server_start(); - /* store our URI for later */ - prte_oob_base_get_addr(&uri); - PMIX_VALUE_LOAD(&val, uri, PMIX_STRING); - ret = 
PMIx_Store_internal(PRTE_PROC_MY_NAME, PMIX_PROC_URI, &val); - if (PMIX_SUCCESS != ret) { - PMIX_VALUE_DESTRUCT(&val); - error = "store MY URI"; - ret = PRTE_ERROR; - goto error; - } - PMIX_VALUE_DESTRUCT(&val); - free(uri); - - if (NULL != prte_process_info.my_hnp_uri) { - /* extract the HNP's name so we can update the routing table */ - ret = prte_rml_parse_uris(prte_process_info.my_hnp_uri, - PRTE_PROC_MY_HNP, - NULL); - if (PRTE_SUCCESS != ret) { - PRTE_ERROR_LOG(ret); - error = "prte_rml_parse_HNP"; - goto error; - } - /* Set the contact info in the RML - this won't actually establish - * the connection, but just tells the RML how to reach the HNP - * if/when we attempt to send to it - */ - PMIX_VALUE_LOAD(&val, prte_process_info.my_hnp_uri, PMIX_STRING); - ret = PMIx_Store_internal(PRTE_PROC_MY_HNP, PMIX_PROC_URI, &val); - if (PMIX_SUCCESS != ret) { - PMIX_VALUE_DESTRUCT(&val); - error = "store HNP URI"; - ret = PRTE_ERROR; - goto error; - } - PMIX_VALUE_DESTRUCT(&val); - } - /* select the errmgr */ if (PRTE_SUCCESS != (ret = prte_errmgr_base_select())) { PRTE_ERROR_LOG(ret); @@ -547,7 +498,6 @@ int prte_ess_base_prted_finalize(void) (void) pmix_mca_base_framework_close(&prte_odls_base_framework); (void) pmix_mca_base_framework_close(&prte_errmgr_base_framework); prte_rml_close(); - (void) pmix_mca_base_framework_close(&prte_oob_base_framework); (void) pmix_mca_base_framework_close(&prte_prtereachable_base_framework); (void) pmix_mca_base_framework_close(&prte_state_base_framework); diff --git a/src/mca/ess/hnp/ess_hnp_module.c b/src/mca/ess/hnp/ess_hnp_module.c index ca0185349f..14b0ad4e59 100644 --- a/src/mca/ess/hnp/ess_hnp_module.c +++ b/src/mca/ess/hnp/ess_hnp_module.c @@ -59,7 +59,6 @@ #include "src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/plm/plm.h" #include "src/mca/prtereachable/base/base.h" @@ -107,8 +106,6 
@@ static int rte_init(int argc, char **argv) prte_app_context_t *app; int idx; prte_topology_t *t; - pmix_value_t pval; - pmix_status_t pret; PRTE_HIDE_UNUSED_PARAMS(argc); /* run the prolog */ @@ -234,7 +231,7 @@ static int rte_init(int argc, char **argv) jdata->num_reported = 1; jdata->num_daemons_reported = 1; - /* setup my session directory here as the OOB may need it */ + /* setup my session directory here */ PMIX_OUTPUT_VERBOSE((2, prte_debug_output, "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), @@ -268,38 +265,11 @@ static int rte_init(int argc, char **argv) error = "prte_prtereachable_base_select"; goto error; } - /* - * OOB Layer - */ - if (PRTE_SUCCESS - != (ret = pmix_mca_base_framework_open(&prte_oob_base_framework, - PMIX_MCA_BASE_OPEN_DEFAULT))) { - error = "prte_oob_base_open"; - goto error; - } - if (PRTE_SUCCESS != (ret = prte_oob_base_select())) { - error = "prte_oob_base_select"; - goto error; - } - - // set our RML address - prte_oob_base_get_addr(&proc->rml_uri); - prte_process_info.my_hnp_uri = strdup(proc->rml_uri); - /* store it in the local PMIx repo for later retrieval */ - PMIX_VALUE_LOAD(&pval, proc->rml_uri, PMIX_STRING); - if (PMIX_SUCCESS != (pret = PMIx_Store_internal(PRTE_PROC_MY_NAME, PMIX_PROC_URI, &pval))) { - PMIX_ERROR_LOG(pret); - ret = PRTE_ERROR; - PMIX_VALUE_DESTRUCT(&pval); - error = "store uri"; + if (PRTE_SUCCESS != (ret = prte_rml_open())) { + PRTE_ERROR_LOG(ret); + error = "prte_rml_open"; goto error; } - PMIX_VALUE_DESTRUCT(&pval); - - /* - * Runtime Messaging Layer - */ - prte_rml_open(); /* it is now safe to start the pmix server */ pmix_server_start(); @@ -471,7 +441,7 @@ static int rte_init(int argc, char **argv) static int rte_finalize(void) { /* first stage shutdown of the errmgr, deregister the handler but keep - * the required facilities until the rml and oob are offline */ + * the required facilities until the rml is offline */ prte_errmgr.finalize(); /* close 
frameworks */ @@ -486,7 +456,6 @@ static int rte_finalize(void) (void) pmix_mca_base_framework_close(&prte_rtc_base_framework); (void) pmix_mca_base_framework_close(&prte_odls_base_framework); prte_rml_close(); - (void) pmix_mca_base_framework_close(&prte_oob_base_framework); (void) pmix_mca_base_framework_close(&prte_prtereachable_base_framework); (void) pmix_mca_base_framework_close(&prte_errmgr_base_framework); (void) pmix_mca_base_framework_close(&prte_state_base_framework); diff --git a/src/mca/oob/Makefile.am b/src/mca/oob/Makefile.am deleted file mode 100644 index 1a97fbbe79..0000000000 --- a/src/mca/oob/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2019 Intel, Inc. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libprtemca_oob.la -libprtemca_oob_la_SOURCES = - -# pkgdata setup -dist_prtedata_DATA = - -# local files -headers = oob.h -libprtemca_oob_la_SOURCES += $(headers) - -# Conditionally install the header files -prtedir = $(prteincludedir)/$(subdir) -nobase_prte_HEADERS = $(headers) - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/src/mca/oob/base/Makefile.am b/src/mca/oob/base/Makefile.am deleted file mode 100644 index b3cdb6a3d1..0000000000 --- a/src/mca/oob/base/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. -# Copyright (c) 2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2022 Nanook Consulting. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_prtedata_DATA += base/help-oob-base.txt - -headers += \ - base/base.h - -libprtemca_oob_la_SOURCES += \ - base/oob_base_stubs.c \ - base/oob_base_frame.c \ - base/oob_base_select.c diff --git a/src/mca/oob/base/oob_base_frame.c b/src/mca/oob/base/oob_base_frame.c deleted file mode 100644 index 9102fa94d9..0000000000 --- a/src/mca/oob/base/oob_base_frame.c +++ /dev/null @@ -1,111 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" -#include "constants.h" - -#include "src/class/pmix_bitmap.h" -#include "src/mca/base/pmix_base.h" -#include "src/mca/mca.h" -#include "src/pmix/pmix-internal.h" -#include "src/runtime/prte_progress_threads.h" -#include "src/util/pmix_output.h" - -#include "src/mca/oob/base/base.h" -#include "src/rml/rml.h" - -/* - * The following file was created by configure. 
It contains extern - * statements and the definition of an array of pointers to each - * component's public pmix_mca_base_component_t struct. - */ - -#include "src/mca/oob/base/static-components.h" - -/* - * Global variables - */ -prte_oob_base_t prte_oob_base = {0}; - -static int prte_oob_base_close(void) -{ - prte_oob_base_component_t *component; - pmix_mca_base_component_list_item_t *cli; - - /* shutdown all active transports */ - while (NULL - != (cli = (pmix_mca_base_component_list_item_t *) pmix_list_remove_first( - &prte_oob_base.actives))) { - component = (prte_oob_base_component_t *) cli->cli_component; - if (NULL != component->shutdown) { - component->shutdown(); - } - PMIX_RELEASE(cli); - } - - /* destruct our internal lists */ - PMIX_DESTRUCT(&prte_oob_base.actives); - - /* release all peers from the list */ - PMIX_LIST_DESTRUCT(&prte_oob_base.peers); - - return pmix_mca_base_framework_components_close(&prte_oob_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. 
- */ -static int prte_oob_base_open(pmix_mca_base_open_flag_t flags) -{ - /* setup globals */ - prte_oob_base.max_uri_length = -1; - PMIX_CONSTRUCT(&prte_oob_base.peers, pmix_list_t); - PMIX_CONSTRUCT(&prte_oob_base.actives, pmix_list_t); - - /* Open up all available components */ - return pmix_mca_base_framework_components_open(&prte_oob_base_framework, flags); -} - -PMIX_MCA_BASE_FRAMEWORK_DECLARE(prte, oob, "Out-of-Band Messaging Subsystem", NULL, - prte_oob_base_open, prte_oob_base_close, - prte_oob_base_static_components, - PMIX_MCA_BASE_FRAMEWORK_FLAG_DEFAULT); - -PMIX_CLASS_INSTANCE(prte_oob_send_t, pmix_object_t, NULL, NULL); - -static void pr_cons(prte_oob_base_peer_t *ptr) -{ - PMIX_LOAD_PROCID(&ptr->name, NULL, PMIX_RANK_INVALID); - ptr->component = NULL; - PMIX_CONSTRUCT(&ptr->addressable, pmix_bitmap_t); - pmix_bitmap_init(&ptr->addressable, 8); -} -static void pr_des(prte_oob_base_peer_t *ptr) -{ - PMIX_DESTRUCT(&ptr->addressable); -} -PMIX_CLASS_INSTANCE(prte_oob_base_peer_t, pmix_list_item_t, pr_cons, pr_des); diff --git a/src/mca/oob/base/oob_base_select.c b/src/mca/oob/base/oob_base_select.c deleted file mode 100644 index 290a9589f1..0000000000 --- a/src/mca/oob/base/oob_base_select.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
- * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" -#include "constants.h" - -#include -#include - -#include "src/mca/base/pmix_base.h" -#include "src/mca/mca.h" -#include "src/util/pmix_output.h" - -#include "src/util/pmix_show_help.h" - -#include "src/mca/oob/base/base.h" -#include "src/mca/oob/oob.h" -#include "src/runtime/prte_globals.h" - -/** - * Function for selecting all runnable modules from those that are - * available. - * - * Call the init function on all available modules. - */ -int prte_oob_base_select(void) -{ - pmix_mca_base_component_list_item_t *cli, *cmp, *c2; - prte_oob_base_component_t *component, *c3; - bool added; - int i, rc; - - /* Query all available components and ask if their transport is available */ - PMIX_LIST_FOREACH(cli, &prte_oob_base_framework.framework_components, - pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: checking available component %s", - component->oob_base.pmix_mca_component_name); - - /* If there's no query function, skip it */ - if (NULL == component->available) { - pmix_output_verbose( - 5, prte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s]. 
It does not implement a query function", - component->oob_base.pmix_mca_component_name); - continue; - } - - /* Query the component */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Querying component [%s]", - component->oob_base.pmix_mca_component_name); - - rc = component->available(); - - /* If the component is not available, then skip it as - * it has no available interfaces - */ - if (PRTE_SUCCESS != rc && PRTE_ERR_FORCE_SELECT != rc) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s] - no available interfaces", - component->oob_base.pmix_mca_component_name); - continue; - } - - /* if it fails to startup, then skip it */ - if (PRTE_SUCCESS != component->startup()) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s] - failed to startup", - component->oob_base.pmix_mca_component_name); - continue; - } - - if (PRTE_ERR_FORCE_SELECT == rc) { - /* this component shall be the *only* component allowed - * for use, so shutdown and remove any prior ones */ - while (NULL - != (cmp = (pmix_mca_base_component_list_item_t *) pmix_list_remove_first( - &prte_oob_base.actives))) { - c3 = (prte_oob_base_component_t *) cmp->cli_component; - if (NULL != c3->shutdown) { - c3->shutdown(); - } - PMIX_RELEASE(cmp); - } - c2 = PMIX_NEW(pmix_mca_base_component_list_item_t); - c2->cli_component = (pmix_mca_base_component_t *) component; - pmix_list_append(&prte_oob_base.actives, &c2->super); - break; - } - - /* record it, but maintain priority order */ - added = false; - PMIX_LIST_FOREACH(cmp, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - c3 = (prte_oob_base_component_t *) cmp->cli_component; - if (c3->priority > component->priority) { - continue; - } - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Inserting component"); - c2 = 
PMIX_NEW(pmix_mca_base_component_list_item_t); - c2->cli_component = (pmix_mca_base_component_t *) component; - pmix_list_insert_pos(&prte_oob_base.actives, &cmp->super, &c2->super); - added = true; - break; - } - if (!added) { - /* add to end */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Adding component to end"); - c2 = PMIX_NEW(pmix_mca_base_component_list_item_t); - c2->cli_component = (pmix_mca_base_component_t *) component; - pmix_list_append(&prte_oob_base.actives, &c2->super); - } - } - - if (0 == pmix_list_get_size(&prte_oob_base.actives)) { - /* no support available means we really cannot run */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Init failed to return any available transports"); - pmix_show_help("help-oob-base.txt", "no-interfaces-avail", true); - return PRTE_ERR_SILENT; - } - - /* provide them an index so we can track their usability in a bitmap */ - i = 0; - PMIX_LIST_FOREACH(cmp, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - c3 = (prte_oob_base_component_t *) cmp->cli_component; - c3->idx = i++; - } - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "mca:oob:select: Found %d active transports", - (int) pmix_list_get_size(&prte_oob_base.actives)); - return PRTE_SUCCESS; -} diff --git a/src/mca/oob/base/oob_base_stubs.c b/src/mca/oob/base/oob_base_stubs.c deleted file mode 100644 index 16f380a730..0000000000 --- a/src/mca/oob/base/oob_base_stubs.c +++ /dev/null @@ -1,382 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" -#include "constants.h" - -#include "src/pmix/pmix-internal.h" -#include "src/runtime/prte_globals.h" -#include "src/util/pmix_argv.h" -#include "src/util/pmix_output.h" -#include "src/util/pmix_printf.h" - -#include "src/mca/errmgr/errmgr.h" -#include "src/mca/oob/base/base.h" -#include "src/rml/rml.h" -#include "src/mca/state/state.h" -#include "src/threads/pmix_threads.h" - -static prte_oob_base_peer_t* process_uri(char *uri); - -void prte_oob_base_send_nb(int fd, short args, void *cbdata) -{ - prte_oob_send_t *cd = (prte_oob_send_t *) cbdata; - prte_rml_send_t *msg; - pmix_mca_base_component_list_item_t *cli; - prte_oob_base_peer_t *pr; - int rc; - bool msg_sent; - prte_oob_base_component_t *component; - bool reachable; - char *uri; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(cd); - - /* done with this. release it now */ - msg = cd->msg; - PMIX_RELEASE(cd); - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send to target %s - attempt %u", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst), - msg->retries); - - /* don't try forever - if we have exceeded the number of retries, - * then report this message as undeliverable even if someone continues - * to think they could reach it */ - if (prte_rml_base.max_retries <= msg->retries) { - msg->status = PRTE_ERR_NO_PATH_TO_TARGET; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - - /* check if we have this peer in our list */ - pr = prte_oob_base_get_peer(&msg->dst); - if (NULL == pr) { - /* if we are abnormally terminating, or terminating the DVM, then - * don't bother looking for it */ - if (prte_abnormal_term_ordered || prte_never_launched || prte_dvm_abort_ordered) { - return; - } - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send unknown peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - 
PRTE_NAME_PRINT(&msg->dst)); - /* for direct launched procs, the URI might be in the database, - * so check there next - if it is, the peer object will be added - * to our hash table. However, we don't want to chase up to the - * server after it, so indicate it is optional - */ - PRTE_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PROC_URI, &msg->dst, (char **) &uri, PMIX_STRING); - if (PRTE_SUCCESS == rc) { - if (NULL != uri) { - pr = process_uri(uri); - if (NULL == pr) { - /* that is just plain wrong */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send addressee unknown %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&msg->dst)); - PRTE_ERROR_LOG(PRTE_ERR_ADDRESSEE_UNKNOWN); - msg->status = PRTE_ERR_ADDRESSEE_UNKNOWN; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } else { - PRTE_ERROR_LOG(PRTE_ERR_ADDRESSEE_UNKNOWN); - msg->status = PRTE_ERR_ADDRESSEE_UNKNOWN; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } else { - /* even though we don't know about this peer yet, we still might - * be able to get to it via routing, so ask each component if - * it can reach it - */ - reachable = false; - pr = NULL; - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - if (NULL != component->is_reachable) { - if (component->is_reachable(&msg->dst)) { - /* there is a way to reach this peer - record it - * so we don't waste this time again - */ - if (NULL == pr) { - pr = PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&pr->name, &msg->dst); - pmix_list_append(&prte_oob_base.peers, &pr->super); - } - /* mark that this component can reach the peer */ - pmix_bitmap_set_bit(&pr->addressable, component->idx); - /* flag that at least one component can reach this peer */ - reachable = true; - } - } - } - /* if nobody could reach it, then that's an error */ - if (!reachable) { - /* if we are a daemon or HNP, then it could be that - * this is a 
local proc we just haven't heard from - * yet due to a race condition. Check that situation */ - if (PRTE_PROC_IS_DAEMON || PRTE_PROC_IS_MASTER) { - ++msg->retries; - if (msg->retries < prte_rml_base.max_retries) { - PRTE_OOB_SEND(msg); - return; - } - } - msg->status = PRTE_ERR_ADDRESSEE_UNKNOWN; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } - } - - /* if we already have a connection to this peer, use it */ - if (NULL != pr->component) { - /* post this msg for send by this transport - the component - * runs on our event base, so we can just call their function - */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send known transport for peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst)); - if (PRTE_SUCCESS == (rc = pr->component->send_nb(msg))) { - return; - } - } - - /* if we haven't identified a transport to this peer, - * loop across all available components in priority order until - * one replies that it has a module that can reach this peer. - * Let it try to make the connection - */ - msg_sent = false; - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - /* is this peer reachable via this component? */ - if (!component->is_reachable(&msg->dst)) { - continue; - } - /* it is addressable, so attempt to send via that transport */ - if (PRTE_SUCCESS == (rc = component->send_nb(msg))) { - /* the msg status will be set upon send completion/failure */ - msg_sent = true; - /* point to this transport for any future messages */ - pr->component = component; - break; - } else if (PRTE_ERR_TAKE_NEXT_OPTION != rc) { - /* components return "next option" if they can't connect - * to this peer. anything else is a true error. 
- */ - PRTE_ERROR_LOG(rc); - msg->status = rc; - PRTE_RML_SEND_COMPLETE(msg); - return; - } - } - - /* if no component can reach this peer, that's an error - post - * it back to the RML for handling - */ - if (!msg_sent) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:base:send no path to target %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst)); - msg->status = PRTE_ERR_NO_PATH_TO_TARGET; - PRTE_RML_SEND_COMPLETE(msg); - } -} - -/** - * Obtain a uri for initial connection purposes - * - * During initial wireup, we can only transfer contact info on the daemon - * command line. This limits what we can send to a string representation of - * the actual contact info, which gets sent in a uri-like form. Not every - * oob module can support this transaction, so this function will loop - * across all oob components/modules, letting each add to the uri string if - * it supports bootstrap operations. An error will be returned in the cbfunc - * if NO component can successfully provide a contact. - * - * Note: since there is a limit to what an OS will allow on a cmd line, we - * impose a limit on the length of the resulting uri via an MCA param. The - * default value of -1 implies unlimited - however, users with large numbers - * of interfaces on their nodes may wish to restrict the size. 
- */ -void prte_oob_base_get_addr(char **uri) -{ - char *turi, *final = NULL, *tmp; - size_t len = 0; - bool one_added = false; - pmix_mca_base_component_list_item_t *cli; - prte_oob_base_component_t *component; - pmix_status_t rc; - - /* start with our process name */ - rc = prte_util_convert_process_name_to_string(&final, PRTE_PROC_MY_NAME); - if (PRTE_SUCCESS != rc) { - PRTE_ERROR_LOG(rc); - *uri = NULL; - return; - } - len = strlen(final); - - /* loop across all available modules to get their input - * up to the max length - */ - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - /* ask the component for its input, obtained when it - * opened its modules - */ - if (NULL == component->get_addr) { - /* doesn't support this ability */ - continue; - } - /* the components operate within our event base, so we - * can directly call their get_uri function to get the - * pointer to the uri - this is not a copy, so - * do NOT free it! 
- */ - turi = component->get_addr(); - if (NULL != turi) { - /* check overall length for limits */ - if (0 < prte_oob_base.max_uri_length - && prte_oob_base.max_uri_length < (int) (len + strlen(turi))) { - /* cannot accept the payload */ - continue; - } - /* add new value to final one */ - pmix_asprintf(&tmp, "%s;%s", final, turi); - free(turi); - free(final); - final = tmp; - len = strlen(final); - /* flag that at least one contributed */ - one_added = true; - } - } - - if (!one_added) { - /* nobody could contribute */ - if (NULL != final) { - free(final); - final = NULL; - } - } - - *uri = final; -} - -static prte_oob_base_peer_t* process_uri(char *uri) -{ - pmix_proc_t peer; - char *cptr; - pmix_mca_base_component_list_item_t *cli; - prte_oob_base_component_t *component; - char **uris = NULL; - prte_oob_base_peer_t *pr; - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s:set_addr processing uri %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uri); - - /* find the first semi-colon in the string */ - cptr = strchr(uri, ';'); - if (NULL == cptr) { - /* got a problem - there must be at least two fields, - * the first containing the process name of our peer - * and all others containing the OOB contact info - */ - PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); - return NULL; - } - *cptr = '\0'; - cptr++; - /* the first field is the process name, so convert it */ - prte_util_convert_string_to_process_name(&peer, uri); - - /* if the peer is us, no need to go further as we already - * know our own contact info - */ - if (PMIX_CHECK_PROCID(&peer, PRTE_PROC_MY_NAME)) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s:set_addr peer %s is me", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&peer)); - return NULL; - } - - /* split the rest of the uri into component parts */ - uris = PMIX_ARGV_SPLIT_COMPAT(cptr, ';'); - - /* get the peer object for this process */ - pr = prte_oob_base_get_peer(&peer); - if (NULL == pr) { - pr = 
PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&pr->name, &peer); - pmix_list_append(&prte_oob_base.peers, &pr->super); - } - - /* loop across all available components and let them extract - * whatever piece(s) of the uri they find relevant - they - * are all operating on our event base, so we can just - * directly call their functions - */ - PMIX_LIST_FOREACH(cli, &prte_oob_base.actives, pmix_mca_base_component_list_item_t) - { - component = (prte_oob_base_component_t *) cli->cli_component; - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s:set_addr checking if peer %s is reachable via component %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer), - component->oob_base.pmix_mca_component_name); - if (NULL != component->set_addr) { - if (PRTE_SUCCESS == component->set_addr(&peer, uris)) { - /* this component found reachable addresses - * in the uris - */ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s: peer %s is reachable via component %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer), - component->oob_base.pmix_mca_component_name); - pmix_bitmap_set_bit(&pr->addressable, component->idx); - } else { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s: peer %s is NOT reachable via component %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer), - component->oob_base.pmix_mca_component_name); - } - } - } - PMIX_ARGV_FREE_COMPAT(uris); - return pr; -} - -prte_oob_base_peer_t *prte_oob_base_get_peer(const pmix_proc_t *pr) -{ - prte_oob_base_peer_t *peer; - - PMIX_LIST_FOREACH(peer, &prte_oob_base.peers, prte_oob_base_peer_t) - { - if (PMIX_CHECK_PROCID(pr, &peer->name)) { - return peer; - } - } - return NULL; -} diff --git a/src/mca/oob/base/owner.txt b/src/mca/oob/base/owner.txt deleted file mode 100644 index 2d23c9be65..0000000000 --- a/src/mca/oob/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is 
responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: project -status: maintenance diff --git a/src/mca/oob/oob.h b/src/mca/oob/oob.h deleted file mode 100644 index 6c6e3294f3..0000000000 --- a/src/mca/oob/oob.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * Contains the internal functions and typedefs for the use of the oob - */ - -#ifndef MCA_OOB_H_ -#define MCA_OOB_H_ - -#include "prte_config.h" -#include "types.h" - -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_SYS_UIO_H -# include -#endif -#ifdef HAVE_NET_UIO_H -# include -#endif - -#include "src/class/pmix_list.h" -#include "src/class/pmix_pointer_array.h" -#include "src/mca/mca.h" -#include "src/pmix/pmix-internal.h" - -#include "src/rml/rml_types.h" - -BEGIN_C_DECLS - -typedef int (*mca_oob_base_component_avail_fn_t)(void); -typedef int (*mca_oob_base_component_startup_fn_t)(void); -typedef void (*mca_oob_base_component_shutdown_fn_t)(void); -typedef int (*mca_oob_base_component_send_fn_t)(prte_rml_send_t *msg); -typedef char *(*mca_oob_base_component_get_addr_fn_t)(void); -typedef int (*mca_oob_base_component_set_addr_fn_t)(pmix_proc_t *peer, char **uris); -typedef bool (*mca_oob_base_component_is_reachable_fn_t)(pmix_proc_t *peer); -typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata); - -typedef struct { - pmix_mca_base_component_t oob_base; - int idx; - int priority; - mca_oob_base_component_avail_fn_t available; - mca_oob_base_component_startup_fn_t startup; - mca_oob_base_component_shutdown_fn_t shutdown; - mca_oob_base_component_send_fn_t send_nb; - mca_oob_base_component_get_addr_fn_t get_addr; - mca_oob_base_component_set_addr_fn_t set_addr; - mca_oob_base_component_is_reachable_fn_t is_reachable; -} prte_oob_base_component_t; - -/** - * Macro for use in components that are of type oob - */ -#define PRTE_OOB_BASE_VERSION_2_0_0 PRTE_MCA_BASE_VERSION_3_0_0("oob", 2, 0, 0) - -END_C_DECLS - -#endif diff --git a/src/mca/oob/tcp/Makefile.am b/src/mca/oob/tcp/Makefile.am deleted file mode 100644 index 07e6c7bfbd..0000000000 --- a/src/mca/oob/tcp/Makefile.am +++ /dev/null @@ -1,63 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of 
Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2012-2013 Los Alamos National Security, LLC. -# All rights reserved -# Copyright (c) 2014-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_prtedata_DATA = help-oob-tcp.txt - -sources = \ - oob_tcp_component.h \ - oob_tcp.h \ - oob_tcp_listener.h \ - oob_tcp_common.h \ - oob_tcp_component.c \ - oob_tcp_connection.h \ - oob_tcp_sendrecv.h \ - oob_tcp_hdr.h \ - oob_tcp_peer.h \ - oob_tcp.c \ - oob_tcp_listener.c \ - oob_tcp_common.c \ - oob_tcp_connection.c \ - oob_tcp_sendrecv.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_prte_oob_tcp_DSO -component_noinst = -component_install = prte_mca_oob_tcp.la -else -component_noinst = libprtemca_oob_tcp.la -component_install = -endif - -mcacomponentdir = $(prtelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -prte_mca_oob_tcp_la_SOURCES = $(sources) -prte_mca_oob_tcp_la_LDFLAGS = -module -avoid-version -prte_mca_oob_tcp_la_LIBADD = $(top_builddir)/src/libprrte.la - -noinst_LTLIBRARIES = $(component_noinst) -libprtemca_oob_tcp_la_SOURCES = $(sources) -libprtemca_oob_tcp_la_LDFLAGS = -module -avoid-version diff --git a/src/mca/oob/tcp/configure.m4 b/src/mca/oob/tcp/configure.m4 deleted file mode 100644 index 25ea055125..0000000000 --- a/src/mca/oob/tcp/configure.m4 +++ /dev/null @@ -1,39 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2019 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_oob_tcp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_prte_oob_tcp_CONFIG],[ - AC_CONFIG_FILES([src/mca/oob/tcp/Makefile]) - - # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], - [oob_tcp_happy="yes"], - [oob_tcp_happy="no"], - [AC_INCLUDES_DEFAULT -#ifdef HAVE_NETINET_IN_H -#include -#endif]) - - AS_IF([test "$oob_tcp_happy" = "yes"], [$1], [$2]) -])dnl diff --git a/src/mca/oob/tcp/oob_tcp.c b/src/mca/oob/tcp/oob_tcp.c deleted file mode 100644 index a78ce60079..0000000000 --- a/src/mca/oob/tcp/oob_tcp.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2016-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "prte_config.h" -#include "types.h" - -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#include -#ifdef HAVE_NETINET_IN_H -# include -#endif -#ifdef HAVE_ARPA_INET_H -# include -#endif -#ifdef HAVE_NETDB_H -# include -#endif -#include - -#include "src/include/prte_socket_errno.h" -#include "src/runtime/prte_progress_threads.h" -#include "src/util/pmix_argv.h" -#include "src/util/error.h" -#include "src/util/pmix_if.h" -#include "src/util/pmix_net.h" -#include "src/util/pmix_output.h" -#include "src/util/pmix_show_help.h" - -#include "src/mca/errmgr/errmgr.h" -#include "src/mca/ess/ess.h" -#include "src/runtime/prte_globals.h" -#include "src/threads/pmix_threads.h" -#include "src/util/name_fns.h" -#include "src/util/pmix_parse_options.h" -#include "src/util/pmix_show_help.h" - -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp_sendrecv.h" - -static void accept_connection(const int accepted_fd, const struct sockaddr *addr); -static void ping(const pmix_proc_t *proc); -static void send_nb(prte_rml_send_t *msg); - -prte_oob_tcp_module_t prte_oob_tcp_module = {.accept_connection = accept_connection, - .ping = ping, - .send_nb = send_nb}; - -/* - * Local utility functions - */ -static void recv_handler(int sd, short flags, void *user); - -/* Called by prte_oob_tcp_accept() and connection_handler() on - * a socket that has been accepted. This call finishes processing the - * socket, including setting socket options and registering for the - * OOB-level connection handshake. Used in both the threaded and - * event listen modes. 
- */ -static void accept_connection(const int accepted_fd, const struct sockaddr *addr) -{ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s accept_connection: %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname(addr), pmix_net_get_port(addr)); - - /* setup socket options */ - prte_oob_tcp_set_socket_options(accepted_fd); - - /* use a one-time event to wait for receipt of peer's - * process ident message to complete this connection - */ - PRTE_ACTIVATE_TCP_ACCEPT_STATE(accepted_fd, addr, recv_handler); -} - -/* API functions */ -static void ping(const pmix_proc_t *proc) -{ - prte_oob_tcp_peer_t *peer; - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] processing ping to peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - __FILE__, __LINE__, PRTE_NAME_PRINT(proc)); - - /* do we know this peer? */ - if (NULL == (peer = prte_oob_tcp_peer_lookup(proc))) { - /* push this back to the component so it can try - * another module within this transport. 
If no - * module can be found, the component can push back - * to the framework so another component can try - */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] hop %s unknown", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - __FILE__, __LINE__, PRTE_NAME_PRINT(proc)); - PRTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, proc, prte_mca_oob_tcp_component_hop_unknown); - return; - } - - /* if we are already connected, there is nothing to do */ - if (MCA_OOB_TCP_CONNECTED == peer->state) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] already connected to peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(proc)); - return; - } - - /* if we are already connecting, there is nothing to do */ - if (MCA_OOB_TCP_CONNECTING == peer->state || MCA_OOB_TCP_CONNECT_ACK == peer->state) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] already connecting to peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(proc)); - return; - } - - /* attempt the connection */ - peer->state = MCA_OOB_TCP_CONNECTING; - PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); -} - -static void send_nb(prte_rml_send_t *msg) -{ - prte_oob_tcp_peer_t *peer; - pmix_proc_t hop; - - /* do we have a route to this peer (could be direct)? */ - PMIX_LOAD_NSPACE(hop.nspace, PRTE_PROC_MY_NAME->nspace); - hop.rank = prte_rml_get_route(msg->dst.rank); - /* do we know this hop? */ - if (NULL == (peer = prte_oob_tcp_peer_lookup(&hop))) { - /* if this message is going to the HNP, send it direct */ - if (PRTE_PROC_MY_HNP->rank == msg->dst.rank) { - hop.rank = PRTE_PROC_MY_HNP->rank; - peer = prte_oob_tcp_peer_lookup(&hop); - if (NULL != peer) { - goto send; - } - } - /* push this back to the component so it can try - * another module within this transport. 
If no - * module can be found, the component can push back - * to the framework so another component can try - */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d seq_num = %d hop %s unknown", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, - PRTE_NAME_PRINT(&hop)); - PRTE_ACTIVATE_TCP_NO_ROUTE(msg, &hop, prte_mca_oob_tcp_component_no_route); - return; - } - -send: - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d seq_num = %d via %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, - PRTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, - PRTE_NAME_PRINT(&peer->name)); - - /* add the msg to the hop's send queue */ - if (MCA_OOB_TCP_CONNECTED == peer->state) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s tcp:send_nb: already connected to %s - queueing for send", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); - MCA_OOB_TCP_QUEUE_SEND(msg, peer); - return; - } - - /* add the message to the queue for sending after the - * connection is formed - */ - MCA_OOB_TCP_QUEUE_PENDING(msg, peer); - - if (MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { - /* we have to initiate the connection - again, we do not - * want to block while the connection is created. - * So throw us into an event that will create - * the connection via a mini-state-machine :-) - */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s tcp:send_nb: initiating connection to %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); - peer->state = MCA_OOB_TCP_CONNECTING; - PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); - } -} - -/* - * Event callback when there is data available on the registered - * socket to recv. 
This is called for the listen sockets to accept an - * incoming connection, on new sockets trying to complete the software - * connection process, and for probes. Data on an established - * connection is handled elsewhere. - */ -static void recv_handler(int sd, short flg, void *cbdata) -{ - prte_oob_tcp_conn_op_t *op = (prte_oob_tcp_conn_op_t *) cbdata; - int flags; - prte_oob_tcp_hdr_t hdr; - prte_oob_tcp_peer_t *peer; - PRTE_HIDE_UNUSED_PARAMS(flg); - - PMIX_ACQUIRE_OBJECT(op); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s:tcp:recv:handler called", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - - /* get the handshake */ - if (PRTE_SUCCESS != prte_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) { - goto cleanup; - } - - /* finish processing ident */ - if (MCA_OOB_TCP_IDENT == hdr.type) { - if (NULL == (peer = prte_oob_tcp_peer_lookup(&hdr.origin))) { - /* should never happen */ - prte_oob_tcp_peer_close(peer); - goto cleanup; - } - /* set socket up to be non-blocking */ - if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), - prte_socket_errno); - } else { - flags |= O_NONBLOCK; - if (fcntl(sd, F_SETFL, flags) < 0) { - pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), - prte_socket_errno); - } - } - /* is the peer instance willing to accept this connection */ - peer->sd = sd; - if (prte_oob_tcp_peer_accept(peer) == false) { - if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { - pmix_output(0, - "%s-%s prte_oob_tcp_recv_connect: " - "rejected connection from %s connection state %d", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), - PRTE_NAME_PRINT(&(hdr.origin)), peer->state); - } - CLOSE_THE_SOCKET(sd); - } - } - -cleanup: 
- PMIX_RELEASE(op); -} diff --git a/src/mca/oob/tcp/oob_tcp_component.c b/src/mca/oob/tcp/oob_tcp_component.c deleted file mode 100644 index e915198f95..0000000000 --- a/src/mca/oob/tcp/oob_tcp_component.c +++ /dev/null @@ -1,1293 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015-2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * In windows, many of the socket functions return an EWOULDBLOCK - * instead of things like EAGAIN, EINPROGRESS, etc. 
It has been - * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments - */ - -#include "prte_config.h" -#include "types.h" - -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#include -#ifdef HAVE_NET_IF_H -# include -#endif -#ifdef HAVE_NETINET_IN_H -# include -#endif -#ifdef HAVE_ARPA_INET_H -# include -#endif -#ifdef HAVE_NETDB_H -# include -#endif -#include -#include -#include - -#ifndef MIN -# define MIN(a, b) ((a) < (b) ? (a) : (b)) -#endif - -#include "src/class/pmix_list.h" -#include "src/event/event-internal.h" -#include "src/include/prte_socket_errno.h" -#include "src/runtime/prte_progress_threads.h" -#include "src/util/pmix_argv.h" -#include "src/util/pmix_if.h" -#include "src/util/error.h" -#include "src/util/pmix_net.h" -#include "src/util/pmix_output.h" -#include "src/util/pmix_show_help.h" - -#include "src/mca/errmgr/errmgr.h" -#include "src/mca/ess/ess.h" -#include "src/rml/rml.h" -#include "src/mca/state/state.h" -#include "src/runtime/prte_globals.h" -#include "src/runtime/prte_wait.h" -#include "src/threads/pmix_threads.h" -#include "src/util/attr.h" -#include "src/util/name_fns.h" -#include "src/util/pmix_parse_options.h" -#include "src/util/pmix_show_help.h" - -#include "oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_listener.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" - -/* - * Local utility functions - */ - -static int tcp_component_register(void); -static int tcp_component_open(void); -static int tcp_component_close(void); - -static int component_available(void); -static int component_startup(void); -static void component_shutdown(void); -static int component_send(prte_rml_send_t *msg); -static char *component_get_addr(void); -static int 
component_set_addr(pmix_proc_t *peer, char **uris); -static bool component_is_reachable(pmix_proc_t *peer); - -/* - * Struct of function pointers and all that to let us be initialized - */ -prte_mca_oob_tcp_component_t prte_mca_oob_tcp_component = { - .super = { - .oob_base = { - PRTE_OOB_BASE_VERSION_2_0_0, - .pmix_mca_component_name = "tcp", - PMIX_MCA_BASE_MAKE_VERSION(component, - PRTE_MAJOR_VERSION, - PRTE_MINOR_VERSION, - PMIX_RELEASE_VERSION), - .pmix_mca_open_component = tcp_component_open, - .pmix_mca_close_component = tcp_component_close, - .pmix_mca_register_component_params = tcp_component_register, - }, - .priority = 30, // default priority of this transport - .available = component_available, - .startup = component_startup, - .shutdown = component_shutdown, - .send_nb = component_send, - .get_addr = component_get_addr, - .set_addr = component_set_addr, - .is_reachable = component_is_reachable, - } -}; - -/* - * Initialize global variables used w/in this module. - */ -static int tcp_component_open(void) -{ - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.peers, pmix_list_t); - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.listeners, pmix_list_t); - if (PRTE_PROC_IS_MASTER) { - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.listen_thread, pmix_thread_t); - prte_mca_oob_tcp_component.listen_thread_active = false; - prte_mca_oob_tcp_component.listen_thread_tv.tv_sec = 3600; - prte_mca_oob_tcp_component.listen_thread_tv.tv_usec = 0; - } - prte_mca_oob_tcp_component.addr_count = 0; - prte_mca_oob_tcp_component.ipv4conns = NULL; - prte_mca_oob_tcp_component.ipv4ports = NULL; - prte_mca_oob_tcp_component.ipv6conns = NULL; - prte_mca_oob_tcp_component.ipv6ports = NULL; - prte_mca_oob_tcp_component.if_masks = NULL; - - PMIX_CONSTRUCT(&prte_mca_oob_tcp_component.local_ifs, pmix_list_t); - return PRTE_SUCCESS; -} - -/* - * Cleanup of global variables used by this module. 
- */ -static int tcp_component_close(void) -{ - PMIX_LIST_DESTRUCT(&prte_mca_oob_tcp_component.local_ifs); - PMIX_LIST_DESTRUCT(&prte_mca_oob_tcp_component.peers); - - if (NULL != prte_mca_oob_tcp_component.ipv4conns) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv4conns); - } - if (NULL != prte_mca_oob_tcp_component.ipv4ports) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv4ports); - } - -#if PRTE_ENABLE_IPV6 - if (NULL != prte_mca_oob_tcp_component.ipv6conns) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv6conns); - } - if (NULL != prte_mca_oob_tcp_component.ipv6ports) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.ipv6ports); - } -#endif - if (NULL != prte_mca_oob_tcp_component.if_masks) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.if_masks); - } - return PRTE_SUCCESS; -} -static char *static_port_string; -#if PRTE_ENABLE_IPV6 -static char *static_port_string6; -#endif // PRTE_ENABLE_IPV6 - -static char *dyn_port_string; -#if PRTE_ENABLE_IPV6 -static char *dyn_port_string6; -#endif - -static int tcp_component_register(void) -{ - pmix_mca_base_component_t *component = &prte_mca_oob_tcp_component.super.oob_base; - - /* register oob module parameters */ - prte_mca_oob_tcp_component.peer_limit = -1; - (void) pmix_mca_base_component_var_register(component, "peer_limit", - "Maximum number of peer connections to simultaneously maintain (-1 = infinite)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.peer_limit); - - prte_mca_oob_tcp_component.max_retries = 2; - (void) pmix_mca_base_component_var_register(component, "peer_retries", - "Number of times to try shutting down a connection before giving up", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.max_retries); - - prte_mca_oob_tcp_component.tcp_sndbuf = 0; - (void) pmix_mca_base_component_var_register(component, "sndbuf", - "TCP socket send buffering size (in bytes, 0 => leave system default)", - PMIX_MCA_BASE_VAR_TYPE_INT, - 
&prte_mca_oob_tcp_component.tcp_sndbuf); - - prte_mca_oob_tcp_component.tcp_rcvbuf = 0; - (void) pmix_mca_base_component_var_register(component, "rcvbuf", - "TCP socket receive buffering size (in bytes, 0 => leave system default)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.tcp_rcvbuf); - - - static_port_string = NULL; - (void) pmix_mca_base_component_var_register(component, "static_ipv4_ports", - "Static ports for daemons and procs (IPv4)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &static_port_string); - - /* if ports were provided, parse the provided range */ - if (NULL != static_port_string) { - pmix_util_parse_range_options(static_port_string, &prte_mca_oob_tcp_component.tcp_static_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp_static_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp_static_ports); - prte_mca_oob_tcp_component.tcp_static_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp_static_ports = NULL; - } - -#if PRTE_ENABLE_IPV6 - static_port_string6 = NULL; - (void) pmix_mca_base_component_var_register(component, "static_ipv6_ports", - "Static ports for daemons and procs (IPv6)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &static_port_string6); - - /* if ports were provided, parse the provided range */ - if (NULL != static_port_string6) { - pmix_util_parse_range_options(static_port_string6, - &prte_mca_oob_tcp_component.tcp6_static_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp6_static_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp6_static_ports); - prte_mca_oob_tcp_component.tcp6_static_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp6_static_ports = NULL; - } -#endif // PRTE_ENABLE_IPV6 - - if (NULL != prte_mca_oob_tcp_component.tcp_static_ports - || NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { - prte_static_ports = true; - } - - dyn_port_string = NULL; - (void) pmix_mca_base_component_var_register(component, "dynamic_ipv4_ports", - "Range of 
ports to be dynamically used by daemons and procs (IPv4)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &dyn_port_string); - /* if ports were provided, parse the provided range */ - if (NULL != dyn_port_string) { - /* can't have both static and dynamic ports! */ - if (prte_static_ports) { - char *err = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.tcp_static_ports, ','); - pmix_show_help("help-oob-tcp.txt", "static-and-dynamic", true, err, dyn_port_string); - free(err); - return PRTE_ERROR; - } - pmix_util_parse_range_options(dyn_port_string, &prte_mca_oob_tcp_component.tcp_dyn_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp_dyn_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp_dyn_ports); - prte_mca_oob_tcp_component.tcp_dyn_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp_dyn_ports = NULL; - } - -#if PRTE_ENABLE_IPV6 - dyn_port_string6 = NULL; - (void) pmix_mca_base_component_var_register(component, "dynamic_ipv6_ports", - "Range of ports to be dynamically used by daemons and procs (IPv6)", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &dyn_port_string6); - /* if ports were provided, parse the provided range */ - if (NULL != dyn_port_string6) { - /* can't have both static and dynamic ports! */ - if (prte_static_ports) { - char *err4 = NULL, *err6 = NULL; - if (NULL != prte_mca_oob_tcp_component.tcp_static_ports) { - err4 = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.tcp_static_ports, ','); - } - if (NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { - err6 = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.tcp6_static_ports, ','); - } - pmix_show_help("help-oob-tcp.txt", "static-and-dynamic-ipv6", true, - (NULL == err4) ? "N/A" : err4, (NULL == err6) ? 
"N/A" : err6, - dyn_port_string6); - if (NULL != err4) { - free(err4); - } - if (NULL != err6) { - free(err6); - } - return PRTE_ERROR; - } - pmix_util_parse_range_options(dyn_port_string6, &prte_mca_oob_tcp_component.tcp6_dyn_ports); - if (0 == strcmp(prte_mca_oob_tcp_component.tcp6_dyn_ports[0], "-1")) { - PMIX_ARGV_FREE_COMPAT(prte_mca_oob_tcp_component.tcp6_dyn_ports); - prte_mca_oob_tcp_component.tcp6_dyn_ports = NULL; - } - } else { - prte_mca_oob_tcp_component.tcp6_dyn_ports = NULL; - } -#endif // PRTE_ENABLE_IPV6 - - prte_mca_oob_tcp_component.disable_ipv4_family = false; - (void) pmix_mca_base_component_var_register(component, "disable_ipv4_family", - "Disable the IPv4 interfaces", - PMIX_MCA_BASE_VAR_TYPE_BOOL, - &prte_mca_oob_tcp_component.disable_ipv4_family); - -#if PRTE_ENABLE_IPV6 - prte_mca_oob_tcp_component.disable_ipv6_family = false; - (void) pmix_mca_base_component_var_register(component, "disable_ipv6_family", - "Disable the IPv6 interfaces", - PMIX_MCA_BASE_VAR_TYPE_BOOL, - &prte_mca_oob_tcp_component.disable_ipv6_family); -#endif // PRTE_ENABLE_IPV6 - - // Wait for this amount of time before sending the first keepalive probe - prte_mca_oob_tcp_component.keepalive_time = 300; - (void) pmix_mca_base_component_var_register(component, "keepalive_time", - "Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables " - "keepalive functionality)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.keepalive_time); - - // Resend keepalive probe every INT seconds - prte_mca_oob_tcp_component.keepalive_intvl = 20; - (void) pmix_mca_base_component_var_register(component, "keepalive_intvl", - "Time between successive keepalive pings when peer has not responded, in seconds (ignored " - "if keepalive_time <= 0)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.keepalive_intvl); - - // After sending PR probes every INT seconds consider the connection dead - prte_mca_oob_tcp_component.keepalive_probes = 9; - 
(void) pmix_mca_base_component_var_register(component, "keepalive_probes", - "Number of keepalives that can be missed before " - "declaring error (ignored if keepalive_time <= 0)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.keepalive_probes); - - prte_mca_oob_tcp_component.retry_delay = 0; - (void) pmix_mca_base_component_var_register(component, "retry_delay", - "Time (in sec) to wait before trying to connect to peer again", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.retry_delay); - - prte_mca_oob_tcp_component.max_recon_attempts = 10; - (void) pmix_mca_base_component_var_register(component, "max_recon_attempts", - "Max number of times to attempt connection before giving up (-1 -> never give up)", - PMIX_MCA_BASE_VAR_TYPE_INT, - &prte_mca_oob_tcp_component.max_recon_attempts); - - return PRTE_SUCCESS; -} - -static char **split_and_resolve(char **orig_str, char *name); - -static int component_available(void) -{ - pmix_pif_t *copied_interface, *selected_interface; - struct sockaddr_storage my_ss; - /* Larger than necessary, used for copying mask */ - char string[50], **interfaces = NULL; - int kindex; - int i, rc; - bool keeploopback = false; - bool including = false; - - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "oob:tcp: component_available called"); - - /* if interface include was given, construct a list - * of those interfaces which match the specifications - remember, - * the includes could be given as named interfaces, IP addrs, or - * subnet+mask - */ - if (NULL != prte_if_include) { - interfaces = split_and_resolve(&prte_if_include, - "include"); - including = true; - } else if (NULL != prte_if_exclude) { - interfaces = split_and_resolve(&prte_if_exclude, - "exclude"); - } - - /* if we are the master, then check the interfaces for loopbacks - * and keep loopbacks only if no non-loopback interface exists */ - if (PRTE_PROC_IS_MASTER) { - keeploopback = true; - PMIX_LIST_FOREACH(selected_interface, 
&pmix_if_list, pmix_pif_t) - { - if (!(selected_interface->if_flags & IFF_LOOPBACK)) { - keeploopback = false; - break; - } - } - } - - /* look at all available interfaces */ - PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) - { - if ((selected_interface->if_flags & IFF_LOOPBACK) && - !keeploopback) { - continue; - } - - - i = selected_interface->if_index; - kindex = selected_interface->if_kernel_index; - memcpy((struct sockaddr *) &my_ss, &selected_interface->if_addr, - MIN(sizeof(struct sockaddr_storage), sizeof(selected_interface->if_addr))); - - /* ignore non-ip4/6 interfaces */ - if (AF_INET != my_ss.ss_family -#if PRTE_ENABLE_IPV6 - && AF_INET6 != my_ss.ss_family -#endif - ) { - continue; - } - - /* ignore any virtual interfaces */ - if (0 == strncmp(selected_interface->if_name, "vir", 3)) { - continue; - } - - /* handle include/exclude directives */ - if (NULL != interfaces) { - /* check for match */ - rc = pmix_ifmatches(kindex, interfaces); - /* if one of the network specifications isn't parseable, then - * error out as we can't do what was requested - */ - if (PRTE_ERR_NETWORK_NOT_PARSEABLE == rc) { - pmix_show_help("help-oob-tcp.txt", "not-parseable", true); - PMIX_ARGV_FREE_COMPAT(interfaces); - return PRTE_ERR_BAD_PARAM; - } - /* if we are including, then ignore this if not present */ - if (including) { - if (PMIX_SUCCESS != rc) { - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s oob:tcp:init rejecting interface %s (not in include list)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name); - continue; - } - } else { - /* we are excluding, so ignore if present */ - if (PMIX_SUCCESS == rc) { - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s oob:tcp:init rejecting interface %s (in exclude list)", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name); - continue; - } - } - } - - /* Refs ticket #3019 - * it would probably be worthwhile to print out a warning if 
PRRTE detects multiple - * IP interfaces that are "up" on the same subnet (because that's a Bad Idea). Note - * that we should only check for this after applying the relevant include/exclude - * list MCA params. If we detect redundant ports, we can also automatically ignore - * them so that applications won't hang. - */ - - /* add this address to our connections */ - if (AF_INET == my_ss.ss_family) { - pmix_output_verbose(10, prte_oob_base_framework.framework_output, - "%s oob:tcp:init adding %s to our list of %s connections", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname((struct sockaddr *) &my_ss), - (AF_INET == my_ss.ss_family) ? "V4" : "V6"); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv4conns, - pmix_net_get_hostname((struct sockaddr *) &my_ss)); - } else if (AF_INET6 == my_ss.ss_family) { -#if PRTE_ENABLE_IPV6 - pmix_output_verbose(10, prte_oob_base_framework.framework_output, - "%s oob:tcp:init adding %s to our list of %s connections", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname((struct sockaddr *) &my_ss), - (AF_INET == my_ss.ss_family) ? 
"V4" : "V6"); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv6conns, - pmix_net_get_hostname((struct sockaddr *) &my_ss)); -#endif // PRTE_ENABLE_IPV6 - } else { - pmix_output_verbose(10, prte_oob_base_framework.framework_output, - "%s oob:tcp:init ignoring %s from out list of connections", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - pmix_net_get_hostname((struct sockaddr *) &my_ss)); - continue; - } - copied_interface = PMIX_NEW(pmix_pif_t); - if (NULL == copied_interface) { - return PRTE_ERR_OUT_OF_RESOURCE; - } - pmix_string_copy(copied_interface->if_name, selected_interface->if_name, PMIX_IF_NAMESIZE); - copied_interface->if_index = i; - copied_interface->if_kernel_index = kindex; - copied_interface->af_family = my_ss.ss_family; - copied_interface->if_flags = selected_interface->if_flags; - copied_interface->if_speed = selected_interface->if_speed; - memcpy(&copied_interface->if_addr, &selected_interface->if_addr, - sizeof(struct sockaddr_storage)); - copied_interface->if_mask = selected_interface->if_mask; - /* If bandwidth is not found, set to arbitrary non zero value */ - copied_interface->if_bandwidth = selected_interface->if_bandwidth > 0 - ? 
selected_interface->if_bandwidth - : 1; - memcpy(&copied_interface->if_mac, &selected_interface->if_mac, - sizeof(copied_interface->if_mac)); - copied_interface->ifmtu = selected_interface->ifmtu; - /* Add the if_mask to the list */ - sprintf(string, "%d", selected_interface->if_mask); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.if_masks, string); - pmix_list_append(&prte_mca_oob_tcp_component.local_ifs, &(copied_interface->super)); - } - - if (0 == PMIX_ARGV_COUNT_COMPAT(prte_mca_oob_tcp_component.ipv4conns) -#if PRTE_ENABLE_IPV6 - && 0 == PMIX_ARGV_COUNT_COMPAT(prte_mca_oob_tcp_component.ipv6conns) -#endif - ) { - return PRTE_ERR_NOT_AVAILABLE; - } - - return PRTE_SUCCESS; -} - -/* Start all modules */ -static int component_startup(void) -{ - int rc = PRTE_SUCCESS; - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s TCP STARTUP", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - - /* if we are a daemon/HNP, - * then it is possible that someone else may initiate a - * connection to us. In these cases, we need to start the - * listening thread/event. 
Otherwise, we will be the one - * initiating communication, and there is no need for - * a listener */ - if (PRTE_PROC_IS_MASTER || PRTE_PROC_IS_DAEMON) { - if (PRTE_SUCCESS != (rc = prte_oob_tcp_start_listening())) { - PRTE_ERROR_LOG(rc); - } - } - - return rc; -} - -static void component_shutdown(void) -{ - int i = 0, rc; - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s TCP SHUTDOWN", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - - if (PRTE_PROC_IS_MASTER && prte_mca_oob_tcp_component.listen_thread_active) { - prte_mca_oob_tcp_component.listen_thread_active = false; - /* tell the thread to exit */ - rc = write(prte_mca_oob_tcp_component.stop_thread[1], &i, sizeof(int)); - if (0 < rc) { - pmix_thread_join(&prte_mca_oob_tcp_component.listen_thread, NULL); - } - - close(prte_mca_oob_tcp_component.stop_thread[0]); - close(prte_mca_oob_tcp_component.stop_thread[1]); - - } else { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "no hnp or not active"); - } - - /* cleanup listen event list */ - PMIX_LIST_DESTRUCT(&prte_mca_oob_tcp_component.listeners); - - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); -} - -static int component_send(prte_rml_send_t *msg) -{ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, - "%s oob:tcp:send_nb to peer %s:%d seq = %d", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst), msg->tag, - msg->seq_num); - - /* The module will first see if it knows - * of a way to send the data to the target, and then - * attempt to send the data. 
It will call the cbfunc - * with the status upon completion - if it can't do it for - * some reason, it will pass the error to our fn below so - * it can do something about it - */ - prte_oob_tcp_module.send_nb(msg); - return PRTE_SUCCESS; -} - -static char *component_get_addr(void) -{ - char *cptr = NULL, *tmp, *tp, *tm; - - if (!prte_mca_oob_tcp_component.disable_ipv4_family && - NULL != prte_mca_oob_tcp_component.ipv4conns) { - tmp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv4conns, ','); - tp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv4ports, ','); - tm = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.if_masks, ','); - pmix_asprintf(&cptr, "tcp://%s:%s:%s", tmp, tp, tm); - free(tmp); - free(tp); - free(tm); - } -#if PRTE_ENABLE_IPV6 - if (!prte_mca_oob_tcp_component.disable_ipv6_family && NULL != prte_mca_oob_tcp_component.ipv6conns) { - char *tmp2; - - /* Fixes #2498 - * RFC 3986, section 3.2.2 - * The notation in that case is to encode the IPv6 IP number in square brackets: - * "http://[2001:db8:1f70::999:de8:7648:6e8]:100/" - * A host identified by an Internet Protocol literal address, version 6 [RFC3513] - * or later, is distinguished by enclosing the IP literal within square brackets. - * This is the only place where square bracket characters are allowed in the URI - * syntax. In anticipation of future, as-yet-undefined IP literal address formats, - * an implementation may use an optional version flag to indicate such a format - * explicitly rather than rely on heuristic determination. 
- */ - tmp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv6conns, ','); - tp = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.ipv6ports, ','); - tm = PMIX_ARGV_JOIN_COMPAT(prte_mca_oob_tcp_component.if_masks, ','); - if (NULL == cptr) { - /* no ipv4 stuff */ - pmix_asprintf(&cptr, "tcp6://[%s]:%s:%s", tmp, tp, tm); - } else { - pmix_asprintf(&tmp2, "%s;tcp6://[%s]:%s:%s", cptr, tmp, tp, tm); - free(cptr); - cptr = tmp2; - } - free(tmp); - free(tp); - free(tm); - } -#endif // PRTE_ENABLE_IPV6 - - /* return our uri */ - return cptr; -} - -/* the host in this case is always in "dot" notation, and - * thus we do not need to do a DNS lookup to convert it */ -static int parse_uri(const uint16_t af_family, const char *host, const char *port, - struct sockaddr_storage *inaddr) -{ - struct sockaddr_in *in; - - if (AF_INET == af_family) { - memset(inaddr, 0, sizeof(struct sockaddr_in)); - in = (struct sockaddr_in *) inaddr; - in->sin_family = AF_INET; - in->sin_addr.s_addr = inet_addr(host); - if (in->sin_addr.s_addr == INADDR_NONE) { - return PRTE_ERR_BAD_PARAM; - } - ((struct sockaddr_in *) inaddr)->sin_port = htons(atoi(port)); - } -#if PRTE_ENABLE_IPV6 - else if (AF_INET6 == af_family) { - struct sockaddr_in6 *in6; - memset(inaddr, 0, sizeof(struct sockaddr_in6)); - in6 = (struct sockaddr_in6 *) inaddr; - - if (0 == inet_pton(AF_INET6, host, (void *) &in6->sin6_addr)) { - pmix_output(0, "oob_tcp_parse_uri: Could not convert %s\n", host); - return PRTE_ERR_BAD_PARAM; - } - in6->sin6_family = AF_INET6; - in6->sin6_port = htons(atoi(port)); - } -#endif - else { - return PRTE_ERR_NOT_SUPPORTED; - } - return PRTE_SUCCESS; -} - -static int component_set_addr(pmix_proc_t *peer, char **uris) -{ - char **addrs, **masks, *hptr; - char *tcpuri = NULL, *host, *ports, *masks_string; - int i, j, rc; - uint16_t af_family = AF_UNSPEC; - uint64_t ui64; - bool found; - prte_oob_tcp_peer_t *pr; - prte_oob_tcp_addr_t *maddr; - - memcpy(&ui64, (char *) peer, sizeof(uint64_t)); - /* 
cycle across component parts and see if one belongs to us */ - found = false; - - for (i = 0; NULL != uris[i]; i++) { - tcpuri = strdup(uris[i]); - if (NULL == tcpuri) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: out of memory", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - continue; - } - if (0 == strncmp(uris[i], "tcp:", 4)) { - af_family = AF_INET; - host = tcpuri + strlen("tcp://"); - } else if (0 == strncmp(uris[i], "tcp6:", 5)) { -#if PRTE_ENABLE_IPV6 - af_family = AF_INET6; - host = tcpuri + strlen("tcp6://"); -#else // PRTE_ENABLE_IPV6 - /* we don't support this connection type */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: address %s not supported", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); - free(tcpuri); - continue; -#endif // PRTE_ENABLE_IPV6 - } else { - /* not one of ours */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: ignoring address %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); - free(tcpuri); - continue; - } - - /* this one is ours - record the peer */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: working peer %s address %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), uris[i]); - - /* separate the mask from the network addrs */ - masks_string = strrchr(tcpuri, ':'); - if (NULL == masks_string) { - PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); - free(tcpuri); - continue; - } - *masks_string = '\0'; - masks_string++; - masks = PMIX_ARGV_SPLIT_COMPAT(masks_string, ','); - - /* separate the ports from the network addrs */ - ports = strrchr(tcpuri, ':'); - if (NULL == ports) { - PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); - free(tcpuri); - continue; - } - *ports = '\0'; - ports++; - - /* split the addrs */ - /* if this is a tcp6 connection, the first one will have a '[' - * at the beginning of it, and the last will have a ']' at the - * end - we need to remove those extra characters - */ - hptr = 
host; -#if PRTE_ENABLE_IPV6 - if (AF_INET6 == af_family) { - if ('[' == host[0]) { - hptr = &host[1]; - } - if (']' == host[strlen(host) - 1]) { - host[strlen(host) - 1] = '\0'; - } - } -#endif // PRTE_ENABLE_IPV6 - addrs = PMIX_ARGV_SPLIT_COMPAT(hptr, ','); - - /* cycle across the provided addrs */ - for (j = 0; NULL != addrs[j]; j++) { - if (NULL == masks[j]) { - /* Missing mask information */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, - "%s oob:tcp: uri missing mask information.", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - return PRTE_ERR_TAKE_NEXT_OPTION; - } - /* if they gave us "localhost", then just take the first conn on our list */ - if (0 == strcasecmp(addrs[j], "localhost")) { -#if PRTE_ENABLE_IPV6 - if (AF_INET6 == af_family) { - if (NULL == prte_mca_oob_tcp_component.ipv6conns - || NULL == prte_mca_oob_tcp_component.ipv6conns[0]) { - continue; - } - host = prte_mca_oob_tcp_component.ipv6conns[0]; - } else { -#endif // PRTE_ENABLE_IPV6 - if (NULL == prte_mca_oob_tcp_component.ipv4conns - || NULL == prte_mca_oob_tcp_component.ipv4conns[0]) { - continue; - } - host = prte_mca_oob_tcp_component.ipv4conns[0]; -#if PRTE_ENABLE_IPV6 - } -#endif - } else { - host = addrs[j]; - } - - if (NULL == (pr = prte_oob_tcp_peer_lookup(peer))) { - pr = PMIX_NEW(prte_oob_tcp_peer_t); - PMIX_XFER_PROCID(&pr->name, peer); - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s SET_PEER ADDING PEER %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer)); - pmix_list_append(&prte_mca_oob_tcp_component.peers, &pr->super); - } - - maddr = PMIX_NEW(prte_oob_tcp_addr_t); - ((struct sockaddr_storage *) &(maddr->addr))->ss_family = af_family; - if (PRTE_SUCCESS - != (rc = parse_uri(af_family, host, ports, - (struct sockaddr_storage *) &(maddr->addr)))) { - PRTE_ERROR_LOG(rc); - PMIX_RELEASE(maddr); - pmix_list_remove_item(&prte_mca_oob_tcp_component.peers, &pr->super); - PMIX_RELEASE(pr); - return PRTE_ERR_TAKE_NEXT_OPTION; - } - 
maddr->if_mask = atoi(masks[j]); - - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s set_peer: peer %s is listening on net %s port %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), - (NULL == host) ? "NULL" : host, (NULL == ports) ? "NULL" : ports); - pmix_list_append(&pr->addrs, &maddr->super); - - found = true; - } - PMIX_ARGV_FREE_COMPAT(addrs); - free(tcpuri); - } - if (found) { - /* indicate that this peer is addressable by this component */ - return PRTE_SUCCESS; - } - - /* otherwise indicate that it is not addressable by us */ - return PRTE_ERR_TAKE_NEXT_OPTION; -} - -static bool component_is_reachable(pmix_proc_t *peer) -{ - PRTE_HIDE_UNUSED_PARAMS(peer); - - /* assume we can reach the hop - the module will tell us if it can't - * when we try to send the first time, and then we'll correct it */ - return true; -} - -void prte_mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) -{ - prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; - prte_oob_base_peer_t *bpr; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(pop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:set_module called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&pop->peer)); - - /* make sure the OOB knows that we can reach this peer - we - * are in the same event base as the OOB base, so we can - * directly access its storage - */ - bpr = prte_oob_base_get_peer(&pop->peer); - if (NULL == bpr) { - bpr = PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&bpr->name, &pop->peer); - pmix_list_append(&prte_oob_base.peers, &bpr->super); - } - pmix_bitmap_set_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - bpr->component = &prte_mca_oob_tcp_component.super; - - PMIX_RELEASE(pop); -} - -void prte_mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) -{ - prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; 
- prte_oob_base_peer_t *bpr; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(pop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:lost connection called for peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); - - /* Mark that we no longer support this peer */ - bpr = prte_oob_base_get_peer(&pop->peer); - if (NULL != bpr) { - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - pmix_list_remove_item(&prte_oob_base.peers, &bpr->super); - PMIX_RELEASE(bpr); - } - - if (!prte_finalizing) { - /* activate the proc state */ - if (PRTE_SUCCESS != prte_rml_route_lost(pop->peer.rank)) { - PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_LIFELINE_LOST); - } else { - PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_COMM_FAILED); - } - } - PMIX_RELEASE(pop); -} - -void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata) -{ - prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata; - prte_oob_base_peer_t *bpr; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(mop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:no route called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&mop->hop)); - - /* mark that we cannot reach this hop */ - bpr = prte_oob_base_get_peer(&mop->hop); - if (NULL == bpr) { - bpr = PMIX_NEW(prte_oob_base_peer_t); - PMIX_XFER_PROCID(&bpr->name, &mop->hop); - } - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - - /* report the error back to the OOB and let it try other components - * or declare a problem - */ - mop->rmsg->retries++; - /* activate the OOB send state */ - PRTE_OOB_SEND(mop->rmsg); - - PMIX_RELEASE(mop); -} - -void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) -{ - prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata; - prte_rml_send_t 
*snd; - prte_oob_base_peer_t *bpr; - pmix_status_t rc; - pmix_byte_object_t bo; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(mop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:unknown hop called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - PRTE_NAME_PRINT(&mop->hop)); - - if (prte_finalizing || prte_abnormal_term_ordered) { - /* just ignore the problem */ - PMIX_RELEASE(mop); - return; - } - - /* mark that this component cannot reach this hop */ - bpr = prte_oob_base_get_peer(&mop->hop); - if (NULL == bpr) { - /* the overall OOB has no knowledge of this hop. Only - * way this could happen is if the peer contacted us - * via this component, and it wasn't entered into the - * OOB framework hash table. We have no way of knowing - * what to do next, so just output an error message and - * abort */ - pmix_output(0, - "%s ERROR: message to %s requires routing and the OOB has no knowledge of the " - "reqd hop %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&mop->snd->hdr.dst), - PRTE_NAME_PRINT(&mop->hop)); - PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); - PMIX_RELEASE(mop); - return; - } - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - - /* mark that this component cannot reach this destination either */ - bpr = prte_oob_base_get_peer(&mop->snd->hdr.dst); - if (NULL == bpr) { - pmix_output( - 0, - "%s ERROR: message to %s requires routing and the OOB has no knowledge of this process", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&mop->snd->hdr.dst)); - PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); - PMIX_RELEASE(mop); - return; - } - pmix_bitmap_clear_bit(&bpr->addressable, prte_mca_oob_tcp_component.super.idx); - - /* post the message to the OOB so it can see - * if another component can transfer it - */ - MCA_OOB_TCP_HDR_NTOH(&mop->snd->hdr); - snd = PMIX_NEW(prte_rml_send_t); - 
snd->retries = mop->rmsg->retries + 1; - PMIX_XFER_PROCID(&snd->dst, &mop->snd->hdr.dst); - PMIX_XFER_PROCID(&snd->origin, &mop->snd->hdr.origin); - snd->tag = mop->snd->hdr.tag; - snd->seq_num = mop->snd->hdr.seq_num; - bo.bytes = mop->snd->data; - bo.size = mop->snd->hdr.nbytes; - PMIX_DATA_BUFFER_CREATE(snd->dbuf); - rc = PMIx_Data_load(snd->dbuf, &bo); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - } - snd->cbfunc = NULL; - snd->cbdata = NULL; - /* activate the OOB send state */ - PRTE_OOB_SEND(snd); - /* protect the data */ - mop->snd->data = NULL; - - PMIX_RELEASE(mop); -} - -void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata) -{ - prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata; - PRTE_HIDE_UNUSED_PARAMS(fd, args); - - PMIX_ACQUIRE_OBJECT(pop); - - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:failed_to_connect called for peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); - - /* if we are terminating, then don't attempt to reconnect */ - if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { - PMIX_RELEASE(pop); - return; - } - - /* activate the proc state */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s tcp:failed_to_connect unable to reach peer %s", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer)); - - PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_FAILED_TO_CONNECT); - PMIX_RELEASE(pop); -} - - -/* - * Go through a list of argv; if there are any subnet specifications - * (a.b.c.d/e), resolve them to an interface name (Currently only - * supporting IPv4). If unresolvable, warn and remove. 
- */ -static char **split_and_resolve(char **orig_str, char *name) -{ - pmix_pif_t *selected_interface; - int i, n, ret, match_count, interface_count; - char **argv, **interfaces, *str, *tmp; - char if_name[IF_NAMESIZE]; - struct sockaddr_storage argv_inaddr, if_inaddr; - uint32_t argv_prefix; - - /* Sanity check */ - if (NULL == orig_str || NULL == *orig_str) { - return NULL; - } - - argv = PMIX_ARGV_SPLIT_COMPAT(*orig_str, ','); - if (NULL == argv) { - return NULL; - } - interface_count = 0; - interfaces = NULL; - for (i = 0; NULL != argv[i]; ++i) { - if (isalpha(argv[i][0])) { - /* This is an interface name. If not already in the interfaces array, add it */ - for (n = 0; n < interface_count; n++) { - if (0 == strcmp(argv[i], interfaces[n])) { - break; - } - } - if (n == interface_count) { - pmix_output_verbose(20, - prte_oob_base_framework.framework_output, - "oob:tcp: Using interface: %s ", argv[i]); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, argv[i]); - ++interface_count; - } - continue; - } - - /* Found a subnet notation. Convert it to an IP - address/netmask. Get the prefix first. 
*/ - argv_prefix = 0; - tmp = strdup(argv[i]); - str = strchr(argv[i], '/'); - if (NULL == str) { - pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, prte_process_info.nodename, - tmp, "Invalid specification (missing \"/\")"); - free(argv[i]); - free(tmp); - continue; - } - *str = '\0'; - argv_prefix = atoi(str + 1); - - /* Now convert the IPv4 address */ - ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET; - ret = inet_pton(AF_INET, argv[i], - &((struct sockaddr_in*) &argv_inaddr)->sin_addr); - free(argv[i]); - - if (1 != ret) { - pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, prte_process_info.nodename, tmp, - "Invalid specification (inet_pton() failed)"); - free(tmp); - continue; - } - pmix_output_verbose(20, prte_oob_base_framework.framework_output, - "%s oob:tcp: Searching for %s address+prefix: %s / %u", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), - name, - pmix_net_get_hostname((struct sockaddr*) &argv_inaddr), - argv_prefix); - - /* Go through all interfaces and see if we can find a match */ - match_count = 0; - PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) { - pmix_ifindextoaddr(selected_interface->if_kernel_index, - (struct sockaddr*) &if_inaddr, - sizeof(if_inaddr)); - if (pmix_net_samenetwork((struct sockaddr_storage*) &argv_inaddr, - (struct sockaddr_storage*) &if_inaddr, - argv_prefix)) { - /* We found a match. If it's not already in the interfaces array, - add it. 
If it's already in the array, treat it as a match */ - match_count = match_count + 1; - pmix_ifindextoname(selected_interface->if_kernel_index, if_name, sizeof(if_name)); - for (n = 0; n < interface_count; n++) { - if (0 == strcmp(if_name, interfaces[n])) { - break; - } - } - if (n == interface_count) { - pmix_output_verbose(20, - prte_oob_base_framework.framework_output, - "oob:tcp: Found match: %s (%s)", - pmix_net_get_hostname((struct sockaddr*) &if_inaddr), - if_name); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, if_name); - ++interface_count; - } - } - } - /* If we didn't find a match, keep trying */ - if (0 == match_count) { - pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, prte_process_info.nodename, tmp, - "Did not find interface matching this subnet"); - free(tmp); - continue; - } - - free(tmp); - } - - /* Mark the end of the interface name array with NULL */ - if (NULL != interfaces) { - interfaces[interface_count] = NULL; - } - free(argv); - free(*orig_str); - *orig_str = PMIX_ARGV_JOIN_COMPAT(interfaces, ','); - return interfaces; -} - -/* OOB TCP Class instances */ - -static void peer_cons(prte_oob_tcp_peer_t *peer) -{ - peer->auth_method = NULL; - peer->sd = -1; - PMIX_CONSTRUCT(&peer->addrs, pmix_list_t); - peer->active_addr = NULL; - peer->state = MCA_OOB_TCP_UNCONNECTED; - peer->num_retries = 0; - PMIX_CONSTRUCT(&peer->send_queue, pmix_list_t); - peer->send_msg = NULL; - peer->recv_msg = NULL; - peer->send_ev_active = false; - peer->recv_ev_active = false; - peer->timer_ev_active = false; -} -static void peer_des(prte_oob_tcp_peer_t *peer) -{ - if (NULL != peer->auth_method) { - free(peer->auth_method); - } - if (peer->send_ev_active) { - prte_event_del(&peer->send_event); - } - if (peer->recv_ev_active) { - prte_event_del(&peer->recv_event); - } - if (peer->timer_ev_active) { - prte_event_del(&peer->timer_event); - } - if (0 <= peer->sd) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, "%s CLOSING 
SOCKET %d", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), peer->sd); - CLOSE_THE_SOCKET(peer->sd); - } - PMIX_LIST_DESTRUCT(&peer->addrs); - PMIX_LIST_DESTRUCT(&peer->send_queue); -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_t, pmix_list_item_t, peer_cons, peer_des); - -static void padd_cons(prte_oob_tcp_addr_t *ptr) -{ - memset(&ptr->addr, 0, sizeof(ptr->addr)); - ptr->retries = 0; - ptr->state = MCA_OOB_TCP_UNCONNECTED; -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_addr_t, pmix_list_item_t, padd_cons, NULL); - -static void pop_cons(prte_oob_tcp_peer_op_t *pop) -{ - pop->net = NULL; - pop->port = NULL; -} -static void pop_des(prte_oob_tcp_peer_op_t *pop) -{ - if (NULL != pop->net) { - free(pop->net); - } - if (NULL != pop->port) { - free(pop->port); - } -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_op_t, pmix_object_t, pop_cons, pop_des); - -PMIX_CLASS_INSTANCE(prte_oob_tcp_msg_op_t, pmix_object_t, NULL, NULL); - -PMIX_CLASS_INSTANCE(prte_oob_tcp_conn_op_t, pmix_object_t, NULL, NULL); - -static void nicaddr_cons(prte_oob_tcp_nicaddr_t *ptr) -{ - ptr->af_family = PF_UNSPEC; - memset(&ptr->addr, 0, sizeof(ptr->addr)); -} -PMIX_CLASS_INSTANCE(prte_oob_tcp_nicaddr_t, pmix_list_item_t, nicaddr_cons, NULL); diff --git a/src/mca/oob/tcp/oob_tcp_component.h b/src/mca/oob/tcp/oob_tcp_component.h deleted file mode 100644 index 9131e100fb..0000000000 --- a/src/mca/oob/tcp/oob_tcp_component.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. 
- * All rights reserved. - * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2019 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2023 Triad National Security, LLC. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_OOB_TCP_COMPONENT_H_ -#define _MCA_OOB_TCP_COMPONENT_H_ - -#include "prte_config.h" - -#ifdef HAVE_SYS_TIME_H -# include <sys/time.h> -#endif - -#include "src/include/prte_stdatomic.h" -#include "src/class/pmix_bitmap.h" -#include "src/class/pmix_list.h" -#include "src/class/pmix_pointer_array.h" -#include "src/event/event-internal.h" - -#include "oob_tcp.h" -#include "src/mca/oob/oob.h" - -/** - * OOB TCP Component - */ -typedef struct { - prte_oob_base_component_t super; /**< base OOB component */ - uint32_t addr_count; /**< total number of addresses */ - int num_links; /**< number of logical links per physical device */ - int max_retries; /**< max number of retries before declaring peer gone */ - pmix_list_t events; /**< events for monitoring connections */ - int peer_limit; /**< max size of tcp peer cache */ - pmix_list_t peers; // connection addresses for peers - - /* Port specifications */ - int tcp_sndbuf; /**< socket send buffer size */ - int tcp_rcvbuf; /**< socket recv buffer size */ - - /* IPv4 support */ - bool disable_ipv4_family; /**< disable this AF */ - char **tcp_static_ports; /**< Static ports - IPV4 */ - char **tcp_dyn_ports; /**< Dynamic ports - IPV4 */ - char **ipv4conns; - char **ipv4ports; - - /* IPv6 support */ - bool disable_ipv6_family; /**< disable this AF */ - char **tcp6_static_ports; /**< Static ports - IPV6 */ - char **tcp6_dyn_ports; /**< Dynamic ports - IPV6 */ -
char **ipv6conns; - char **ipv6ports; - - /* connection support */ - pmix_list_t local_ifs; /**< prte list of local pmix_pif_t interfaces */ - char **if_masks; - char *my_uri; /**< uri for connecting to the TCP module */ - int num_hnp_ports; /**< number of ports the HNP should listen on */ - pmix_list_t listeners; /**< List of sockets being monitored by event or thread */ - pmix_thread_t listen_thread; /**< handle to the listening thread */ - prte_atomic_bool_t listen_thread_active; - struct timeval listen_thread_tv; /**< Timeout when using listen thread */ - int stop_thread[2]; /**< pipe used to exit the listen thread */ - int keepalive_probes; /**< number of keepalives that can be missed before declaring error */ - int keepalive_time; /**< idle time in seconds before starting to send keepalives */ - int keepalive_intvl; /**< time between keepalives, in seconds */ - int retry_delay; /**< time to wait before retrying connection */ - int max_recon_attempts; /**< maximum number of times to attempt connect before giving up (-1 for - never) */ -} prte_mca_oob_tcp_component_t; - -PRTE_MODULE_EXPORT extern prte_mca_oob_tcp_component_t prte_mca_oob_tcp_component; - -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_set_module(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata); - -#endif /* _MCA_OOB_TCP_COMPONENT_H_ */ diff --git a/src/mca/plm/ssh/plm_ssh_module.c b/src/mca/plm/ssh/plm_ssh_module.c index 3608ae64a7..b14e38a21d 100644 --- a/src/mca/plm/ssh/plm_ssh_module.c +++ b/src/mca/plm/ssh/plm_ssh_module.c @@ -17,7 +17,7 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. 
* Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -88,7 +88,6 @@ #include "src/mca/ess/base/base.h" #include "src/mca/ess/ess.h" #include "src/mca/grpcomm/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/rmaps/rmaps.h" #include "src/rml/rml_contact.h" #include "src/rml/rml.h" @@ -652,11 +651,9 @@ static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node * uri of their parent (me) */ if (!prte_mca_plm_ssh_component.no_tree_spawn) { pmix_argv_append(&argc, &argv, "--tree-spawn"); - prte_oob_base_get_addr(¶m); pmix_argv_append(&argc, &argv, "--prtemca"); pmix_argv_append(&argc, &argv, "prte_parent_uri"); - pmix_argv_append(&argc, &argv, param); - free(param); + pmix_argv_append(&argc, &argv, prte_process_info.my_uri); } /* protect the params */ diff --git a/src/prted/prted_comm.c b/src/prted/prted_comm.c index c78fa51d88..054740afc8 100644 --- a/src/prted/prted_comm.c +++ b/src/prted/prted_comm.c @@ -65,7 +65,6 @@ #include "src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/plm/plm.h" #include "src/mca/rmaps/rmaps_types.h" diff --git a/src/rml/Makefile.am b/src/rml/Makefile.am index 5cc9f3ca0e..c864b2c2e4 100644 --- a/src/rml/Makefile.am +++ b/src/rml/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. -# Copyright (c) 2022 Nanook Consulting. All rights reserved. +# Copyright (c) 2022-2024 Nanook Consulting All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -32,3 +32,5 @@ libprrte_la_SOURCES += \ rml/rml_base_contact.c \ rml/rml_base_msg_handlers.c \ rml/routed_radix.c + +include rml/oob/Makefile.am diff --git a/src/rml/oob/Makefile.am b/src/rml/oob/Makefile.am new file mode 100644 index 0000000000..25704f9b3a --- /dev/null +++ b/src/rml/oob/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2012-2013 Los Alamos National Security, LLC. +# All rights reserved +# Copyright (c) 2014-2020 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2022-2024 Nanook Consulting All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_prtedata_DATA += \ + rml/oob/help-oob-base.txt \ + rml/oob/help-oob-tcp.txt + +headers += \ + rml/oob/oob.h \ + rml/oob/oob_tcp.h \ + rml/oob/oob_tcp_listener.h \ + rml/oob/oob_tcp_common.h \ + rml/oob/oob_tcp_connection.h \ + rml/oob/oob_tcp_sendrecv.h \ + rml/oob/oob_tcp_hdr.h \ + rml/oob/oob_tcp_peer.h + +libprrte_la_SOURCES += \ + rml/oob/oob_tcp_component.c \ + rml/oob/oob_tcp.c \ + rml/oob/oob_tcp_listener.c \ + rml/oob/oob_tcp_common.c \ + rml/oob/oob_tcp_connection.c \ + rml/oob/oob_tcp_sendrecv.c \ + rml/oob/oob_base_stubs.c diff --git a/src/mca/oob/base/help-oob-base.txt b/src/rml/oob/help-oob-base.txt similarity index 93% rename from src/mca/oob/base/help-oob-base.txt rename to src/rml/oob/help-oob-base.txt index 41ae1761d7..009fcf0e0e 100644 --- a/src/mca/oob/base/help-oob-base.txt +++ b/src/rml/oob/help-oob-base.txt @@ -11,6 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2024 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/src/mca/oob/tcp/help-oob-tcp.txt b/src/rml/oob/help-oob-tcp.txt similarity index 98% rename from src/mca/oob/tcp/help-oob-tcp.txt rename to src/rml/oob/help-oob-tcp.txt index edbce3ef98..950599d810 100644 --- a/src/mca/oob/tcp/help-oob-tcp.txt +++ b/src/rml/oob/help-oob-tcp.txt @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2014-2020 Intel, Inc. All rights reserved. # Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2024 Nanook Consulting All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/src/mca/oob/base/base.h b/src/rml/oob/oob.h similarity index 65% rename from src/mca/oob/base/base.h rename to src/rml/oob/oob.h index c3f1f04142..b901997b4b 100644 --- a/src/mca/oob/base/base.h +++ b/src/rml/oob/oob.h @@ -15,7 +15,7 @@ * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,12 +46,11 @@ #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" #include "src/event/event-internal.h" +#include "src/include/prte_stdatomic.h" #include "src/util/pmix_printf.h" - -#include "src/mca/mca.h" #include "src/threads/pmix_threads.h" -#include "src/mca/oob/oob.h" +#include "src/rml/rml_types.h" BEGIN_C_DECLS @@ -59,26 +58,55 @@ BEGIN_C_DECLS * Convenience Typedef */ typedef struct { - char *include; - char *exclude; - pmix_list_t components; - pmix_list_t actives; + int output; + uint32_t addr_count; /**< total number of addresses */ + int num_links; /**< number of logical links per physical device */ + int max_retries; /**< max number of retries before declaring peer gone */ int max_uri_length; - pmix_list_t peers; + pmix_list_t events; /**< events for monitoring connections */ + int peer_limit; /**< max size of tcp peer cache */ + pmix_list_t peers; // connection addresses for peers + + /* Port specifications */ + int tcp_sndbuf; /**< socket send buffer size */ + int tcp_rcvbuf; /**< socket recv buffer size */ + + /* IPv4 support */ + bool disable_ipv4_family; /**< disable this AF */ + char **tcp_static_ports; /**< Static ports - IPV4 */ + char **tcp_dyn_ports; /**< Dynamic ports - IPV4 */ + char **ipv4conns; + char **ipv4ports; + + /* IPv6 support */ + bool disable_ipv6_family; 
/**< disable this AF */ + char **tcp6_static_ports; /**< Static ports - IPV6 */ + char **tcp6_dyn_ports; /**< Dynamic ports - IPV6 */ + char **ipv6conns; + char **ipv6ports; + + /* connection support */ + pmix_list_t local_ifs; /**< prte list of local pmix_pif_t interfaces */ + char **if_masks; + int num_hnp_ports; /**< number of ports the HNP should listen on */ + pmix_list_t listeners; /**< List of sockets being monitored by event or thread */ + pmix_thread_t listen_thread; /**< handle to the listening thread */ + prte_atomic_bool_t listen_thread_active; + struct timeval listen_thread_tv; /**< Timeout when using listen thread */ + int stop_thread[2]; /**< pipe used to exit the listen thread */ + int keepalive_probes; /**< number of keepalives that can be missed before declaring error */ + int keepalive_time; /**< idle time in seconds before starting to send keepalives */ + int keepalive_intvl; /**< time between keepalives, in seconds */ + int retry_delay; /**< time to wait before retrying connection */ + int max_recon_attempts; /**< maximum number of times to attempt connect before giving up (-1 for + never) */ } prte_oob_base_t; PRTE_EXPORT extern prte_oob_base_t prte_oob_base; -typedef struct { - pmix_list_item_t super; - pmix_proc_t name; - prte_oob_base_component_t *component; - pmix_bitmap_t addressable; -} prte_oob_base_peer_t; -PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_oob_base_peer_t); - /* MCA framework */ -PRTE_EXPORT extern pmix_mca_base_framework_t prte_oob_base_framework; -PRTE_EXPORT int prte_oob_base_select(void); +PRTE_EXPORT int prte_oob_open(void); +PRTE_EXPORT void prte_oob_close(void); +PRTE_EXPORT int prte_oob_register(void); /* Access the OOB internal functions via set of event-based macros * for inserting messages and other commands into the @@ -112,15 +140,13 @@ PRTE_EXPORT void prte_oob_base_send_nb(int fd, short args, void *cbdata); #define PRTE_OOB_SEND(m) \ do { \ prte_oob_send_t *prte_oob_send_cd; \ - pmix_output_verbose(1, 
prte_oob_base_framework.framework_output, "%s OOB_SEND: %s:%d", \ + pmix_output_verbose(1, prte_oob_base.output, "%s OOB_SEND: %s:%d", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__); \ prte_oob_send_cd = PMIX_NEW(prte_oob_send_t); \ prte_oob_send_cd->msg = (m); \ PRTE_PMIX_THREADSHIFT(prte_oob_send_cd, prte_event_base, prte_oob_base_send_nb); \ } while (0) -PRTE_EXPORT prte_oob_base_peer_t *prte_oob_base_get_peer(const pmix_proc_t *pr); - /* During initial wireup, we can only transfer contact info on the daemon * command line. This limits what we can send to a string representation of * the actual contact info, which gets sent in a uri-like form. Not every diff --git a/src/rml/oob/oob_base_stubs.c b/src/rml/oob/oob_base_stubs.c new file mode 100644 index 0000000000..31b2edf409 --- /dev/null +++ b/src/rml/oob/oob_base_stubs.c @@ -0,0 +1,494 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "prte_config.h" +#include "constants.h" + +#include "src/pmix/pmix-internal.h" +#include "src/runtime/prte_globals.h" +#include "src/util/pmix_argv.h" +#include "src/util/pmix_output.h" +#include "src/util/pmix_printf.h" +#include "src/mca/errmgr/errmgr.h" +#include "src/rml/rml.h" +#include "src/mca/state/state.h" +#include "src/threads/pmix_threads.h" + +#include "src/rml/oob/oob.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_peer.h" + +static prte_oob_tcp_peer_t* process_uri(char *uri); + +void prte_oob_base_send_nb(int fd, short args, void *cbdata) +{ + prte_oob_send_t *cd = (prte_oob_send_t *) cbdata; + prte_rml_send_t *msg; + prte_oob_tcp_peer_t *peer; + pmix_proc_t hop; + int rc; + char *uri = NULL; + PRTE_HIDE_UNUSED_PARAMS(fd, args); + + PMIX_ACQUIRE_OBJECT(cd); + + /* done with this. release it now */ + msg = cd->msg; + PMIX_RELEASE(cd); + + pmix_output_verbose(5, prte_oob_base.output, + "%s oob:base:send to target %s - attempt %u", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->dst), + msg->retries); + + /* don't try forever - if we have exceeded the number of retries, + * then report this message as undeliverable even if someone continues + * to think they could reach it */ + if (prte_rml_base.max_retries <= msg->retries) { + msg->status = PRTE_ERR_NO_PATH_TO_TARGET; + PRTE_RML_SEND_COMPLETE(msg); + return; + } + + /* do we have a route to this peer (could be direct)? */ + PMIX_LOAD_NSPACE(hop.nspace, PRTE_PROC_MY_NAME->nspace); + hop.rank = prte_rml_get_route(msg->dst.rank); + /* do we know this hop? 
*/ + if (NULL == (peer = prte_oob_tcp_peer_lookup(&hop))) { + /* if this message is going to the HNP, send it direct */ + if (PRTE_PROC_MY_HNP->rank == msg->dst.rank) { + hop.rank = PRTE_PROC_MY_HNP->rank; + peer = prte_oob_tcp_peer_lookup(&hop); + if (NULL != peer) { + goto send; + } + } + // see if we know the contact info for it + PRTE_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_PROC_URI, &hop, (char **) &uri, PMIX_STRING); + if (PRTE_SUCCESS == rc && NULL != uri) { + peer = process_uri(uri); + if (NULL == peer) { + /* that is just plain wrong */ + pmix_output_verbose(5, prte_oob_base.output, + "%s oob:base:send addressee unknown %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + PRTE_NAME_PRINT(&msg->dst)); + + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + /* just ignore the problem */ + PMIX_RELEASE(msg); + return; + } + PRTE_ACTIVATE_PROC_STATE(&hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); + PMIX_RELEASE(msg); + return; + } + } else { + // unable to send it + if (prte_prteds_term_ordered || prte_finalizing || prte_abnormal_term_ordered) { + /* just ignore the problem */ + PMIX_RELEASE(msg); + return; + } + PRTE_ACTIVATE_PROC_STATE(&hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); + PMIX_RELEASE(msg); + return; + } + } + +send: + pmix_output_verbose(2, prte_oob_base.output, + "%s:[%s:%d] processing send to peer %s:%d seq_num = %d via %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, + PRTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, + PRTE_NAME_PRINT(&peer->name)); + + /* add the msg to the hop's send queue */ + if (MCA_OOB_TCP_CONNECTED == peer->state) { + pmix_output_verbose(2, prte_oob_base.output, + "%s tcp:send_nb: already connected to %s - queueing for send", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); + MCA_OOB_TCP_QUEUE_SEND(msg, peer); + return; + } + + /* add the message to the queue for sending after the + * connection is formed + */ + MCA_OOB_TCP_QUEUE_PENDING(msg, peer); + + if 
(MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { + /* we have to initiate the connection - again, we do not + * want to block while the connection is created. + * So throw us into an event that will create + * the connection via a mini-state-machine :-) + */ + pmix_output_verbose(2, prte_oob_base.output, + "%s tcp:send_nb: initiating connection to %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); + peer->state = MCA_OOB_TCP_CONNECTING; + PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect); + } +} + +/** + * Obtain a uri for initial connection purposes + * + * During initial wireup, we can only transfer contact info on the daemon + * command line. This limits what we can send to a string representation of + * the actual contact info, which gets sent in a uri-like form. Not every + * oob module can support this transaction, so this function will loop + * across all oob components/modules, letting each add to the uri string if + * it supports bootstrap operations. An error will be returned in the cbfunc + * if NO component can successfully provide a contact. + * + * Note: since there is a limit to what an OS will allow on a cmd line, we + * impose a limit on the length of the resulting uri via an MCA param. The + * default value of -1 implies unlimited - however, users with large numbers + * of interfaces on their nodes may wish to restrict the size. 
+ */ +void prte_oob_base_get_addr(char **uri) +{ + char *final = NULL, *tmp; + char *cptr = NULL, *tp, *tm; + size_t len = 0; + pmix_status_t rc; + + /* start with our process name */ + rc = prte_util_convert_process_name_to_string(&final, PRTE_PROC_MY_NAME); + if (PRTE_SUCCESS != rc) { + PRTE_ERROR_LOG(rc); + *uri = NULL; + return; + } + len = strlen(final); + + if (!prte_oob_base.disable_ipv4_family && + NULL != prte_oob_base.ipv4conns) { + tmp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv4conns, ','); + tp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv4ports, ','); + tm = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.if_masks, ','); + pmix_asprintf(&cptr, "tcp://%s:%s:%s", tmp, tp, tm); + free(tmp); + free(tp); + free(tm); + } +#if PRTE_ENABLE_IPV6 + if (!prte_oob_base.disable_ipv6_family && + NULL != prte_oob_base.ipv6conns) { + char *tmp2; + + /* Fixes #2498 + * RFC 3986, section 3.2.2 + * The notation in that case is to encode the IPv6 IP number in square brackets: + * "http://[2001:db8:1f70::999:de8:7648:6e8]:100/" + * A host identified by an Internet Protocol literal address, version 6 [RFC3513] + * or later, is distinguished by enclosing the IP literal within square brackets. + * This is the only place where square bracket characters are allowed in the URI + * syntax. In anticipation of future, as-yet-undefined IP literal address formats, + * an implementation may use an optional version flag to indicate such a format + * explicitly rather than rely on heuristic determination. 
+ */ + tmp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv6conns, ','); + tp = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.ipv6ports, ','); + tm = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.if_masks, ','); + if (NULL == cptr) { + /* no ipv4 stuff */ + pmix_asprintf(&cptr, "tcp6://[%s]:%s:%s", tmp, tp, tm); + } else { + pmix_asprintf(&tmp2, "%s;tcp6://[%s]:%s:%s", cptr, tmp, tp, tm); + free(cptr); + cptr = tmp2; + } + free(tmp); + free(tp); + free(tm); + } +#endif // PRTE_ENABLE_IPV6 + + /* check overall length for limits */ + if (0 < prte_oob_base.max_uri_length + && prte_oob_base.max_uri_length < (int) (len + strlen(cptr))) { + /* cannot accept the payload */ + free(final); + free(cptr); + *uri = NULL; + return; + } + /* add new value to final one */ + pmix_asprintf(&tmp, "%s;%s", final, cptr); + free(cptr); + free(final); + final = tmp; + + *uri = final; +} + +/* the host in this case is always in "dot" notation, and + * thus we do not need to do a DNS lookup to convert it */ +static int parse_uri(const uint16_t af_family, const char *host, const char *port, + struct sockaddr_storage *inaddr) +{ + struct sockaddr_in *in; + + if (AF_INET == af_family) { + memset(inaddr, 0, sizeof(struct sockaddr_in)); + in = (struct sockaddr_in *) inaddr; + in->sin_family = AF_INET; + in->sin_addr.s_addr = inet_addr(host); + if (in->sin_addr.s_addr == INADDR_NONE) { + return PRTE_ERR_BAD_PARAM; + } + ((struct sockaddr_in *) inaddr)->sin_port = htons(atoi(port)); + } +#if PRTE_ENABLE_IPV6 + else if (AF_INET6 == af_family) { + struct sockaddr_in6 *in6; + memset(inaddr, 0, sizeof(struct sockaddr_in6)); + in6 = (struct sockaddr_in6 *) inaddr; + + if (0 == inet_pton(AF_INET6, host, (void *) &in6->sin6_addr)) { + pmix_output(0, "oob_tcp_parse_uri: Could not convert %s\n", host); + return PRTE_ERR_BAD_PARAM; + } + in6->sin6_family = AF_INET6; + in6->sin6_port = htons(atoi(port)); + } +#endif + else { + return PRTE_ERR_NOT_SUPPORTED; + } + return PRTE_SUCCESS; +} + +static void set_addr(pmix_proc_t *peer, 
char **uris) +{ + char **addrs, **masks, *hptr; + char *tcpuri = NULL, *host, *ports, *masks_string; + int i, j, rc; + uint16_t af_family = AF_UNSPEC; + uint64_t ui64; + prte_oob_tcp_peer_t *pr; + prte_oob_tcp_addr_t *maddr; + + memcpy(&ui64, (char *) peer, sizeof(uint64_t)); + + for (i = 0; NULL != uris[i]; i++) { + tcpuri = strdup(uris[i]); + if (NULL == tcpuri) { + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: out of memory", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); + continue; + } + if (0 == strncmp(uris[i], "tcp:", 4)) { + af_family = AF_INET; + host = tcpuri + strlen("tcp://"); + } else if (0 == strncmp(uris[i], "tcp6:", 5)) { +#if PRTE_ENABLE_IPV6 + af_family = AF_INET6; + host = tcpuri + strlen("tcp6://"); +#else // PRTE_ENABLE_IPV6 + /* we don't support this connection type */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: address %s not supported", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); + free(tcpuri); + continue; +#endif // PRTE_ENABLE_IPV6 + } else { + /* not one of ours */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: ignoring address %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uris[i]); + free(tcpuri); + continue; + } + + /* this one is ours - record the peer */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: working peer %s address %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), uris[i]); + + /* separate the mask from the network addrs */ + masks_string = strrchr(tcpuri, ':'); + if (NULL == masks_string) { + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + free(tcpuri); + continue; + } + *masks_string = '\0'; + masks_string++; + masks = PMIX_ARGV_SPLIT_COMPAT(masks_string, ','); + + /* separate the ports from the network addrs */ + ports = strrchr(tcpuri, ':'); + if (NULL == ports) { + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + free(tcpuri); + continue; + } + *ports = '\0'; + ports++; + + /* split the addrs */ + /* if this is a tcp6 connection, the first one will have a '[' + * at the 
beginning of it, and the last will have a ']' at the + * end - we need to remove those extra characters + */ + hptr = host; +#if PRTE_ENABLE_IPV6 + if (AF_INET6 == af_family) { + if ('[' == host[0]) { + hptr = &host[1]; + } + if (']' == host[strlen(host) - 1]) { + host[strlen(host) - 1] = '\0'; + } + } +#endif // PRTE_ENABLE_IPV6 + addrs = PMIX_ARGV_SPLIT_COMPAT(hptr, ','); + + /* cycle across the provided addrs */ + for (j = 0; NULL != addrs[j]; j++) { + if (NULL == masks[j]) { + /* Missing mask information */ + pmix_output_verbose(2, prte_oob_base.output, + "%s oob:tcp: uri missing mask information.", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); + return; + } + /* if they gave us "localhost", then just take the first conn on our list */ + if (0 == strcasecmp(addrs[j], "localhost")) { +#if PRTE_ENABLE_IPV6 + if (AF_INET6 == af_family) { + if (NULL == prte_oob_base.ipv6conns + || NULL == prte_oob_base.ipv6conns[0]) { + continue; + } + host = prte_oob_base.ipv6conns[0]; + } else { +#endif // PRTE_ENABLE_IPV6 + if (NULL == prte_oob_base.ipv4conns + || NULL == prte_oob_base.ipv4conns[0]) { + continue; + } + host = prte_oob_base.ipv4conns[0]; +#if PRTE_ENABLE_IPV6 + } +#endif + } else { + host = addrs[j]; + } + + if (NULL == (pr = prte_oob_tcp_peer_lookup(peer))) { + pr = PMIX_NEW(prte_oob_tcp_peer_t); + PMIX_XFER_PROCID(&pr->name, peer); + pmix_output_verbose(20, prte_oob_base.output, + "%s SET_PEER ADDING PEER %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer)); + pmix_list_append(&prte_oob_base.peers, &pr->super); + } + + maddr = PMIX_NEW(prte_oob_tcp_addr_t); + ((struct sockaddr_storage *) &(maddr->addr))->ss_family = af_family; + if (PRTE_SUCCESS + != (rc = parse_uri(af_family, host, ports, + (struct sockaddr_storage *) &(maddr->addr)))) { + PRTE_ERROR_LOG(rc); + PMIX_RELEASE(maddr); + pmix_list_remove_item(&prte_oob_base.peers, &pr->super); + PMIX_RELEASE(pr); + return; + } + maddr->if_mask = atoi(masks[j]); + + pmix_output_verbose(20, 
prte_oob_base.output, + "%s set_peer: peer %s is listening on net %s port %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), + (NULL == host) ? "NULL" : host, (NULL == ports) ? "NULL" : ports); + pmix_list_append(&pr->addrs, &maddr->super); + } + PMIX_ARGV_FREE_COMPAT(addrs); + free(tcpuri); + } +} + +static prte_oob_tcp_peer_t *get_peer(const pmix_proc_t *pr); + +static prte_oob_tcp_peer_t* process_uri(char *uri) +{ + pmix_proc_t peer; + char *cptr; + char **uris = NULL; + prte_oob_tcp_peer_t *pr; + + pmix_output_verbose(5, prte_oob_base.output, + "%s:set_addr processing uri %s", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), uri); + + /* find the first semi-colon in the string */ + cptr = strchr(uri, ';'); + if (NULL == cptr) { + /* got a problem - there must be at least two fields, + * the first containing the process name of our peer + * and all others containing the OOB contact info + */ + PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); + return NULL; + } + *cptr = '\0'; + cptr++; + /* the first field is the process name, so convert it */ + prte_util_convert_string_to_process_name(&peer, uri); + + /* if the peer is us, no need to go further as we already + * know our own contact info + */ + if (PMIX_CHECK_PROCID(&peer, PRTE_PROC_MY_NAME)) { + pmix_output_verbose(5, prte_oob_base.output, + "%s:set_addr peer %s is me", + PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), + PRTE_NAME_PRINT(&peer)); + return NULL; + } + + /* split the rest of the uri into component parts */ + uris = PMIX_ARGV_SPLIT_COMPAT(cptr, ';'); + + /* get the peer object for this process */ + pr = get_peer(&peer); + if (NULL == pr) { + pr = PMIX_NEW(prte_oob_tcp_peer_t); + PMIX_XFER_PROCID(&pr->name, &peer); + pmix_list_append(&prte_oob_base.peers, &pr->super); + } + + set_addr(&pr->name, uris); + PMIX_ARGV_FREE_COMPAT(uris); + return pr; +} + +static prte_oob_tcp_peer_t *get_peer(const pmix_proc_t *pr) +{ + prte_oob_tcp_peer_t *peer; + + PMIX_LIST_FOREACH(peer, &prte_oob_base.peers, prte_oob_tcp_peer_t) + { + 
if (PMIX_CHECK_PROCID(pr, &peer->name)) { + return peer; + } + } + return NULL; +} diff --git a/src/rml/oob/oob_tcp.c b/src/rml/oob/oob_tcp.c new file mode 100644 index 0000000000..0c01b47852 --- /dev/null +++ b/src/rml/oob/oob_tcp.c @@ -0,0 +1,813 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "prte_config.h" +#include "types.h" + +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#include +#ifdef HAVE_NET_IF_H +# include +#endif +#ifdef HAVE_NETINET_IN_H +# include +#endif +#ifdef HAVE_ARPA_INET_H +# include +#endif +#ifdef HAVE_NETDB_H +# include +#endif +#include + +#include "src/include/prte_socket_errno.h" +#include "src/runtime/prte_progress_threads.h" +#include "src/util/pmix_argv.h" +#include "src/util/error.h" +#include "src/util/pmix_if.h" +#include "src/util/pmix_net.h" +#include "src/util/pmix_output.h" +#include "src/util/pmix_show_help.h" + +#include "src/mca/errmgr/errmgr.h" +#include "src/mca/ess/ess.h" +#include "src/runtime/prte_globals.h" +#include "src/threads/pmix_threads.h" +#include "src/util/name_fns.h" +#include "src/util/pmix_parse_options.h" +#include "src/util/pmix_show_help.h" + +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_listener.h" +#include "src/rml/oob/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp_sendrecv.h" + +prte_oob_base_t prte_oob_base = { + .output = -1, + .addr_count = 0, + .num_links = 0, + .max_retries = 0, + .max_uri_length = -1, + .events = PMIX_LIST_STATIC_INIT, + .peer_limit = 0, + .peers = PMIX_LIST_STATIC_INIT, + + .tcp_sndbuf = 0, + .tcp_rcvbuf = 0, + + .disable_ipv4_family = false, + .tcp_static_ports = NULL, + .tcp_dyn_ports = NULL, + .ipv4conns = NULL, + .ipv4ports = NULL, + + .disable_ipv6_family = true, + .tcp6_static_ports = NULL, + .tcp6_dyn_ports = NULL, + .ipv6conns = NULL, + .ipv6ports = NULL, + + .local_ifs = PMIX_LIST_STATIC_INIT, + .if_masks = NULL, + .num_hnp_ports = 1, + .listeners = PMIX_LIST_STATIC_INIT, + .listen_thread_active = false, + .listen_thread_tv = {3600, 0}, + .stop_thread = {-1, -1}, + .keepalive_probes = 0, + .keepalive_time = 0, + 
.keepalive_intvl = 0,
+    .retry_delay = 0,
+    .max_recon_attempts = 0
+};
+
+static char **split_and_resolve(char **orig_str, char *name);
+
+/*
+ * Initialize the OOB/TCP bookkeeping in prte_oob_base, select the local
+ * interfaces to use, and start the listeners.
+ *
+ * Interfaces are taken from pmix_if_list and filtered by:
+ *   - loopback (kept only on the master, and only when every interface
+ *     is a loopback),
+ *   - address family (IPv4, plus IPv6 when PRTE_ENABLE_IPV6),
+ *   - names starting with "vir",
+ *   - the prte_if_include / prte_if_exclude lists, when given.
+ *
+ * Returns the result of prte_oob_tcp_start_listening() on the normal path,
+ * PRTE_ERR_BAD_PARAM when an include/exclude entry is not parseable,
+ * PRTE_ERR_OUT_OF_RESOURCE when an interface record cannot be allocated,
+ * and PRTE_ERR_NOT_AVAILABLE when no usable address was found.
+ */
+int prte_oob_open(void)
+{
+    pmix_pif_t *copied_interface, *selected_interface;
+    struct sockaddr_storage my_ss;
+    /* Larger than necessary, used for copying mask */
+    char string[50], **interfaces = NULL;
+    int kindex;
+    int i, rc;
+    bool keeploopback = false;
+    bool including = false;
+
+    pmix_output_verbose(5, prte_oob_base.output,
+                        "oob:tcp: component_available called");
+
+    PMIX_CONSTRUCT(&prte_oob_base.listeners, pmix_list_t);
+    if (PRTE_PROC_IS_MASTER) {
+        PMIX_CONSTRUCT(&prte_oob_base.listen_thread, pmix_thread_t);
+        prte_oob_base.listen_thread_active = false;
+        prte_oob_base.listen_thread_tv.tv_sec = 3600;
+        prte_oob_base.listen_thread_tv.tv_usec = 0;
+    }
+    prte_oob_base.addr_count = 0;
+    prte_oob_base.ipv4conns = NULL;
+    prte_oob_base.ipv4ports = NULL;
+    prte_oob_base.ipv6conns = NULL;
+    prte_oob_base.ipv6ports = NULL;
+    prte_oob_base.if_masks = NULL;
+
+    PMIX_CONSTRUCT(&prte_oob_base.local_ifs, pmix_list_t);
+    PMIX_CONSTRUCT(&prte_oob_base.peers, pmix_list_t);
+
+    /* if interface include was given, construct a list
+     * of those interfaces which match the specifications - remember,
+     * the includes could be given as named interfaces, IP addrs, or
+     * subnet+mask
+     */
+    if (NULL != prte_if_include) {
+        interfaces = split_and_resolve(&prte_if_include,
+                                       "include");
+        including = true;
+    } else if (NULL != prte_if_exclude) {
+        interfaces = split_and_resolve(&prte_if_exclude,
+                                       "exclude");
+    }
+
+    /* if we are the master, then check the interfaces for loopbacks
+     * and keep loopbacks only if no non-loopback interface exists */
+    if (PRTE_PROC_IS_MASTER) {
+        keeploopback = true;
+        PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t)
+        {
+            if (!(selected_interface->if_flags & IFF_LOOPBACK)) {
+                keeploopback = false;
+                break;
+            }
+        }
+    }
+
+    /* look at all available interfaces */
+    PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t)
+    {
+        if ((selected_interface->if_flags & IFF_LOOPBACK) &&
+            !keeploopback) {
+            continue;
+        }
+
+
+        i = selected_interface->if_index;
+        kindex = selected_interface->if_kernel_index;
+        memcpy((struct sockaddr *) &my_ss, &selected_interface->if_addr,
+               MIN(sizeof(struct sockaddr_storage), sizeof(selected_interface->if_addr)));
+
+        /* ignore non-ip4/6 interfaces */
+        if (AF_INET != my_ss.ss_family
+#if PRTE_ENABLE_IPV6
+            && AF_INET6 != my_ss.ss_family
+#endif
+            ) {
+            continue;
+        }
+
+        /* ignore any virtual interfaces */
+        if (0 == strncmp(selected_interface->if_name, "vir", 3)) {
+            continue;
+        }
+
+        /* handle include/exclude directives */
+        if (NULL != interfaces) {
+            /* check for match */
+            rc = pmix_ifmatches(kindex, interfaces);
+            /* if one of the network specifications isn't parseable, then
+             * error out as we can't do what was requested
+             */
+            if (PRTE_ERR_NETWORK_NOT_PARSEABLE == rc) {
+                pmix_show_help("help-oob-tcp.txt", "not-parseable", true);
+                PMIX_ARGV_FREE_COMPAT(interfaces);
+                return PRTE_ERR_BAD_PARAM;
+            }
+            /* if we are including, then ignore this if not present */
+            if (including) {
+                if (PMIX_SUCCESS != rc) {
+                    pmix_output_verbose(20, prte_oob_base.output,
+                                        "%s oob:tcp:init rejecting interface %s (not in include list)",
+                                        PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name);
+                    continue;
+                }
+            } else {
+                /* we are excluding, so ignore if present */
+                if (PMIX_SUCCESS == rc) {
+                    pmix_output_verbose(20, prte_oob_base.output,
+                                        "%s oob:tcp:init rejecting interface %s (in exclude list)",
+                                        PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), selected_interface->if_name);
+                    continue;
+                }
+            }
+        }
+
+        /* Refs ticket #3019
+         * it would probably be worthwhile to print out a warning if PRRTE detects multiple
+         * IP interfaces that are "up" on the same subnet (because that's a Bad Idea). Note
+         * that we should only check for this after applying the relevant include/exclude
+         * list MCA params. If we detect redundant ports, we can also automatically ignore
+         * them so that applications won't hang.
+         */
+
+        /* add this address to our connections */
+        if (AF_INET == my_ss.ss_family) {
+            pmix_output_verbose(10, prte_oob_base.output,
+                                "%s oob:tcp:init adding %s to our list of %s connections",
+                                PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                                pmix_net_get_hostname((struct sockaddr *) &my_ss),
+                                (AF_INET == my_ss.ss_family) ? "V4" : "V6");
+            PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv4conns,
+                                           pmix_net_get_hostname((struct sockaddr *) &my_ss));
+        } else if (AF_INET6 == my_ss.ss_family) {
+#if PRTE_ENABLE_IPV6
+            pmix_output_verbose(10, prte_oob_base.output,
+                                "%s oob:tcp:init adding %s to our list of %s connections",
+                                PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                                pmix_net_get_hostname((struct sockaddr *) &my_ss),
+                                (AF_INET == my_ss.ss_family) ? "V4" : "V6");
+            PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv6conns,
+                                           pmix_net_get_hostname((struct sockaddr *) &my_ss));
+#endif // PRTE_ENABLE_IPV6
+        } else {
+            pmix_output_verbose(10, prte_oob_base.output,
+                                "%s oob:tcp:init ignoring %s from out list of connections",
+                                PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                                pmix_net_get_hostname((struct sockaddr *) &my_ss));
+            continue;
+        }
+        copied_interface = PMIX_NEW(pmix_pif_t);
+        if (NULL == copied_interface) {
+            /* the filter list is owned by this function - do not leak it */
+            if (NULL != interfaces) {
+                PMIX_ARGV_FREE_COMPAT(interfaces);
+            }
+            return PRTE_ERR_OUT_OF_RESOURCE;
+        }
+        pmix_string_copy(copied_interface->if_name, selected_interface->if_name, PMIX_IF_NAMESIZE);
+        copied_interface->if_index = i;
+        copied_interface->if_kernel_index = kindex;
+        copied_interface->af_family = my_ss.ss_family;
+        copied_interface->if_flags = selected_interface->if_flags;
+        copied_interface->if_speed = selected_interface->if_speed;
+        memcpy(&copied_interface->if_addr, &selected_interface->if_addr,
+               sizeof(struct sockaddr_storage));
+        copied_interface->if_mask = selected_interface->if_mask;
+        /* If bandwidth is not found, set to arbitrary non zero value */
+        copied_interface->if_bandwidth = selected_interface->if_bandwidth > 0
+                                             ? selected_interface->if_bandwidth
+                                             : 1;
+        memcpy(&copied_interface->if_mac, &selected_interface->if_mac,
+               sizeof(copied_interface->if_mac));
+        copied_interface->ifmtu = selected_interface->ifmtu;
+        /* Add the if_mask to the list */
+        snprintf(string, sizeof(string), "%d", selected_interface->if_mask);
+        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.if_masks, string);
+        pmix_list_append(&prte_oob_base.local_ifs, &(copied_interface->super));
+    }
+
+    /* the include/exclude list was only needed while filtering above;
+     * it is not stored anywhere, so release it before leaving */
+    if (NULL != interfaces) {
+        PMIX_ARGV_FREE_COMPAT(interfaces);
+    }
+
+    if (0 == PMIX_ARGV_COUNT_COMPAT(prte_oob_base.ipv4conns)
+#if PRTE_ENABLE_IPV6
+        && 0 == PMIX_ARGV_COUNT_COMPAT(prte_oob_base.ipv6conns)
+#endif
+        ) {
+        return PRTE_ERR_NOT_AVAILABLE;
+    }
+
+    // start the listeners
+    if (PRTE_SUCCESS != (rc = prte_oob_tcp_start_listening())) {
+        PRTE_ERROR_LOG(rc);
+    }
+    return rc;
+}
+
+/*
+ * Tear down what prte_oob_open() set up: stop the listener thread (master
+ * only, when it is active), destruct the interface and peer lists, free
+ * the address/port/mask arrays and close the output stream.
+ */
+void prte_oob_close(void)
+{
+    int i = 0, rc;
+
+    if (PRTE_PROC_IS_MASTER && prte_oob_base.listen_thread_active) {
+        prte_oob_base.listen_thread_active = false;
+        /* tell the thread to exit */
+        rc = write(prte_oob_base.stop_thread[1], &i, sizeof(int));
+        /* only wait for the thread if the wake-up was actually written */
+        if (0 < rc) {
+            pmix_thread_join(&prte_oob_base.listen_thread, NULL);
+        }
+
+        close(prte_oob_base.stop_thread[0]);
+        close(prte_oob_base.stop_thread[1]);
+
+    }
+
+    PMIX_LIST_DESTRUCT(&prte_oob_base.local_ifs);
+    PMIX_LIST_DESTRUCT(&prte_oob_base.peers);
+
+    if (NULL != prte_oob_base.ipv4conns) {
+        PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv4conns);
+    }
+    if (NULL != prte_oob_base.ipv4ports) {
+        PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv4ports);
+    }
+
+#if PRTE_ENABLE_IPV6
+    if (NULL != prte_oob_base.ipv6conns) {
+        PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv6conns);
+    }
+    if (NULL != prte_oob_base.ipv6ports) {
+        PMIX_ARGV_FREE_COMPAT(prte_oob_base.ipv6ports);
+    }
+#endif
+    if (NULL != prte_oob_base.if_masks) {
+        PMIX_ARGV_FREE_COMPAT(prte_oob_base.if_masks);
+    }
+
+    if (0 <= prte_oob_base.output) {
+        pmix_output_close(prte_oob_base.output);
+    }
+}
+
+static char *static_port_string;
+#if PRTE_ENABLE_IPV6
+static char *static_port_string6;
+#endif // PRTE_ENABLE_IPV6
+
+static
char *dyn_port_string;
+#if PRTE_ENABLE_IPV6
+static char *dyn_port_string6;
+#endif
+
+/*
+ * Turn a port-range option string into an argv-style array.
+ *
+ * spec  - option value, may be NULL (meaning "not given")
+ * ports - receives the parsed array; set to NULL when spec is NULL or
+ *         when the first parsed entry is "-1"
+ *
+ * The first entry is only inspected when the parser actually produced
+ * one, so an option that yields no entries no longer dereferences NULL.
+ */
+static void oob_parse_port_spec(char *spec, char ***ports)
+{
+    if (NULL == spec) {
+        *ports = NULL;
+        return;
+    }
+    pmix_util_parse_range_options(spec, ports);
+    if (NULL != *ports && NULL != (*ports)[0]
+        && 0 == strcmp((*ports)[0], "-1")) {
+        PMIX_ARGV_FREE_COMPAT(*ports);
+        *ports = NULL;
+    }
+}
+
+/*
+ * Register the OOB/TCP MCA parameters and load their defaults into
+ * prte_oob_base.
+ *
+ * Returns PRTE_SUCCESS, or PRTE_ERROR when both static and dynamic port
+ * ranges were requested (a help message is shown in that case).
+ */
+int prte_oob_register(void)
+{
+    prte_oob_base.peer_limit = -1;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "peer_limit",
+                                      "Maximum number of peer connections to simultaneously maintain (-1 = infinite)",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.peer_limit);
+
+    prte_oob_base.max_retries = 2;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "peer_retries",
+                                      "Number of times to try shutting down a connection before giving up",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.max_retries);
+
+    prte_oob_base.tcp_sndbuf = 0;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "sndbuf",
+                                      "TCP socket send buffering size (in bytes, 0 => leave system default)",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.tcp_sndbuf);
+
+    prte_oob_base.tcp_rcvbuf = 0;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "rcvbuf",
+                                      "TCP socket receive buffering size (in bytes, 0 => leave system default)",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.tcp_rcvbuf);
+
+
+    static_port_string = NULL;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "static_ipv4_ports",
+                                      "Static ports for daemons and procs (IPv4)",
+                                      PMIX_MCA_BASE_VAR_TYPE_STRING,
+                                      &static_port_string);
+
+    /* if ports were provided, parse the provided range */
+    oob_parse_port_spec(static_port_string, &prte_oob_base.tcp_static_ports);
+
+#if PRTE_ENABLE_IPV6
+    static_port_string6 = NULL;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "static_ipv6_ports",
+                                      "Static ports for daemons and procs (IPv6)",
+                                      PMIX_MCA_BASE_VAR_TYPE_STRING,
+                                      &static_port_string6);
+
+    /* if ports were provided, parse the provided range */
+    oob_parse_port_spec(static_port_string6, &prte_oob_base.tcp6_static_ports);
+#endif // PRTE_ENABLE_IPV6
+
+    if (NULL != prte_oob_base.tcp_static_ports
+        || NULL != prte_oob_base.tcp6_static_ports) {
+        prte_static_ports = true;
+    }
+
+    dyn_port_string = NULL;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "dynamic_ipv4_ports",
+                                      "Range of ports to be dynamically used by daemons and procs (IPv4)",
+                                      PMIX_MCA_BASE_VAR_TYPE_STRING,
+                                      &dyn_port_string);
+    /* can't have both static and dynamic ports! */
+    if (NULL != dyn_port_string && prte_static_ports) {
+        char *err = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.tcp_static_ports, ',');
+        pmix_show_help("help-oob-tcp.txt", "static-and-dynamic", true, err, dyn_port_string);
+        free(err);
+        return PRTE_ERROR;
+    }
+    /* if ports were provided, parse the provided range */
+    oob_parse_port_spec(dyn_port_string, &prte_oob_base.tcp_dyn_ports);
+
+#if PRTE_ENABLE_IPV6
+    dyn_port_string6 = NULL;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "dynamic_ipv6_ports",
+                                      "Range of ports to be dynamically used by daemons and procs (IPv6)",
+                                      PMIX_MCA_BASE_VAR_TYPE_STRING,
+                                      &dyn_port_string6);
+    /* can't have both static and dynamic ports! */
+    if (NULL != dyn_port_string6 && prte_static_ports) {
+        char *err4 = NULL, *err6 = NULL;
+        if (NULL != prte_oob_base.tcp_static_ports) {
+            err4 = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.tcp_static_ports, ',');
+        }
+        if (NULL != prte_oob_base.tcp6_static_ports) {
+            err6 = PMIX_ARGV_JOIN_COMPAT(prte_oob_base.tcp6_static_ports, ',');
+        }
+        pmix_show_help("help-oob-tcp.txt", "static-and-dynamic-ipv6", true,
+                       (NULL == err4) ? "N/A" : err4, (NULL == err6) ? "N/A" : err6,
+                       dyn_port_string6);
+        if (NULL != err4) {
+            free(err4);
+        }
+        if (NULL != err6) {
+            free(err6);
+        }
+        return PRTE_ERROR;
+    }
+    /* if ports were provided, parse the provided range */
+    oob_parse_port_spec(dyn_port_string6, &prte_oob_base.tcp6_dyn_ports);
+#endif // PRTE_ENABLE_IPV6
+
+    prte_oob_base.disable_ipv4_family = false;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "disable_ipv4_family",
+                                      "Disable the IPv4 interfaces",
+                                      PMIX_MCA_BASE_VAR_TYPE_BOOL,
+                                      &prte_oob_base.disable_ipv4_family);
+
+#if PRTE_ENABLE_IPV6
+    prte_oob_base.disable_ipv6_family = false;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "disable_ipv6_family",
+                                      "Disable the IPv6 interfaces",
+                                      PMIX_MCA_BASE_VAR_TYPE_BOOL,
+                                      &prte_oob_base.disable_ipv6_family);
+#endif // PRTE_ENABLE_IPV6
+
+    // Wait for this amount of time before sending the first keepalive probe
+    prte_oob_base.keepalive_time = 300;
+    (void)pmix_mca_base_var_register("prte", "prte", NULL, "keepalive_time",
+                                     "Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables "
+                                     "keepalive functionality)",
+                                     PMIX_MCA_BASE_VAR_TYPE_INT,
+                                     &prte_oob_base.keepalive_time);
+
+    // Resend keepalive probe every INT seconds
+    prte_oob_base.keepalive_intvl = 20;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "keepalive_intvl",
+                                      "Time between successive keepalive pings when peer has not responded, in seconds (ignored "
+                                      "if keepalive_time <= 0)",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.keepalive_intvl);
+
+    // After sending PR probes every INT seconds consider the connection dead
+    prte_oob_base.keepalive_probes = 9;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "keepalive_probes",
+                                      "Number of keepalives that can be missed before "
+                                      "declaring error (ignored if keepalive_time <= 0)",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.keepalive_probes);
+
+    prte_oob_base.retry_delay = 0;
+    (void) pmix_mca_base_var_register("prte","prte", NULL, "retry_delay",
+                                      "Time (in sec) to wait before trying to connect to peer again",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.retry_delay);
+
+    prte_oob_base.max_recon_attempts = 10;
+    (void) pmix_mca_base_var_register("prte", "prte", NULL, "max_recon_attempts",
+                                      "Max number of times to attempt connection before giving up (-1 -> never give up)",
+                                      PMIX_MCA_BASE_VAR_TYPE_INT,
+                                      &prte_oob_base.max_recon_attempts);
+    return PRTE_SUCCESS;
+}
+
+/*
+ * Local utility functions
+ */
+static void recv_handler(int sd, short flags, void *user);
+
+/* Called by prte_oob_tcp_accept() and connection_handler() on
+ * a socket that has been accepted. This call finishes processing the
+ * socket, including setting socket options and registering for the
+ * OOB-level connection handshake. Used in both the threaded and
+ * event listen modes.
+ */
+void prte_oob_accept_connection(const int accepted_fd, const struct sockaddr *addr)
+{
+    pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                        "%s accept_connection: %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                        pmix_net_get_hostname(addr), pmix_net_get_port(addr));
+
+    /* apply our socket options to the freshly accepted descriptor */
+    prte_oob_tcp_set_socket_options(accepted_fd);
+
+    /* the connection is completed once the peer's process ident
+     * message arrives; recv_handler is registered to receive it
+     */
+    PRTE_ACTIVATE_TCP_ACCEPT_STATE(accepted_fd, addr, recv_handler);
+}
+
+/* API functions */
+
+/*
+ * Make sure a connection attempt to the given process is under way.
+ *
+ * An unknown peer is reported through the hop-unknown error callback.
+ * A peer that is already connected, or already in the middle of
+ * connecting, is left alone. Any other peer is moved to the
+ * CONNECTING state and a connection attempt is scheduled.
+ */
+void prte_oob_ping(const pmix_proc_t *proc)
+{
+    prte_oob_tcp_peer_t *peer;
+
+    pmix_output_verbose(2, prte_oob_base.output,
+                        "%s:[%s:%d] processing ping to peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                        __FILE__, __LINE__, PRTE_NAME_PRINT(proc));
+
+    /* do we know this peer? */
+    peer = prte_oob_tcp_peer_lookup(proc);
+    if (NULL == peer) {
+        /* we have no record of this process - report it via the
+         * hop-unknown error path
+         */
+        pmix_output_verbose(2, prte_oob_base.output,
+                            "%s:[%s:%d] hop %s unknown", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                            __FILE__, __LINE__, PRTE_NAME_PRINT(proc));
+        PRTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, proc, prte_mca_oob_tcp_component_hop_unknown);
+        return;
+    }
+
+    switch (peer->state) {
+    case MCA_OOB_TCP_CONNECTED:
+        /* nothing to do - the connection already exists */
+        pmix_output_verbose(2, prte_oob_base.output,
+                            "%s:[%s:%d] already connected to peer %s",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__,
+                            PRTE_NAME_PRINT(proc));
+        return;
+
+    case MCA_OOB_TCP_CONNECTING:
+    case MCA_OOB_TCP_CONNECT_ACK:
+        /* nothing to do - an attempt is already in progress */
+        pmix_output_verbose(2, prte_oob_base.output,
+                            "%s:[%s:%d] already connecting to peer %s",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__,
+                            PRTE_NAME_PRINT(proc));
+        return;
+
+    default:
+        break;
+    }
+
+    /* attempt the connection */
+    peer->state = MCA_OOB_TCP_CONNECTING;
+    PRTE_ACTIVATE_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect);
+}
+
+/*
+ * Event callback when there is data available on the registered
+ * socket to recv.  This is called for the listen sockets to accept an
+ * incoming connection, on new sockets trying to complete the software
+ * connection process, and for probes.  Data on an established
+ * connection is handled elsewhere.
+ */
+/*
+ * sd     - the accepted socket the handshake is read from
+ * flg    - event flags (unused)
+ * cbdata - the prte_oob_tcp_conn_op_t for this accept; released on
+ *          every exit path
+ */
+static void recv_handler(int sd, short flg, void *cbdata)
+{
+    prte_oob_tcp_conn_op_t *op = (prte_oob_tcp_conn_op_t *) cbdata;
+    int flags;
+    prte_oob_tcp_hdr_t hdr;
+    prte_oob_tcp_peer_t *peer;
+    PRTE_HIDE_UNUSED_PARAMS(flg);
+
+    PMIX_ACQUIRE_OBJECT(op);
+
+    pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                        "%s:tcp:recv:handler called", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME));
+
+    /* get the handshake */
+    if (PRTE_SUCCESS != prte_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) {
+        goto cleanup;
+    }
+
+    /* finish processing ident */
+    if (MCA_OOB_TCP_IDENT == hdr.type) {
+        peer = prte_oob_tcp_peer_lookup(&hdr.origin);
+        if (NULL == peer) {
+            /* should never happen - there is no peer object to close, so
+             * do not hand the NULL pointer to prte_oob_tcp_peer_close();
+             * just drop the accepted socket so it is not leaked
+             */
+            pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                                "%s:tcp:recv:handler no peer found for %s - closing socket %d",
+                                PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                                PRTE_NAME_PRINT(&(hdr.origin)), sd);
+            CLOSE_THE_SOCKET(sd);
+            goto cleanup;
+        }
+        /* set socket up to be non-blocking */
+        if ((flags = fcntl(sd, F_GETFL, 0)) < 0) {
+            pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)",
+                        PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno),
+                        prte_socket_errno);
+        } else {
+            flags |= O_NONBLOCK;
+            if (fcntl(sd, F_SETFL, flags) < 0) {
+                pmix_output(0, "%s prte_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno),
+                            prte_socket_errno);
+            }
+        }
+        /* is the peer instance willing to accept this connection */
+        peer->sd = sd;
+        if (prte_oob_tcp_peer_accept(peer) == false) {
+            if (OOB_TCP_DEBUG_CONNECT
+                <= pmix_output_get_verbosity(prte_oob_base.output)) {
+                pmix_output(0,
+                            "%s-%s prte_oob_tcp_recv_connect: "
+                            "rejected connection from %s connection state %d",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)),
+                            PRTE_NAME_PRINT(&(hdr.origin)), peer->state);
+            }
+            CLOSE_THE_SOCKET(sd);
+        }
+    }
+
+cleanup:
+    PMIX_RELEASE(op);
+}
+
+/*
+ * Go through a list of argv; if there are any subnet specifications
+ * (a.b.c.d/e), resolve them to an interface name (Currently only
+ * supporting IPv4).  If unresolvable, warn and remove.
+ */
+/*
+ * orig_str - in/out: comma-separated list of interface names and/or
+ *            IPv4 subnets; on return the old string has been freed and
+ *            replaced by the comma-joined list of resolved names
+ * name     - label ("include"/"exclude") used in help messages
+ *
+ * Returns a NULL-terminated array of interface names owned by the
+ * caller, or NULL when nothing was given or nothing could be resolved.
+ */
+static char **split_and_resolve(char **orig_str, char *name)
+{
+    pmix_pif_t *selected_interface;
+    int i, n, ret, match_count, interface_count;
+    char **argv, **interfaces, *str, *tmp;
+    char if_name[IF_NAMESIZE];
+    struct sockaddr_storage argv_inaddr, if_inaddr;
+    uint32_t argv_prefix;
+
+    /* Sanity check */
+    if (NULL == orig_str || NULL == *orig_str) {
+        return NULL;
+    }
+
+    argv = PMIX_ARGV_SPLIT_COMPAT(*orig_str, ',');
+    if (NULL == argv) {
+        return NULL;
+    }
+    interface_count = 0;
+    interfaces = NULL;
+    for (i = 0; NULL != argv[i]; ++i) {
+        /* <ctype.h> functions need an unsigned char value */
+        if (isalpha((unsigned char) argv[i][0])) {
+            /* This is an interface name. If not already in the interfaces array, add it */
+            for (n = 0; n < interface_count; n++) {
+                if (0 == strcmp(argv[i], interfaces[n])) {
+                    break;
+                }
+            }
+            if (n == interface_count) {
+                pmix_output_verbose(20,
+                                    prte_oob_base.output,
+                                    "oob:tcp: Using interface: %s ", argv[i]);
+                PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, argv[i]);
+                ++interface_count;
+            }
+            /* only the argv array itself is freed at the end, so each
+             * entry has to be released once we are done with it - the
+             * subnet path below already does this */
+            free(argv[i]);
+            continue;
+        }
+
+        /* Found a subnet notation.  Convert it to an IP
+           address/netmask.  Get the prefix first. */
+        argv_prefix = 0;
+        tmp = strdup(argv[i]);
+        str = strchr(argv[i], '/');
+        if (NULL == str) {
+            pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude",
+                           true, name, prte_process_info.nodename,
+                           tmp, "Invalid specification (missing \"/\")");
+            free(argv[i]);
+            free(tmp);
+            continue;
+        }
+        *str = '\0';
+        argv_prefix = atoi(str + 1);
+
+        /* Now convert the IPv4 address */
+        ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET;
+        ret = inet_pton(AF_INET, argv[i],
+                        &((struct sockaddr_in*) &argv_inaddr)->sin_addr);
+        free(argv[i]);
+
+        if (1 != ret) {
+            pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude",
+                           true, name, prte_process_info.nodename, tmp,
+                           "Invalid specification (inet_pton() failed)");
+            free(tmp);
+            continue;
+        }
+        pmix_output_verbose(20, prte_oob_base.output,
+                            "%s oob:tcp: Searching for %s address+prefix: %s / %u",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                            name,
+                            pmix_net_get_hostname((struct sockaddr*) &argv_inaddr),
+                            argv_prefix);
+
+        /* Go through all interfaces and see if we can find a match */
+        match_count = 0;
+        PMIX_LIST_FOREACH(selected_interface, &pmix_if_list, pmix_pif_t) {
+            pmix_ifindextoaddr(selected_interface->if_kernel_index,
+                               (struct sockaddr*) &if_inaddr,
+                               sizeof(if_inaddr));
+            if (pmix_net_samenetwork((struct sockaddr_storage*) &argv_inaddr,
+                                     (struct sockaddr_storage*) &if_inaddr,
+                                     argv_prefix)) {
+                /* We found a match. If it's not already in the interfaces array,
+                   add it. If it's already in the array, treat it as a match */
+                match_count = match_count + 1;
+                pmix_ifindextoname(selected_interface->if_kernel_index, if_name, sizeof(if_name));
+                for (n = 0; n < interface_count; n++) {
+                    if (0 == strcmp(if_name, interfaces[n])) {
+                        break;
+                    }
+                }
+                if (n == interface_count) {
+                    pmix_output_verbose(20,
+                                        prte_oob_base.output,
+                                        "oob:tcp: Found match: %s (%s)",
+                                        pmix_net_get_hostname((struct sockaddr*) &if_inaddr),
+                                        if_name);
+                    PMIX_ARGV_APPEND_NOSIZE_COMPAT(&interfaces, if_name);
+                    ++interface_count;
+                }
+            }
+        }
+        /* If we didn't find a match, keep trying */
+        if (0 == match_count) {
+            pmix_show_help("help-oob-tcp.txt", "invalid if_inexclude",
+                           true, name, prte_process_info.nodename, tmp,
+                           "Did not find interface matching this subnet");
+            free(tmp);
+            continue;
+        }
+
+        free(tmp);
+    }
+
+    /* Mark the end of the interface name array with NULL */
+    if (NULL != interfaces) {
+        interfaces[interface_count] = NULL;
+    }
+    /* every entry was released inside the loop; drop the array itself */
+    free(argv);
+    free(*orig_str);
+    *orig_str = PMIX_ARGV_JOIN_COMPAT(interfaces, ',');
+    return interfaces;
+}
+
+PMIX_CLASS_INSTANCE(prte_oob_send_t,
+                    pmix_object_t,
+                    NULL, NULL);
diff --git a/src/mca/oob/tcp/oob_tcp.h b/src/rml/oob/oob_tcp.h
similarity index 64%
rename from src/mca/oob/tcp/oob_tcp.h
rename to src/rml/oob/oob_tcp.h
index 41bfaba28f..e23586dbd1 100644
--- a/src/mca/oob/tcp/oob_tcp.h
+++ b/src/rml/oob/oob_tcp.h
@@ -15,7 +15,7 @@
  * Copyright (c) 2014-2019 Intel, Inc.  All rights reserved.
  * Copyright (c) 2019      Research Organization for Information Science
  *                         and Technology (RIST).  All rights reserved.
- * Copyright (c) 2021-2022 Nanook Consulting.  All rights reserved.
+ * Copyright (c) 2021-2024 Nanook Consulting  All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,8 +33,7 @@ #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" -#include "src/mca/oob/base/base.h" -#include "src/mca/oob/oob.h" +#include "src/rml/oob/oob.h" BEGIN_C_DECLS @@ -42,10 +41,6 @@ BEGIN_C_DECLS #define OOB_TCP_DEBUG_FAIL 2 #define OOB_TCP_DEBUG_CONNECT 7 -/* forward declare a couple of structures */ -struct prte_oob_tcp_module_t; -struct prte_oob_tcp_msg_error_t; - /* define a struct for tracking NIC addresses */ typedef struct { pmix_list_item_t super; @@ -54,19 +49,6 @@ typedef struct { } prte_oob_tcp_nicaddr_t; PMIX_CLASS_DECLARATION(prte_oob_tcp_nicaddr_t); -/* Module definition */ -typedef void (*prte_oob_tcp_module_accept_connection_fn_t)(const int accepted_fd, - const struct sockaddr *addr); -typedef void (*prte_oob_tcp_module_ping_fn_t)(const pmix_proc_t *proc); -typedef void (*prte_oob_tcp_module_send_nb_fn_t)(prte_rml_send_t *msg); - -typedef struct { - prte_oob_tcp_module_accept_connection_fn_t accept_connection; - prte_oob_tcp_module_ping_fn_t ping; - prte_oob_tcp_module_send_nb_fn_t send_nb; -} prte_oob_tcp_module_t; -PRTE_MODULE_EXPORT extern prte_oob_tcp_module_t prte_oob_tcp_module; - /** * the state of the connection */ @@ -82,10 +64,15 @@ typedef enum { } prte_oob_tcp_state_t; /* module-level shared functions */ -PRTE_MODULE_EXPORT void prte_oob_tcp_send_handler(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_oob_tcp_recv_handler(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_oob_tcp_queue_msg(int sd, short args, void *cbdata); - +PRTE_EXPORT void prte_oob_tcp_send_handler(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_tcp_recv_handler(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_tcp_queue_msg(int sd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_accept_connection(const int accepted_fd, const struct sockaddr *addr); +PRTE_EXPORT void prte_mca_oob_tcp_component_lost_connection(int fd, 
short args, void *cbdata); +PRTE_EXPORT void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_ping(const pmix_proc_t *proc); END_C_DECLS #endif /* MCA_OOB_TCP_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_common.c b/src/rml/oob/oob_tcp_common.c similarity index 77% rename from src/mca/oob/tcp/oob_tcp_common.c rename to src/rml/oob/oob_tcp_common.c index 9671ee254f..928dff0056 100644 --- a/src/mca/oob/tcp/oob_tcp_common.c +++ b/src/rml/oob/oob_tcp_common.c @@ -16,7 +16,7 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -63,10 +63,9 @@ #include "src/util/pmix_net.h" #include "src/util/pmix_output.h" -#include "oob_tcp_common.h" -#include "oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" /** * Set socket buffering @@ -87,27 +86,27 @@ static void set_keepalive(int sd) /* Set the option active */ option = 1; if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; } # if defined(TCP_KEEPALIVE) /* set the idle time */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE, &prte_mca_oob_tcp_component.keepalive_time, - sizeof(prte_mca_oob_tcp_component.keepalive_time)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE, &prte_oob_base.keepalive_time, + sizeof(prte_oob_base.keepalive_time)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; } # elif defined(TCP_KEEPIDLE) /* set the idle time */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE, &prte_mca_oob_tcp_component.keepalive_time, - sizeof(prte_mca_oob_tcp_component.keepalive_time)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE, &prte_oob_base.keepalive_time, + sizeof(prte_oob_base.keepalive_time)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; @@ -115,10 +114,10 @@ static void 
set_keepalive(int sd) # endif // TCP_KEEPIDLE # if defined(TCP_KEEPINTVL) /* set the keepalive interval */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL, &prte_mca_oob_tcp_component.keepalive_intvl, - sizeof(prte_mca_oob_tcp_component.keepalive_intvl)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL, &prte_oob_base.keepalive_intvl, + sizeof(prte_oob_base.keepalive_intvl)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); return; @@ -126,10 +125,10 @@ static void set_keepalive(int sd) # endif // TCP_KEEPINTVL # if defined(TCP_KEEPCNT) /* set the miss rate */ - if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT, &prte_mca_oob_tcp_component.keepalive_probes, - sizeof(prte_mca_oob_tcp_component.keepalive_probes)) + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT, &prte_oob_base.keepalive_probes, + sizeof(prte_oob_base.keepalive_probes)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } @@ -144,33 +143,33 @@ void prte_oob_tcp_set_socket_options(int sd) optval = 1; if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *) &optval, sizeof(optval)) < 0) { prte_backtrace_print(stderr, NULL, 1); - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } #endif #if defined(SO_SNDBUF) - if (prte_mca_oob_tcp_component.tcp_sndbuf > 0 - && setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *) &prte_mca_oob_tcp_component.tcp_sndbuf, + if (prte_oob_base.tcp_sndbuf > 0 + && setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *) &prte_oob_base.tcp_sndbuf, 
sizeof(int)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } #endif #if defined(SO_RCVBUF) - if (prte_mca_oob_tcp_component.tcp_rcvbuf > 0 - && setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *) &prte_mca_oob_tcp_component.tcp_rcvbuf, + if (prte_oob_base.tcp_rcvbuf > 0 + && setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *) &prte_oob_base.tcp_rcvbuf, sizeof(int)) < 0) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)", __FILE__, __LINE__, strerror(prte_socket_errno), prte_socket_errno); } #endif - if (0 < prte_mca_oob_tcp_component.keepalive_time) { + if (0 < prte_oob_base.keepalive_time) { set_keepalive(sd); } } @@ -179,7 +178,7 @@ prte_oob_tcp_peer_t *prte_oob_tcp_peer_lookup(const pmix_proc_t *name) { prte_oob_tcp_peer_t *peer; - PMIX_LIST_FOREACH(peer, &prte_mca_oob_tcp_component.peers, prte_oob_tcp_peer_t) + PMIX_LIST_FOREACH(peer, &prte_oob_base.peers, prte_oob_tcp_peer_t) { if (PMIX_CHECK_PROCID(name, &peer->name)) { return peer; diff --git a/src/mca/oob/tcp/oob_tcp_common.h b/src/rml/oob/oob_tcp_common.h similarity index 77% rename from src/mca/oob/tcp/oob_tcp_common.h rename to src/rml/oob/oob_tcp_common.h index 4e2bfe5043..26e1408208 100644 --- a/src/mca/oob/tcp/oob_tcp_common.h +++ b/src/rml/oob/oob_tcp_common.h @@ -15,7 +15,7 @@ * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,10 +28,10 @@ #include "prte_config.h" -#include "oob_tcp.h" -#include "oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_peer.h" -PRTE_MODULE_EXPORT void prte_oob_tcp_set_socket_options(int sd); -PRTE_MODULE_EXPORT char *prte_oob_tcp_state_print(prte_oob_tcp_state_t state); -PRTE_MODULE_EXPORT prte_oob_tcp_peer_t *prte_oob_tcp_peer_lookup(const pmix_proc_t *name); +PRTE_EXPORT void prte_oob_tcp_set_socket_options(int sd); +PRTE_EXPORT char *prte_oob_tcp_state_print(prte_oob_tcp_state_t state); +PRTE_EXPORT prte_oob_tcp_peer_t *prte_oob_tcp_peer_lookup(const pmix_proc_t *name); #endif /* _MCA_OOB_TCP_COMMON_H_ */ diff --git a/src/rml/oob/oob_tcp_component.c b/src/rml/oob/oob_tcp_component.c new file mode 100644 index 0000000000..734b3eb1c3 --- /dev/null +++ b/src/rml/oob/oob_tcp_component.c @@ -0,0 +1,266 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. 
All rights reserved.
+ * Copyright (c) 2020      Amazon.com, Inc. or its affiliates.  All Rights
+ *                         reserved.
+ * Copyright (c) 2021-2024 Nanook Consulting  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * In windows, many of the socket functions return an EWOULDBLOCK
+ * instead of things like EAGAIN, EINPROGRESS, etc. It has been
+ * verified that this will not conflict with other error codes that
+ * are returned by these functions under UNIX/Linux environments
+ */
+
+#include "prte_config.h"
+#include "types.h"
+
+#ifdef HAVE_UNISTD_H
+# include
+#endif
+#ifdef HAVE_SYS_TYPES_H
+# include
+#endif
+#include
+#ifdef HAVE_NET_IF_H
+# include
+#endif
+#ifdef HAVE_NETINET_IN_H
+# include
+#endif
+#ifdef HAVE_ARPA_INET_H
+# include
+#endif
+#ifdef HAVE_NETDB_H
+# include
+#endif
+#include
+#include
+
+#ifndef MIN
+# define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#include "src/class/pmix_list.h"
+#include "src/event/event-internal.h"
+#include "src/include/prte_socket_errno.h"
+#include "src/runtime/prte_progress_threads.h"
+#include "src/util/pmix_argv.h"
+#include "src/util/pmix_if.h"
+#include "src/util/error.h"
+#include "src/util/pmix_net.h"
+#include "src/util/pmix_output.h"
+#include "src/util/pmix_show_help.h"
+
+#include "src/mca/errmgr/errmgr.h"
+#include "src/mca/ess/ess.h"
+#include "src/rml/rml.h"
+#include "src/mca/state/state.h"
+#include "src/runtime/prte_globals.h"
+#include "src/runtime/prte_wait.h"
+#include "src/threads/pmix_threads.h"
+#include "src/util/attr.h"
+#include "src/util/name_fns.h"
+#include "src/util/pmix_parse_options.h"
+#include "src/util/pmix_show_help.h"
+
+#include "src/rml/oob/oob_tcp_peer.h"
+#include "src/rml/oob/oob_tcp.h"
+#include "src/rml/oob/oob_tcp_common.h"
+#include "src/rml/oob/oob_tcp_connection.h"
+#include "src/rml/oob/oob_tcp_listener.h"
+#include "src/rml/oob/oob_tcp_peer.h"
+
+/*
+ * Event callback: the connection to a peer was lost.
+ * cbdata is a prte_oob_tcp_peer_op_t naming the peer; it is released
+ * before returning. Nothing is reported while finalizing.
+ */
+void prte_mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
+{
+    prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata;
+    PRTE_HIDE_UNUSED_PARAMS(fd, args);
+
+    PMIX_ACQUIRE_OBJECT(pop);
+
+    pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                        "%s tcp:lost connection called for peer %s",
+                        PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer));
+
+    if (prte_finalizing) {
+        PMIX_RELEASE(pop);
+        return;
+    }
+
+    /* let the routing layer look at the lost rank first; its answer
+     * selects which proc state gets activated */
+    if (PRTE_SUCCESS == prte_rml_route_lost(pop->peer.rank)) {
+        PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_COMM_FAILED);
+    } else {
+        PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_LIFELINE_LOST);
+    }
+    PMIX_RELEASE(pop);
+}
+
+/*
+ * Event callback: no route to the next hop of a message.
+ * cbdata is a prte_oob_tcp_msg_error_t; it is released before returning.
+ * The error is dropped when termination is already in progress.
+ */
+void prte_mca_oob_tcp_component_no_route(int fd, short args, void *cbdata)
+{
+    prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata;
+    PRTE_HIDE_UNUSED_PARAMS(fd, args);
+
+    PMIX_ACQUIRE_OBJECT(mop);
+
+    pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                        "%s tcp:no route called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                        PRTE_NAME_PRINT(&mop->hop));
+
+    /* only report the error if we are not already shutting down */
+    if (!prte_prteds_term_ordered && !prte_finalizing && !prte_abnormal_term_ordered) {
+        PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG);
+    }
+
+    PMIX_RELEASE(mop);
+}
+
+/*
+ * Event callback: the next hop of a message is not a known peer.
+ * cbdata is a prte_oob_tcp_msg_error_t; it is released before returning.
+ * The error is dropped when termination is already in progress.
+ */
+void prte_mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
+{
+    prte_oob_tcp_msg_error_t *mop = (prte_oob_tcp_msg_error_t *) cbdata;
+    PRTE_HIDE_UNUSED_PARAMS(fd, args);
+
+    PMIX_ACQUIRE_OBJECT(mop);
+
+    pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                        "%s tcp:unknown hop called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
+                        PRTE_NAME_PRINT(&mop->hop));
+
+    /* only post the error if we are not already shutting down */
+    if (!prte_prteds_term_ordered && !prte_finalizing && !prte_abnormal_term_ordered) {
+        PRTE_ACTIVATE_PROC_STATE(&mop->hop, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG);
+    }
+
+    PMIX_RELEASE(mop);
+}
+
+/*
+ * Event callback: a connection attempt to a peer failed.
+ * cbdata is a prte_oob_tcp_peer_op_t naming the peer; it is released
+ * before returning. Nothing is reported when termination is in progress.
+ */
+void prte_mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata)
+{
+    prte_oob_tcp_peer_op_t *pop = (prte_oob_tcp_peer_op_t *) cbdata;
+    PRTE_HIDE_UNUSED_PARAMS(fd, args);
+
+    PMIX_ACQUIRE_OBJECT(pop);
+
+    pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                        "%s tcp:failed_to_connect called for peer %s",
+                        PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer));
+
+    /* a failure while we are terminating is not worth reporting */
+    if (!prte_prteds_term_ordered && !prte_finalizing && !prte_abnormal_term_ordered) {
+        /* activate the proc state */
+        pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output,
+                            "%s tcp:failed_to_connect unable to reach peer %s",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&pop->peer));
+
+        PRTE_ACTIVATE_PROC_STATE(&pop->peer, PRTE_PROC_STATE_FAILED_TO_CONNECT);
+    }
+    PMIX_RELEASE(pop);
+}
+
+
+/* OOB TCP Class instances */
+
+/* Constructor: an unconnected peer with no socket, no pending
+ * messages and no active events */
+static void peer_cons(prte_oob_tcp_peer_t *peer)
+{
+    /* connection state */
+    peer->sd = -1;
+    peer->state = MCA_OOB_TCP_UNCONNECTED;
+    peer->num_retries = 0;
+    peer->auth_method = NULL;
+    peer->active_addr = NULL;
+
+    /* message bookkeeping */
+    peer->send_msg = NULL;
+    peer->recv_msg = NULL;
+
+    /* event bookkeeping */
+    peer->send_ev_active = false;
+    peer->recv_ev_active = false;
+    peer->timer_ev_active = false;
+
+    /* contained lists */
+    PMIX_CONSTRUCT(&peer->addrs, pmix_list_t);
+    PMIX_CONSTRUCT(&peer->send_queue, pmix_list_t);
+}
+
+/* Destructor: remove any active events, close the socket if one is
+ * open, and release the contained lists */
+static void peer_des(prte_oob_tcp_peer_t *peer)
+{
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(peer->auth_method);
+
+    if (peer->send_ev_active) {
+        prte_event_del(&peer->send_event);
+    }
+    if (peer->recv_ev_active) {
+        prte_event_del(&peer->recv_event);
+    }
+    if (peer->timer_ev_active) {
+        prte_event_del(&peer->timer_event);
+    }
+    if (0 <= peer->sd) {
+        pmix_output_verbose(2, prte_oob_base.output,
+                            "%s CLOSING SOCKET %d",
+                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), peer->sd);
+        CLOSE_THE_SOCKET(peer->sd);
+    }
+    PMIX_LIST_DESTRUCT(&peer->addrs);
+    PMIX_LIST_DESTRUCT(&peer->send_queue);
+}
+PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_t, pmix_list_item_t, peer_cons, peer_des); + +static void padd_cons(prte_oob_tcp_addr_t *ptr) +{ + memset(&ptr->addr, 0, sizeof(ptr->addr)); + ptr->retries = 0; + ptr->state = MCA_OOB_TCP_UNCONNECTED; +} +PMIX_CLASS_INSTANCE(prte_oob_tcp_addr_t, pmix_list_item_t, padd_cons, NULL); + +static void pop_cons(prte_oob_tcp_peer_op_t *pop) +{ + pop->net = NULL; + pop->port = NULL; +} +static void pop_des(prte_oob_tcp_peer_op_t *pop) +{ + if (NULL != pop->net) { + free(pop->net); + } + if (NULL != pop->port) { + free(pop->port); + } +} +PMIX_CLASS_INSTANCE(prte_oob_tcp_peer_op_t, pmix_object_t, pop_cons, pop_des); + +PMIX_CLASS_INSTANCE(prte_oob_tcp_msg_op_t, pmix_object_t, NULL, NULL); + +PMIX_CLASS_INSTANCE(prte_oob_tcp_conn_op_t, pmix_object_t, NULL, NULL); + +static void nicaddr_cons(prte_oob_tcp_nicaddr_t *ptr) +{ + ptr->af_family = PF_UNSPEC; + memset(&ptr->addr, 0, sizeof(ptr->addr)); +} +PMIX_CLASS_INSTANCE(prte_oob_tcp_nicaddr_t, pmix_list_item_t, nicaddr_cons, NULL); diff --git a/src/mca/oob/tcp/oob_tcp_connection.c b/src/rml/oob/oob_tcp_connection.c similarity index 92% rename from src/mca/oob/tcp/oob_tcp_connection.c rename to src/rml/oob/oob_tcp_connection.c index d77bf2de2e..4218f26431 100644 --- a/src/mca/oob/tcp/oob_tcp_connection.c +++ b/src/rml/oob/oob_tcp_connection.c @@ -19,7 +19,7 @@ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights * reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -76,14 +76,10 @@ #include "src/util/name_fns.h" #include "src/util/pmix_show_help.h" -#include "oob_tcp.h" -#include "oob_tcp_common.h" -#include "oob_tcp_connection.h" -#include "oob_tcp_peer.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_peer.h" static void tcp_peer_event_init(prte_oob_tcp_peer_t *peer); static int tcp_peer_send_connect_ack(prte_oob_tcp_peer_t *peer); @@ -100,7 +96,7 @@ static int tcp_peer_create_socket(prte_oob_tcp_peer_t *peer, sa_family_t family) return PRTE_SUCCESS; } - PMIX_OUTPUT_VERBOSE((1, prte_oob_base_framework.framework_output, + PMIX_OUTPUT_VERBOSE((1, prte_oob_base.output, "%s oob:tcp:peer creating socket to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)))); peer->sd = socket(family, SOCK_STREAM, 0); @@ -149,7 +145,7 @@ static int tcp_peer_create_socket(prte_oob_tcp_peer_t *peer, sa_family_t family) */ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) { - pmix_list_t *local_list = &prte_mca_oob_tcp_component.local_ifs, *remote_list; + pmix_list_t *local_list = &prte_oob_base.local_ifs, *remote_list; int rc, i, j, local_if_count, remote_if_count, best, best_i = 0, best_j = 0; prte_oob_tcp_conn_op_t *op = (prte_oob_tcp_conn_op_t *) cbdata; prte_reachable_t *results = NULL; @@ -198,12 +194,12 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) results = prte_reachable.reachable(local_list, remote_list); /* Find match, bind socket. 
If connect attempt failed, move to next */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -246,27 +242,27 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) peer->active_addr = (prte_oob_tcp_addr_t *) ptr; addr = peer->active_addr; /* Grab the local address we are using to bind the socket with */ - ptr = prte_mca_oob_tcp_component.local_ifs.pmix_list_sentinel.pmix_list_next; + ptr = prte_oob_base.local_ifs.pmix_list_sentinel.pmix_list_next; for (i = 0; i < best_i; i++) { ptr = ptr->pmix_list_next; } intf = (pmix_pif_t *) ptr; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s on %s:%d - %d retries", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), pmix_net_get_hostname((struct sockaddr *) &addr->addr), pmix_net_get_port((struct sockaddr *) &addr->addr), addr->retries); if (MCA_OOB_TCP_FAILED == addr->state) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: %s:%d is down", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), pmix_net_get_hostname((struct sockaddr *) &addr->addr), pmix_net_get_port((struct sockaddr *) &addr->addr)); continue; } - if (prte_mca_oob_tcp_component.max_retries < 
addr->retries) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + if (prte_oob_base.max_retries < addr->retries) { + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: %s:%d retries exceeded", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), pmix_net_get_hostname((struct sockaddr *) &addr->addr), @@ -333,7 +329,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) /* non-blocking so wait for completion */ if (prte_socket_errno == EINPROGRESS || prte_socket_errno == EWOULDBLOCK) { pmix_output_verbose( - OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s waiting for connect completion to %s - activating send event", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); /* just ensure the send_event is active */ @@ -352,9 +348,9 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) * way by trying twice before giving up */ if (ECONNABORTED == prte_socket_errno) { - if (addr->retries < prte_mca_oob_tcp_component.max_retries) { + if (addr->retries < prte_oob_base.max_retries) { pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s connection aborted by OS to %s - retrying", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -381,9 +377,9 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) /* it could be that the intended recipient just hasn't * started yet. 
if requested, wait awhile and try again * unless/until we hit the maximum number of retries */ - if (0 < prte_mca_oob_tcp_component.retry_delay) { - if (prte_mca_oob_tcp_component.max_recon_attempts < 0 - || peer->num_retries < prte_mca_oob_tcp_component.max_recon_attempts) { + if (0 < prte_oob_base.retry_delay) { + if (prte_oob_base.max_recon_attempts < 0 + || peer->num_retries < prte_oob_base.max_recon_attempts) { struct timeval tv; /* close the current socket */ CLOSE_THE_SOCKET(peer->sd); @@ -394,7 +390,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) addr->retries = 0; } /* give it awhile and try again */ - tv.tv_sec = prte_mca_oob_tcp_component.retry_delay; + tv.tv_sec = prte_oob_base.retry_delay; tv.tv_usec = 0; ++peer->num_retries; PRTE_RETRY_TCP_CONN_STATE(peer, prte_oob_tcp_peer_try_connect, &tv); @@ -439,7 +435,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) goto cleanup; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_tcp_peer_try_connect: " "Connection to proc %s succeeded", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -502,7 +498,7 @@ static int tcp_peer_send_connect_ack(prte_oob_tcp_peer_t *peer) uint16_t ack_flag = htons(1); size_t sdsize, offset = 0; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s SEND CONNECT ACK", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* load the header */ @@ -557,7 +553,7 @@ static int tcp_peer_send_connect_nack(int sd, pmix_proc_t *name) int rc = PRTE_SUCCESS; size_t sdsize, offset = 0; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s SEND CONNECT NACK", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* load the header */ @@ -630,7 
+626,7 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) int so_error = 0; prte_socklen_t so_length = sizeof(so_error); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:complete_connect called for peer %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), peer->sd); @@ -645,12 +641,12 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) } if (so_error == EINPROGRESS) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:send:handler still in progress", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); return; } else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_complete_connect: connection failed: %s (%d)", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), strerror(so_error), so_error); @@ -660,7 +656,7 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) /* No need to worry about the return code here - we return regardless at this point, and if an error did occur a message has already been printed for the user */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_complete_connect: " "connection failed with error %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -669,14 +665,14 @@ void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer) return; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp_peer_complete_connect: " 
"sending ack to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); if (tcp_peer_send_connect_ack(peer) == PRTE_SUCCESS) { peer->state = MCA_OOB_TCP_CONNECT_ACK; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp_peer_complete_connect: " "setting read event on connection to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); @@ -706,7 +702,7 @@ static int tcp_peer_send_blocking(int sd, void *data, size_t size) PMIX_ACQUIRE_OBJECT(ptr); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s send blocking of %" PRIsize_t " bytes to socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), size, sd); @@ -725,7 +721,7 @@ static int tcp_peer_send_blocking(int sd, void *data, size_t size) cnt += retval; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s blocking send complete to socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), sd); @@ -741,7 +737,7 @@ static bool retry(prte_oob_tcp_peer_t *peer, int sd, bool fatal) { int cmpval; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s SIMUL CONNECTION WITH %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); cmpval = prte_util_compare_name_fields(PRTE_NS_CMP_ALL, &peer->name, PRTE_PROC_MY_NAME); @@ -800,7 +796,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob uint16_t ack_flag; bool is_new = (NULL == pr); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s RECV CONNECT ACK FROM %s ON SOCKET %d", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == pr) ? "UNKNOWN" : PRTE_NAME_PRINT(&pr->name), sd); @@ -823,14 +819,14 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } } else { /* unable to complete the recv */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s unable to complete recv of connect-ack from %s ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&peer->name), sd); return PRTE_ERR_UNREACH; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect-ack recvd from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&peer->name)); @@ -867,13 +863,13 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob if (NULL == peer) { peer = prte_oob_tcp_peer_lookup(&hdr.origin); if (NULL == peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_oob_tcp_recv_connect: connection from new peer", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); peer = PMIX_NEW(prte_oob_tcp_peer_t); PMIX_XFER_PROCID(&peer->name, &hdr.origin); peer->state = MCA_OOB_TCP_ACCEPTING; - pmix_list_append(&prte_mca_oob_tcp_component.peers, &peer->super); + pmix_list_append(&prte_oob_base.peers, &peer->super); } } else { /* compare the peers name to the expected value */ @@ -889,7 +885,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect-ack header from %s is okay", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -901,7 
+897,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } if (!tcp_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) { /* unable to complete the recv but should never happen */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s unable to complete recv of connect-ack from %s ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), peer->sd); @@ -986,7 +982,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob } free(msg); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect-ack version from %s matches ours", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); @@ -997,15 +993,10 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob return PRTE_SUCCESS; } - /* set the peer into the component and OOB-level peer tables to indicate - * that we know this peer and we will be handling him - */ - PRTE_ACTIVATE_TCP_CMP_OP(peer, prte_mca_oob_tcp_component_set_module); - /* connected */ tcp_peer_connected(peer); if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { prte_oob_tcp_peer_dump(peer, "connected"); } return PRTE_SUCCESS; @@ -1017,7 +1008,7 @@ int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *pr, int sd, prte_oob */ static void tcp_peer_connected(prte_oob_tcp_peer_t *peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_connected on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), peer->sd); @@ -1048,7 +1039,7 @@ static void 
tcp_peer_connected(prte_oob_tcp_peer_t *peer) */ void prte_oob_tcp_peer_close(prte_oob_tcp_peer_t *peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp_peer_close for %s sd %d state %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), peer->sd, prte_oob_tcp_state_print(peer->state)); @@ -1115,7 +1106,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data unsigned char *ptr = (unsigned char *) data; size_t cnt = 0; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s waiting for connect ack from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&(peer->name))); @@ -1124,7 +1115,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data /* remote closed connection */ if (retval == 0) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s-%s tcp_peer_recv_blocking: " "peer closed connection: peer state %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), @@ -1162,7 +1153,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data recv_connect_ack, who will try to establish the connection again */ pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s connect ack received error %s from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), strerror(prte_socket_errno), @@ -1184,7 +1175,7 @@ static bool tcp_peer_recv_blocking(prte_oob_tcp_peer_t *peer, int sd, void *data cnt += retval; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connect ack received from %s", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : PRTE_NAME_PRINT(&(peer->name))); return true; @@ -1263,7 +1254,7 @@ void prte_oob_tcp_peer_dump(prte_oob_tcp_peer_t *peer, const char *msg) bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:peer_accept called for peer %s in state %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), prte_oob_tcp_state_print(peer->state), peer->sd); @@ -1282,11 +1273,6 @@ bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer) return false; } - /* set the peer into the component and OOB-level peer tables to indicate - * that we know this peer and we will be handling him - */ - PRTE_ACTIVATE_TCP_CMP_OP(peer, prte_mca_oob_tcp_component_set_module); - tcp_peer_connected(peer); if (!peer->recv_ev_active) { peer->recv_ev_active = true; @@ -1294,13 +1280,13 @@ bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer) prte_event_add(&peer->recv_event, 0); } if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { prte_oob_tcp_peer_dump(peer, "accepted"); } return true; } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:peer_accept ignored for peer %s in state %s on socket %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), prte_oob_tcp_state_print(peer->state), peer->sd); diff --git a/src/mca/oob/tcp/oob_tcp_connection.h b/src/rml/oob/oob_tcp_connection.h similarity index 83% rename from src/mca/oob/tcp/oob_tcp_connection.h rename to src/rml/oob/oob_tcp_connection.h index db2272959c..731cfec6d1 100644 --- a/src/mca/oob/tcp/oob_tcp_connection.h +++ b/src/rml/oob/oob_tcp_connection.h @@ -15,7 +15,7 @@ 
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,8 +35,8 @@ # include #endif -#include "oob_tcp.h" -#include "oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_peer.h" #include "src/threads/pmix_threads.h" /* State machine for connection operations */ @@ -56,7 +56,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_conn_op_t); #define PRTE_ACTIVATE_TCP_CONN_STATE(p, cbfunc) \ do { \ prte_oob_tcp_conn_op_t *cop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] connect to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((&(p)->name))); \ cop = PMIX_NEW(prte_oob_tcp_conn_op_t); \ @@ -76,7 +76,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_conn_op_t); #define PRTE_RETRY_TCP_CONN_STATE(p, cbfunc, tv) \ do { \ prte_oob_tcp_conn_op_t *cop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] retry connect to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((&(p)->name))); \ cop = PMIX_NEW(prte_oob_tcp_conn_op_t); \ @@ -86,12 +86,12 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_conn_op_t); prte_event_evtimer_add(&cop->ev, (tv)); \ } while (0); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_dump(prte_oob_tcp_peer_t *peer, const char *msg); -PRTE_MODULE_EXPORT bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer); -PRTE_MODULE_EXPORT int 
prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *peer, int sd, +PRTE_EXPORT void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata); +PRTE_EXPORT void prte_oob_tcp_peer_dump(prte_oob_tcp_peer_t *peer, const char *msg); +PRTE_EXPORT bool prte_oob_tcp_peer_accept(prte_oob_tcp_peer_t *peer); +PRTE_EXPORT void prte_oob_tcp_peer_complete_connect(prte_oob_tcp_peer_t *peer); +PRTE_EXPORT int prte_oob_tcp_peer_recv_connect_ack(prte_oob_tcp_peer_t *peer, int sd, prte_oob_tcp_hdr_t *dhdr); -PRTE_MODULE_EXPORT void prte_oob_tcp_peer_close(prte_oob_tcp_peer_t *peer); +PRTE_EXPORT void prte_oob_tcp_peer_close(prte_oob_tcp_peer_t *peer); #endif /* _MCA_OOB_TCP_CONNECTION_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_hdr.h b/src/rml/oob/oob_tcp_hdr.h similarity index 97% rename from src/mca/oob/tcp/oob_tcp_hdr.h rename to src/rml/oob/oob_tcp_hdr.h index e014ccdd37..05977156be 100644 --- a/src/mca/oob/tcp/oob_tcp_hdr.h +++ b/src/rml/oob/oob_tcp_hdr.h @@ -16,7 +16,7 @@ * Copyright (c) 2017-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/src/mca/oob/tcp/oob_tcp_listener.c b/src/rml/oob/oob_tcp_listener.c similarity index 81% rename from src/mca/oob/tcp/oob_tcp_listener.c rename to src/rml/oob/oob_tcp_listener.c index 90cf611878..0c11764bae 100644 --- a/src/mca/oob/tcp/oob_tcp_listener.c +++ b/src/rml/oob/oob_tcp_listener.c @@ -68,12 +68,11 @@ #include "src/util/pmix_parse_options.h" #include "src/util/pmix_show_help.h" -#include "src/mca/oob/tcp/oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_listener.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_listener.h" +#include "src/rml/oob/oob_tcp_peer.h" static void connection_event_handler(int incoming_sd, short flags, void *cbdata); static void *listen_thread(pmix_object_t *obj); @@ -101,9 +100,9 @@ int prte_oob_tcp_start_listening(void) prte_oob_tcp_listener_t *listener; /* if we don't have any TCP interfaces, we shouldn't be here */ - if (NULL == prte_mca_oob_tcp_component.ipv4conns + if (NULL == prte_oob_base.ipv4conns #if PRTE_ENABLE_IPV6 - && NULL == prte_mca_oob_tcp_component.ipv6conns + && NULL == prte_oob_base.ipv6conns #endif ) { PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); @@ -128,25 +127,25 @@ int prte_oob_tcp_start_listening(void) * harvest connection requests as rapidly as possible */ if (PRTE_PROC_IS_MASTER) { - if (0 > pipe(prte_mca_oob_tcp_component.stop_thread)) { + if (0 > pipe(prte_oob_base.stop_thread)) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* Make sure the pipe FDs are set to close-on-exec so that they don't leak into children */ - if (pmix_fd_set_cloexec(prte_mca_oob_tcp_component.stop_thread[0]) != PRTE_SUCCESS - || 
pmix_fd_set_cloexec(prte_mca_oob_tcp_component.stop_thread[1]) != PRTE_SUCCESS) { - close(prte_mca_oob_tcp_component.stop_thread[0]); - close(prte_mca_oob_tcp_component.stop_thread[1]); + if (pmix_fd_set_cloexec(prte_oob_base.stop_thread[0]) != PRTE_SUCCESS + || pmix_fd_set_cloexec(prte_oob_base.stop_thread[1]) != PRTE_SUCCESS) { + close(prte_oob_base.stop_thread[0]); + close(prte_oob_base.stop_thread[1]); PRTE_ERROR_LOG(PRTE_ERR_IN_ERRNO); return PRTE_ERR_IN_ERRNO; } - prte_mca_oob_tcp_component.listen_thread_active = true; - prte_mca_oob_tcp_component.listen_thread.t_run = listen_thread; - prte_mca_oob_tcp_component.listen_thread.t_arg = NULL; - if (PRTE_SUCCESS != (rc = pmix_thread_start(&prte_mca_oob_tcp_component.listen_thread))) { + prte_oob_base.listen_thread_active = true; + prte_oob_base.listen_thread.t_run = listen_thread; + prte_oob_base.listen_thread.t_arg = NULL; + if (PRTE_SUCCESS != (rc = pmix_thread_start(&prte_oob_base.listen_thread))) { PRTE_ERROR_LOG(rc); pmix_output(0, "%s Unable to start listen thread", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); } @@ -155,7 +154,7 @@ int prte_oob_tcp_start_listening(void) /* otherwise, setup to listen via the event lib */ - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) + PMIX_LIST_FOREACH(listener, &prte_oob_base.listeners, prte_oob_tcp_listener_t) { listener->ev_active = true; prte_event_set(prte_event_base, &listener->event, listener->sd, @@ -191,16 +190,16 @@ static int create_listen(void) * port in the range. 
Otherwise, tcp_port_min will be 0, which * means "pick any port" */ - if (NULL != prte_mca_oob_tcp_component.tcp_static_ports) { + if (NULL != prte_oob_base.tcp_static_ports) { /* if static ports were provided, take the * first entry in the list */ - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_mca_oob_tcp_component.tcp_static_ports[0]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_oob_base.tcp_static_ports[0]); /* flag that we are using static ports */ prte_static_ports = true; - } else if (NULL != prte_mca_oob_tcp_component.tcp_dyn_ports) { + } else if (NULL != prte_oob_base.tcp_dyn_ports) { /* take the entire range */ - ports = PMIX_ARGV_COPY_COMPAT(prte_mca_oob_tcp_component.tcp_dyn_ports); + ports = PMIX_ARGV_COPY_COMPAT(prte_oob_base.tcp_dyn_ports); prte_static_ports = false; } else { /* flag the system to dynamically take any available port */ @@ -225,7 +224,7 @@ static int create_listen(void) * sockets to support more flexible wireup protocols */ for (i = 0; i < PMIX_ARGV_COUNT_COMPAT(ports); i++) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "%s attempting to bind to IPv4 port %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ports[i]); /* get the port number */ @@ -239,7 +238,7 @@ static int create_listen(void) sd = socket(AF_INET, SOCK_STREAM, 0); if (sd < 0) { if (EAFNOSUPPORT != prte_socket_errno) { - pmix_output(0, "prte_mca_oob_tcp_component_init: socket() failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen: socket() failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); } PMIX_ARGV_FREE_COMPAT(ports); @@ -254,7 +253,7 @@ static int create_listen(void) } if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, (const char *) &flags, sizeof(flags)) < 0) { pmix_output(0, - "prte_oob_tcp_create_listen: unable to set the " + "prte_oob_create_listen: unable to set the " "SO_REUSEADDR option (%s:%d)\n", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); @@ -266,7 +265,7 
@@ static int create_listen(void) this FD */ if (pmix_fd_set_cloexec(sd) != PRTE_SUCCESS) { pmix_output(0, - "prte_oob_tcp_create_listen: unable to set the " + "prte_oob_create_listen: unable to set the " "listening socket to CLOEXEC (%s:%d)\n", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); @@ -287,7 +286,7 @@ static int create_listen(void) } /* resolve assigned port */ if (getsockname(sd, (struct sockaddr *) &inaddr, &addrlen) < 0) { - pmix_output(0, "prte_oob_tcp_create_listen: getsockname(): %s (%d)", + pmix_output(0, "prte_oob_create_listen: getsockname(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -296,7 +295,7 @@ static int create_listen(void) /* setup listen backlog to maximum allowed by kernel */ if (listen(sd, SOMAXCONN) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: listen(): %s (%d)", + pmix_output(0, "prte_oob_create_listen: listen(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -305,7 +304,7 @@ static int create_listen(void) /* set socket up to be non-blocking, otherwise accept could block */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen init: fcntl(F_GETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -313,7 +312,7 @@ static int create_listen(void) } flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen init: fcntl(F_SETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -328,13 +327,13 @@ static int create_listen(void) /* save the first one */ prte_process_info.my_port = conn->port; 
} - pmix_list_append(&prte_mca_oob_tcp_component.listeners, &conn->item); + pmix_list_append(&prte_oob_base.listeners, &conn->item); /* and to our ports */ pmix_asprintf(&tconn, "%d", ntohs(((struct sockaddr_in *) &inaddr)->sin_port)); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv4ports, tconn); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv4ports, tconn); free(tconn); if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { port = ntohs(((struct sockaddr_in *) &inaddr)->sin_port); pmix_output(0, "%s assigned IPv4 port %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), port); } @@ -347,7 +346,7 @@ static int create_listen(void) /* done with this, so release it */ PMIX_ARGV_FREE_COMPAT(ports); - if (0 == pmix_list_get_size(&prte_mca_oob_tcp_component.listeners)) { + if (0 == pmix_list_get_size(&prte_oob_base.listeners)) { /* cleanup */ if (0 <= sd) { CLOSE_THE_SOCKET(sd); @@ -384,16 +383,16 @@ static int create_listen6(void) * means "pick any port" */ if (PRTE_PROC_IS_DAEMON) { - if (NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { + if (NULL != prte_oob_base.tcp6_static_ports) { /* if static ports were provided, take the * first entry in the list */ - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_mca_oob_tcp_component.tcp6_static_ports[0]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_oob_base.tcp6_static_ports[0]); /* flag that we are using static ports */ prte_static_ports = true; - } else if (NULL != prte_mca_oob_tcp_component.tcp6_dyn_ports) { + } else if (NULL != prte_oob_base.tcp6_dyn_ports) { /* take the entire range */ - ports = PMIX_ARGV_COPY_COMPAT(prte_mca_oob_tcp_component.tcp6_dyn_ports); + ports = PMIX_ARGV_COPY_COMPAT(prte_oob_base.tcp6_dyn_ports); prte_static_ports = false; } else { /* flag the system to dynamically take any available port */ @@ -401,16 +400,16 @@ static int create_listen6(void) prte_static_ports = false; } } else 
{ - if (NULL != prte_mca_oob_tcp_component.tcp6_static_ports) { + if (NULL != prte_oob_base.tcp6_static_ports) { /* if static ports were provided, take the * first entry in the list */ - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_mca_oob_tcp_component.tcp6_static_ports[0]); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ports, prte_oob_base.tcp6_static_ports[0]); /* flag that we are using static ports */ prte_static_ports = true; - } else if (NULL != prte_mca_oob_tcp_component.tcp6_dyn_ports) { + } else if (NULL != prte_oob_base.tcp6_dyn_ports) { /* take the entire range */ - ports = PMIX_ARGV_COPY_COMPAT(prte_mca_oob_tcp_component.tcp6_dyn_ports); + ports = PMIX_ARGV_COPY_COMPAT(prte_oob_base.tcp6_dyn_ports); prte_static_ports = false; } else { /* flag the system to dynamically take any available port */ @@ -436,7 +435,7 @@ static int create_listen6(void) * sockets to support more flexible wireup protocols */ for (i = 0; i < PMIX_ARGV_COUNT_COMPAT(ports); i++) { - pmix_output_verbose(5, prte_oob_base_framework.framework_output, + pmix_output_verbose(5, prte_oob_base.output, "%s attempting to bind to IPv6 port %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ports[i]); /* get the port number */ @@ -450,7 +449,7 @@ static int create_listen6(void) sd = socket(AF_INET6, SOCK_STREAM, 0); if (sd < 0) { if (EAFNOSUPPORT != prte_socket_errno) { - pmix_output(0, "prte_mca_oob_tcp_component_init: socket() failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen6: socket() failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); } return PRTE_ERR_IN_ERRNO; @@ -459,7 +458,7 @@ static int create_listen6(void) this FD */ if (pmix_fd_set_cloexec(sd) != PRTE_SUCCESS) { pmix_output(0, - "prte_oob_tcp_create_listen6: unable to set the " + "prte_oob_create_listen6: unable to set the " "listening socket to CLOEXEC (%s:%d)\n", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); @@ -496,7 +495,7 @@ static int create_listen6(void) } /* resolve assigned port */ if 
(getsockname(sd, (struct sockaddr *) &inaddr, &addrlen) < 0) { - pmix_output(0, "prte_oob_tcp_create_listen: getsockname(): %s (%d)", + pmix_output(0, "prte_oob_create_listen6: getsockname(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); CLOSE_THE_SOCKET(sd); return PRTE_ERROR; @@ -504,20 +503,20 @@ static int create_listen6(void) /* setup listen backlog to maximum allowed by kernel */ if (listen(sd, SOMAXCONN) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: listen(): %s (%d)", + pmix_output(0, "prte_oob_create_listen6: listen(): %s (%d)", strerror(prte_socket_errno), prte_socket_errno); return PRTE_ERROR; } /* set socket up to be non-blocking, otherwise accept could block */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen6: fcntl(F_GETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); return PRTE_ERROR; } flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { - pmix_output(0, "prte_mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", + pmix_output(0, "prte_oob_create_listen6: fcntl(F_SETFL) failed: %s (%d)", strerror(prte_socket_errno), prte_socket_errno); return PRTE_ERROR; } @@ -527,13 +526,13 @@ static int create_listen6(void) conn->tcp6 = true; conn->sd = sd; conn->port = ntohs(((struct sockaddr_in6 *) &inaddr)->sin6_port); - pmix_list_append(&prte_mca_oob_tcp_component.listeners, &conn->item); + pmix_list_append(&prte_oob_base.listeners, &conn->item); /* and to our ports */ pmix_asprintf(&tconn, "%d", ntohs(((struct sockaddr_in6 *) &inaddr)->sin6_port)); - PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_mca_oob_tcp_component.ipv6ports, tconn); + PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_oob_base.ipv6ports, tconn); free(tconn); if (OOB_TCP_DEBUG_CONNECT - <= pmix_output_get_verbosity(prte_oob_base_framework.framework_output)) { + <= pmix_output_get_verbosity(prte_oob_base.output)) { pmix_output(0, "%s 
assigned IPv6 port %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (int) ntohs(((struct sockaddr_in6 *) &inaddr)->sin6_port)); } @@ -543,7 +542,7 @@ static int create_listen6(void) break; } } - if (0 == pmix_list_get_size(&prte_mca_oob_tcp_component.listeners)) { + if (0 == pmix_list_get_size(&prte_oob_base.listeners)) { /* cleanup */ CLOSE_THE_SOCKET(sd); PMIX_ARGV_FREE_COMPAT(ports); @@ -579,28 +578,27 @@ static void *listen_thread(pmix_object_t *obj) * to the event method for handling any further connections * so as to minimize overhead */ - while (prte_mca_oob_tcp_component.listen_thread_active) { + while (prte_oob_base.listen_thread_active) { FD_ZERO(&readfds); max = -1; - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) + PMIX_LIST_FOREACH(listener, &prte_oob_base.listeners, prte_oob_tcp_listener_t) { FD_SET(listener->sd, &readfds); max = (listener->sd > max) ? listener->sd : max; } /* add the stop_thread fd */ - FD_SET(prte_mca_oob_tcp_component.stop_thread[0], &readfds); - max = (prte_mca_oob_tcp_component.stop_thread[0] > max) ? prte_mca_oob_tcp_component.stop_thread[0] - : max; + FD_SET(prte_oob_base.stop_thread[0], &readfds); + max = (prte_oob_base.stop_thread[0] > max) ? prte_oob_base.stop_thread[0] : max; /* set timeout interval */ - timeout.tv_sec = prte_mca_oob_tcp_component.listen_thread_tv.tv_sec; - timeout.tv_usec = prte_mca_oob_tcp_component.listen_thread_tv.tv_usec; + timeout.tv_sec = prte_oob_base.listen_thread_tv.tv_sec; + timeout.tv_usec = prte_oob_base.listen_thread_tv.tv_usec; /* Block in a select to avoid hammering the cpu. If a connection * comes in, we'll get woken up right away. 
*/ rc = select(max + 1, &readfds, NULL, NULL, &timeout); - if (!prte_mca_oob_tcp_component.listen_thread_active) { + if (!prte_oob_base.listen_thread_active) { /* we've been asked to terminate */ return NULL; } @@ -617,7 +615,7 @@ static void *listen_thread(pmix_object_t *obj) */ do { accepted_connections = 0; - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) + PMIX_LIST_FOREACH(listener, &prte_oob_base.listeners, prte_oob_tcp_listener_t) { sd = listener->sd; @@ -677,7 +675,7 @@ static void *listen_thread(pmix_object_t *obj) } } - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s prte_oob_tcp_listen_thread: incoming connection: " "(%d, %d) %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), pending_connection->fd, @@ -716,28 +714,6 @@ static void *listen_thread(pmix_object_t *obj) } done: -#if 0 - /* once we complete the initial launch, the "flood" of connections - * will end - only connection requests from local procs, connect/accept - * operations across mpirun instances, or the occasional tool will need - * to be serviced. As these are relatively small events, we can easily - * handle them in the context of the event library and no longer require - * a separate connection harvesting thread. 
So switch over to the event - * lib handler now - */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, - "%s prte_oob_tcp_listen_thread: switching to event lib", - PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); - /* setup to listen via event library */ - PMIX_LIST_FOREACH(listener, &prte_mca_oob_tcp_component.listeners, prte_oob_tcp_listener_t) { - prte_event_set(prte_event_base, listener->event, - listener->sd, - PRTE_EV_READ|PRTE_EV_PERSIST, - connection_event_handler, - 0); - prte_event_add(listener->event, 0); - } -#endif return NULL; } @@ -753,7 +729,7 @@ static void connection_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(new_connection); - pmix_output_verbose(4, prte_oob_base_framework.framework_output, + pmix_output_verbose(4, prte_oob_base.output, "%s connection_handler: working connection " "(%d, %d) %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), new_connection->fd, prte_socket_errno, @@ -761,8 +737,8 @@ static void connection_handler(int sd, short flags, void *cbdata) pmix_net_get_port((struct sockaddr *) &new_connection->addr)); /* process the connection */ - prte_oob_tcp_module.accept_connection(new_connection->fd, - (struct sockaddr *) &(new_connection->addr)); + prte_oob_accept_connection(new_connection->fd, (struct sockaddr *) &(new_connection->addr)); + /* cleanup */ PMIX_RELEASE(new_connection); } @@ -778,7 +754,7 @@ static void connection_event_handler(int incoming_sd, short flags, void *cbdata) PRTE_HIDE_UNUSED_PARAMS(flags, cbdata); sd = accept(incoming_sd, (struct sockaddr *) &addr, &addrlen); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s connection_event_handler: working connection " "(%d, %d) %s:%d\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), sd, prte_socket_errno, @@ -815,7 +791,7 @@ static void connection_event_handler(int incoming_sd, short flags, void *cbdata) } /* process the connection */ 
- prte_oob_tcp_module.accept_connection(sd, &addr); + prte_oob_accept_connection(sd, &addr); } static void tcp_ev_cons(prte_oob_tcp_listener_t *event) diff --git a/src/mca/oob/tcp/oob_tcp_listener.h b/src/rml/oob/oob_tcp_listener.h similarity index 93% rename from src/mca/oob/tcp/oob_tcp_listener.h rename to src/rml/oob/oob_tcp_listener.h index a109b15493..bfb4b984ae 100644 --- a/src/mca/oob/tcp/oob_tcp_listener.h +++ b/src/rml/oob/oob_tcp_listener.h @@ -15,7 +15,7 @@ * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +60,6 @@ typedef struct { } prte_oob_tcp_pending_connection_t; PMIX_CLASS_DECLARATION(prte_oob_tcp_pending_connection_t); -PRTE_MODULE_EXPORT int prte_oob_tcp_start_listening(void); +PRTE_EXPORT int prte_oob_tcp_start_listening(void); #endif /* _MCA_OOB_TCP_LISTENER_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_peer.h b/src/rml/oob/oob_tcp_peer.h similarity index 96% rename from src/mca/oob/tcp/oob_tcp_peer.h rename to src/rml/oob/oob_tcp_peer.h index a5753538be..04cac5c3cd 100644 --- a/src/mca/oob/tcp/oob_tcp_peer.h +++ b/src/rml/oob/oob_tcp_peer.h @@ -17,7 +17,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights * reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,8 +32,8 @@ #include "src/event/event-internal.h" -#include "oob_tcp.h" -#include "oob_tcp_sendrecv.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_sendrecv.h" #include "src/threads/pmix_threads.h" typedef struct { diff --git a/src/mca/oob/tcp/oob_tcp_sendrecv.c b/src/rml/oob/oob_tcp_sendrecv.c similarity index 94% rename from src/mca/oob/tcp/oob_tcp_sendrecv.c rename to src/rml/oob/oob_tcp_sendrecv.c index 8b2b627fc0..6d473cb1b6 100644 --- a/src/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/src/rml/oob/oob_tcp_sendrecv.c @@ -16,7 +16,7 @@ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,11 +71,10 @@ #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" -#include "oob_tcp.h" -#include "src/mca/oob/tcp/oob_tcp_common.h" -#include "src/mca/oob/tcp/oob_tcp_component.h" -#include "src/mca/oob/tcp/oob_tcp_connection.h" -#include "src/mca/oob/tcp/oob_tcp_peer.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_common.h" +#include "src/rml/oob/oob_tcp_connection.h" +#include "src/rml/oob/oob_tcp_peer.h" #define OOB_SEND_MAX_RETRIES 3 @@ -205,14 +204,14 @@ void prte_oob_tcp_send_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); msg = peer->send_msg; - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:send_handler called to send to peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); switch (peer->state) { case MCA_OOB_TCP_CONNECTING: case MCA_OOB_TCP_CLOSED: - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, 
prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:send_handler %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), prte_oob_tcp_state_print(peer->state)); prte_oob_tcp_peer_complete_connect(peer); @@ -225,17 +224,17 @@ void prte_oob_tcp_send_handler(int sd, short flags, void *cbdata) } break; case MCA_OOB_TCP_CONNECTED: - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s tcp:send_handler SENDING TO %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (NULL == peer->send_msg) ? "NULL" : PRTE_NAME_PRINT(&peer->name)); if (NULL != msg) { - pmix_output_verbose(2, prte_oob_base_framework.framework_output, + pmix_output_verbose(2, prte_oob_base.output, "oob:tcp:send_handler SENDING MSG"); if (PRTE_SUCCESS == (rc = send_msg(peer, msg))) { /* this msg is complete */ if (NULL != msg->data || NULL == msg->msg) { /* the relay is complete - release the data */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, + pmix_output_verbose(2, prte_oob_base.output, "%s MESSAGE RELAY COMPLETE TO %s OF %d BYTES ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -244,7 +243,7 @@ void prte_oob_tcp_send_handler(int sd, short flags, void *cbdata) peer->send_msg = NULL; } else { /* we are done - notify the RML */ - pmix_output_verbose(2, prte_oob_base_framework.framework_output, + pmix_output_verbose(2, prte_oob_base.output, "%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), @@ -327,7 +326,7 @@ static int read_bytes(prte_oob_tcp_peer_t *peer) * the error back to the RML and let the caller know * to abort this message */ - pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base.output, "%s-%s prte_oob_tcp_msg_recv: readv failed: %s (%d)", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name)), strerror(prte_socket_errno), prte_socket_errno); @@ -340,7 +339,7 @@ static int read_bytes(prte_oob_tcp_peer_t *peer) /* the remote peer closed the connection - report that condition * and let the caller know */ - pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_FAIL, prte_oob_base.output, "%s-%s prte_oob_tcp_msg_recv: peer closed connection", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&(peer->name))); /* stop all events */ @@ -390,14 +389,14 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler called for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); switch (peer->state) { case MCA_OOB_TCP_CONNECT_ACK: if (PRTE_SUCCESS == (rc = prte_oob_tcp_peer_recv_connect_ack(peer, peer->sd, NULL))) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler starting send/recv events", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* we connected! 
Start the send/recv events */ @@ -425,7 +424,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) /* we get an unreachable error returned if a connection * completes but is rejected - otherwise, we don't want * to terminate as we might be retrying the connection */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s UNABLE TO COMPLETE CONNECT ACK WITH %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name)); prte_event_del(&peer->recv_event); @@ -434,11 +433,11 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) } break; case MCA_OOB_TCP_CONNECTED: - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler CONNECTED", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* allocate a new message and setup for recv */ if (NULL == peer->recv_msg) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler allocate new recv msg", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); peer->recv_msg = PMIX_NEW(prte_oob_tcp_recv_t); @@ -454,7 +453,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) } /* if the header hasn't been completely read, read it */ if (!peer->recv_msg->hdr_recvd) { - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler read hdr", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); if (PRTE_SUCCESS == (rc = read_bytes(peer))) { /* completed reading the header */ @@ -464,14 +463,14 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) /* if this is a zero-byte message, then we are done */ if (0 == peer->recv_msg->hdr.nbytes) { 
pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag); peer->recv_msg->data = NULL; // make sure } else { pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s:tcp:recv:handler allocate data region of size %lu", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (unsigned long) peer->recv_msg->hdr.nbytes); @@ -487,7 +486,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) return; } else { /* close the connection */ - pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s:tcp:recv:handler error reading bytes - closing connection", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); prte_oob_tcp_peer_close(peer); @@ -503,7 +502,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) if (PRTE_SUCCESS == (rc = read_bytes(peer))) { /* we recvd all of the message */ pmix_output_verbose( - OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, + OOB_TCP_DEBUG_CONNECT, prte_oob_base.output, "%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->name), PRTE_NAME_PRINT(&peer->recv_msg->hdr.origin), (int) peer->recv_msg->hdr.nbytes, @@ -513,7 +512,7 @@ void prte_oob_tcp_recv_handler(int sd, short flags, void *cbdata) if (PMIX_CHECK_PROCID(&peer->recv_msg->hdr.dst, PRTE_PROC_MY_NAME)) { /* yes - post it to the RML for delivery */ pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s DELIVERING TO RML tag = %d seq_num = %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), peer->recv_msg->hdr.tag, peer->recv_msg->hdr.seq_num); @@ -525,7 +524,7 @@ void prte_oob_tcp_recv_handler(int 
sd, short flags, void *cbdata) /* promote this to the OOB as some other transport might * be the next best hop */ pmix_output_verbose(OOB_TCP_DEBUG_CONNECT, - prte_oob_base_framework.framework_output, + prte_oob_base.output, "%s TCP PROMOTING ROUTED MESSAGE FOR %s TO OOB", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&peer->recv_msg->hdr.dst)); diff --git a/src/mca/oob/tcp/oob_tcp_sendrecv.h b/src/rml/oob/oob_tcp_sendrecv.h similarity index 95% rename from src/mca/oob/tcp/oob_tcp_sendrecv.h rename to src/rml/oob/oob_tcp_sendrecv.h index 973f0d0136..c654bc8abe 100644 --- a/src/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/src/rml/oob/oob_tcp_sendrecv.h @@ -15,7 +15,7 @@ * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,8 +31,8 @@ #include "src/class/pmix_list.h" #include "src/util/pmix_string_copy.h" -#include "oob_tcp.h" -#include "oob_tcp_hdr.h" +#include "src/rml/oob/oob_tcp.h" +#include "src/rml/oob/oob_tcp_hdr.h" #include "src/rml/rml.h" #include "src/threads/pmix_threads.h" @@ -98,7 +98,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_recv_t); #define MCA_OOB_TCP_QUEUE_SEND(m, p) \ do { \ prte_oob_tcp_send_t *_s; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] queue send to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((m)->dst))); \ _s = PMIX_NEW(prte_oob_tcp_send_t); \ @@ -130,7 +130,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_recv_t); #define MCA_OOB_TCP_QUEUE_PENDING(m, p) \ do { \ prte_oob_tcp_send_t *_s; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] queue 
pending to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((m)->dst))); \ _s = PMIX_NEW(prte_oob_tcp_send_t); \ @@ -162,7 +162,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_recv_t); #define MCA_OOB_TCP_QUEUE_RELAY(m, p) \ do { \ prte_oob_tcp_send_t *_s; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] queue relay to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((p)->name))); \ _s = PMIX_NEW(prte_oob_tcp_send_t); \ @@ -196,7 +196,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_msg_op_t); #define PRTE_ACTIVATE_TCP_POST_SEND(ms, cbfunc) \ do { \ prte_oob_tcp_msg_op_t *mop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] post send to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT(&((ms)->dst))); \ mop = PMIX_NEW(prte_oob_tcp_msg_op_t); \ @@ -218,7 +218,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_msg_error_t); prte_oob_tcp_msg_error_t *mop; \ prte_oob_tcp_send_t *snd; \ prte_oob_tcp_recv_t *proxy; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] post msg error to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((h))); \ mop = PMIX_NEW(prte_oob_tcp_msg_error_t); \ @@ -249,7 +249,7 @@ PMIX_CLASS_DECLARATION(prte_oob_tcp_msg_error_t); #define PRTE_ACTIVATE_TCP_NO_ROUTE(r, h, c) \ do { \ prte_oob_tcp_msg_error_t *mop; \ - pmix_output_verbose(5, prte_oob_base_framework.framework_output, \ + pmix_output_verbose(5, prte_oob_base.output, \ "%s:[%s:%d] post no route to %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), \ __FILE__, __LINE__, PRTE_NAME_PRINT((h))); \ mop = PMIX_NEW(prte_oob_tcp_msg_error_t); \ diff --git a/src/mca/oob/tcp/owner.txt b/src/rml/oob/owner.txt similarity index 100% rename from src/mca/oob/tcp/owner.txt
rename to src/rml/oob/owner.txt diff --git a/src/rml/rml.c b/src/rml/rml.c index f8be31be90..be965841b0 100644 --- a/src/rml/rml.c +++ b/src/rml/rml.c @@ -24,12 +24,14 @@ #include "src/mca/mca.h" #include "src/util/pmix_output.h" -#include "src/mca/errmgr/errmgr.h" -#include "src/rml/rml.h" #include "src/mca/state/state.h" #include "src/runtime/prte_wait.h" #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" +#include "src/mca/errmgr/errmgr.h" +#include "src/rml/rml.h" +#include "src/rml/rml_contact.h" +#include "src/rml/oob/oob.h" prte_rml_base_t prte_rml_base = { .rml_output = -1, @@ -82,10 +84,22 @@ void prte_rml_register(void) pmix_mca_base_var_register_synonym(ret, "prte", "routed", "radix", NULL, PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + prte_oob_register(); + + verbosity = 0; + pmix_mca_base_var_register("prte", "oob", "base", "verbose", + "Debug verbosity of the out-of-band subsystem", + PMIX_MCA_BASE_VAR_TYPE_INT, + &verbosity); + if (0 < verbosity) { + prte_oob_base.output = pmix_output_open(NULL); + pmix_output_set_verbosity(prte_oob_base.output, verbosity); + } } void prte_rml_close(void) { + prte_oob_close(); PMIX_LIST_DESTRUCT(&prte_rml_base.posted_recvs); PMIX_LIST_DESTRUCT(&prte_rml_base.unmatched_msgs); PMIX_LIST_DESTRUCT(&prte_rml_base.children); @@ -94,8 +108,12 @@ void prte_rml_close(void) } } -void prte_rml_open(void) +int prte_rml_open(void) { + char *uri = NULL; + pmix_value_t val; + int ret; + /* construct object for holding the active plugin modules */ PMIX_CONSTRUCT(&prte_rml_base.posted_recvs, pmix_list_t); PMIX_CONSTRUCT(&prte_rml_base.unmatched_msgs, pmix_list_t); @@ -106,6 +124,54 @@ void prte_rml_open(void) prte_rml_compute_routing_tree(); prte_rml_base.lifeline = PRTE_PROC_MY_PARENT->rank; + + prte_oob_open(); + + /* store our URI for later */ + prte_oob_base_get_addr(&uri); + PMIX_VALUE_LOAD(&val, uri, PMIX_STRING); + ret = PMIx_Store_internal(PRTE_PROC_MY_NAME, PMIX_PROC_URI, &val); + if (PMIX_SUCCESS != ret) { + 
PRTE_ERROR_LOG(PRTE_ERROR); + PMIX_VALUE_DESTRUCT(&val); + return PRTE_ERROR; + } + PMIX_VALUE_DESTRUCT(&val); + // add it to our local info + prte_process_info.my_uri = strdup(uri); + + if (PRTE_PROC_IS_MASTER) { + prte_process_info.my_hnp_uri = uri; + } else { + free(uri); + if (NULL == prte_process_info.my_hnp_uri) { + // this is an error + PRTE_ERROR_LOG(PRTE_ERROR); + return PRTE_ERROR; + } + /* extract the HNP's name so we can update the routing table */ + ret = prte_rml_parse_uris(prte_process_info.my_hnp_uri, + PRTE_PROC_MY_HNP, + NULL); + if (PRTE_SUCCESS != ret) { + PRTE_ERROR_LOG(ret); + return ret; + } + /* Set the contact info in the RML - this won't actually establish + * the connection, but just tells the RML how to reach the HNP + * if/when we attempt to send to it + */ + PMIX_VALUE_LOAD(&val, prte_process_info.my_hnp_uri, PMIX_STRING); + ret = PMIx_Store_internal(PRTE_PROC_MY_HNP, PMIX_PROC_URI, &val); + if (PMIX_SUCCESS != ret) { + PRTE_ERROR_LOG(ret); + PMIX_VALUE_DESTRUCT(&val); + return ret; + } + PMIX_VALUE_DESTRUCT(&val); + } + + return PRTE_SUCCESS; } void prte_rml_send_callback(int status, pmix_proc_t *peer, diff --git a/src/rml/rml.h b/src/rml/rml.h index dff3bc801e..7cb46aaed1 100644 --- a/src/rml/rml.h +++ b/src/rml/rml.h @@ -17,7 +17,7 @@ * and Technology (RIST). All rights reserved. * * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -145,6 +145,7 @@ PRTE_EXPORT void prte_rml_recv_cancel(pmix_proc_t *peer, prte_rml_tag_t tag); typedef struct { int rml_output; int routed_output; + int oob_output; int max_retries; pmix_list_t posted_recvs; pmix_list_t unmatched_msgs; @@ -158,7 +159,7 @@ PRTE_EXPORT extern prte_rml_base_t prte_rml_base; PRTE_EXPORT void prte_rml_register(void); PRTE_EXPORT void prte_rml_close(void); -PRTE_EXPORT void prte_rml_open(void); +PRTE_EXPORT int prte_rml_open(void); /* common implementations */ PRTE_EXPORT void prte_rml_base_post_recv(int sd, short args, void *cbdata); PRTE_EXPORT void prte_rml_base_process_msg(int fd, short flags, void *cbdata); diff --git a/src/rml/rml_send.c b/src/rml/rml_send.c index a429650476..8a81f143a4 100644 --- a/src/rml/rml_send.c +++ b/src/rml/rml_send.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,11 +31,11 @@ #include "src/util/pmix_name_fns.h" #include "src/mca/errmgr/errmgr.h" -#include "src/mca/oob/base/base.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "src/rml/rml.h" +#include "src/rml/oob/oob.h" int prte_rml_send_buffer_nb(pmix_rank_t rank, pmix_data_buffer_t *buffer, diff --git a/src/runtime/prte_init.c b/src/runtime/prte_init.c index 5e7a234f63..6e79131572 100644 --- a/src/runtime/prte_init.c +++ b/src/runtime/prte_init.c @@ -72,7 +72,6 @@ #include "src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" -#include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/pmdl/base/base.h" #include "src/mca/prtebacktrace/base/base.h" diff --git a/src/util/proc_info.c b/src/util/proc_info.c index 0923b962eb..8e153fb388 100644 --- a/src/util/proc_info.c +++ b/src/util/proc_info.c @@ -64,6 +64,7 @@ PRTE_EXPORT prte_process_info_t prte_process_info = { .aliases = NULL, .pid = 0, .proc_type = PRTE_PROC_TYPE_NONE, + .my_uri = NULL, .my_port = 0, .tmpdir_base = NULL, .top_session_dir = NULL, diff --git a/src/util/proc_info.h b/src/util/proc_info.h index 85e53ba5af..fe970e5ce7 100644 --- a/src/util/proc_info.h +++ b/src/util/proc_info.h @@ -75,6 +75,7 @@ typedef struct prte_process_info_t { char **aliases; /**< aliases for this node */ pid_t pid; /**< Local process ID for this process */ prte_proc_type_t proc_type; /**< Type of process */ + char *my_uri; /**< My contact info */ uint16_t my_port; /**< TCP port for out-of-band comm */ /* The session directory has the form * ///, where the prefix