@@ -29,62 +29,24 @@ while true ; do
29
29
esac
30
30
done
31
31
32
- if [ ! -z " $ALL_RUNNING " ]; then
33
- # # Use cluster-state-entity-count-query to initializer to check if
34
- # # all of the nodes in the cluster are actually running &
35
- # # queryable. The query will hang if one or more of the workers
36
- # # has crashed. Unfortunately, that hang makes scripting
37
- # # difficult: the `external_sender` proc can hang forever waiting
38
- # # for a reply from Wallaroo that will never arrive.
39
- # #
40
- # # If a worker *has* crashed, then a `cluster-status-query` that
41
- # # is sent to any running worker process will return successfully.
42
- # # That's not what we want to know.
43
- # #
44
- # # The only way that I can think of around this problem is to send
45
- # # a `cluster-status-query` and then parse the output, e.g.,
46
- # # Processing messages: true, Worker count: 2, Workers: |initializer,worker2,|,
47
- # # then map the worker name -> Wallaroo external TCP port, then
48
- # # send a `cluster-status-query` to each of the workers. But that
49
- # # embeds a lot more Wallaroo internal knowledge (and also the TCP
50
- # # port number convention used by these shell scripts).
51
- # #
52
- # # NOTE: GH bug #3002 means that we can DoS ourselves by sending
53
- # # this query too soon! {sigh}
32
+ # # If a worker has crashed, then a `cluster-status-query` that
33
+ # # is sent to any running worker process will return successfully.
34
+ # # That's not what we want to know.
35
+ # #
36
+ # # NOTE: GH bug #3002 means that we can DoS ourselves by sending
37
+ # # this query too soon! {sigh}
38
+ # #
39
+ # # If we send a cluster-state-entity-count-query to the initializer to check if
40
+ # # all of the nodes in the cluster are actually running &
41
+ # # queryable, then the query will hang if one or more of the workers
42
+ # # has crashed. Unfortunately, that hang makes scripting
43
+ # # difficult: the `external_sender` proc can hang forever waiting
44
+ # # for a reply from Wallaroo that will never arrive.
45
+ # #
46
+ # # Our workaround is to use our external TCP port numbering scheme to
47
+ # # query each worker directly. We assume that the initializer's
48
+ # # cluster membership info is the Source of Truth(tm).
54
49
55
- if [ ! -z " $VERBOSE " ]; then
56
- echo -n " Entity count: "
57
- fi
58
- OUTFILE=` tempfile -d /tmp`
59
- trap " rm -f $OUTFILE " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
60
- for i in ` seq 1 $COUNT ` ; do
61
- ../../testing/tools/external_sender/external_sender \
62
- -e $WALLAROO_ARG_EXTERNAL \
63
- -t cluster-state-entity-count-query > $OUTFILE 2>&1 &
64
- PID=$!
65
- sleep 0.1
66
- grep -s initializer $OUTFILE > /dev/null 2>&1
67
- if [ $? -eq 0 ]; then
68
- if [ ! -z " $VERBOSE " ]; then
69
- echo Success
70
- fi
71
- break
72
- fi
73
- if [ ! -z " $VERBOSE " ]; then
74
- echo -n .
75
- fi
76
- done
77
- if [ $i -eq $COUNT ]; then
78
- if [ ! -z " $VERBOSE " ]; then
79
- echo Failed
80
- fi
81
- exit 1
82
- fi
83
- fi
84
-
85
- if [ ! -z " $VERBOSE " ]; then
86
- echo -n " Processing messages: "
87
- fi
88
50
for i in ` seq 1 $COUNT ` ; do
89
51
../../testing/tools/external_sender/external_sender \
90
52
-e $WALLAROO_ARG_EXTERNAL -t cluster-status-query 2>&1 | \
@@ -98,6 +60,64 @@ for i in `seq 1 $COUNT`; do
98
60
sleep 0.1
99
61
done
100
62
63
+ if [ $i -eq $COUNT ]; then
64
+ if [ ! -z " $VERBOSE " ]; then
65
+ echo Failed
66
+ fi
67
+ exit 1
68
+ fi
69
+
70
+ if [ ! -z " $ALL_RUNNING " ]; then
71
+ workers=` ../../testing/tools/external_sender/external_sender \
72
+ -e $WALLAROO_ARG_EXTERNAL -t cluster-status-query 2>&1 | \
73
+ grep -s ' Processing messages: ' | \
74
+ sed -e ' s/.*Workers: .//' -e ' s/,|.*//' | \
75
+ tr ' ,' ' ' `
76
+ for worker in $workers ; do
77
+ if [ ! -z " $VERBOSE " ]; then
78
+ echo -n " Worker $worker : "
79
+ fi
80
+ base_port=7103
81
+ case $worker in
82
+ initializer)
83
+ port=$base_port
84
+ ;;
85
+ worker* )
86
+ n=` echo $worker | sed ' s/worker//' `
87
+ my_shift=` expr $n \* 10`
88
+ port=` expr $base_port + $my_shift `
89
+ ;;
90
+ * )
91
+ echo Error: unknown worker $worker
92
+ exit 1
93
+ ;;
94
+ esac
95
+ if [ ! -z " $VERBOSE " ]; then
96
+ echo -n port = $port
97
+ fi
98
+ for i in ` seq 1 $COUNT ` ; do
99
+ ../../testing/tools/external_sender/external_sender \
100
+ -e 127.0.0.1:$port -t cluster-status-query 2>&1 | \
101
+ grep -s ' Processing messages: true' > /dev/null 2>&1
102
+ if [ $? -eq 0 ]; then
103
+ if [ ! -z " $VERBOSE " ]; then
104
+ echo " "
105
+ fi
106
+ break ;
107
+ fi
108
+ if [ ! -z " $VERBOSE " ]; then
109
+ echo -n .
110
+ fi
111
+ sleep 0.1
112
+ done
113
+ if [ $i -eq $COUNT ]; then
114
+ if [ ! -z " $VERBOSE " ]; then
115
+ break
116
+ fi
117
+ fi
118
+ done
119
+ fi
120
+
101
121
if [ $i -eq $COUNT ]; then
102
122
if [ ! -z " $VERBOSE " ]; then
103
123
echo Failed
0 commit comments