Skip to content

Commit 693d2aa

Browse files
authored
fix: validation fails causing etcd events not to be handled correctly (#11268)
1 parent e193439 commit 693d2aa

File tree

3 files changed

+165
-18
lines changed

3 files changed

+165
-18
lines changed

Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,3 +465,10 @@ ci-env-down:
465465
rm $(OTEL_CONFIG)
466466
$(ENV_DOCKER_COMPOSE) down
467467
@$(call func_echo_success_status, "$@ -> [ Done ]")
468+
469+
### ci-env-stop : CI env temporary stop
470+
.PHONY: ci-env-stop
471+
ci-env-stop:
472+
@$(call func_echo_status, "$@ -> [ Start ]")
473+
$(ENV_DOCKER_COMPOSE) stop
474+
@$(call func_echo_success_status, "$@ -> [ Done ]")

apisix/core/config_etcd.lua

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -651,41 +651,46 @@ local function sync_data(self)
651651
-- waitdir will return [res] even for self.single_item = true
652652
for _, res in ipairs(res_copy) do
653653
local key
654+
local data_valid = true
654655
if self.single_item then
655656
key = self.key
656657
else
657658
key = short_key(self, res.key)
658659
end
659660

660661
if res.value and not self.single_item and type(res.value) ~= "table" then
661-
self:upgrade_version(res.modifiedIndex)
662-
return false, "invalid item data of [" .. self.key .. "/" .. key
663-
.. "], val: " .. res.value
664-
.. ", it should be an object"
662+
data_valid = false
663+
log.error("invalid item data of [", self.key .. "/" .. key,
664+
"], val: ", res.value,
665+
", it should be an object")
665666
end
666667

667-
if res.value and self.item_schema then
668-
local ok, err = check_schema(self.item_schema, res.value)
669-
if not ok then
670-
self:upgrade_version(res.modifiedIndex)
671-
672-
return false, "failed to check item data of ["
673-
.. self.key .. "] err:" .. err
668+
if data_valid and res.value and self.item_schema then
669+
data_valid, err = check_schema(self.item_schema, res.value)
670+
if not data_valid then
671+
log.error("failed to check item data of [", self.key,
672+
"] err:", err, " ,val: ", json.encode(res.value))
674673
end
675674

676-
if self.checker then
677-
local ok, err = self.checker(res.value)
678-
if not ok then
679-
self:upgrade_version(res.modifiedIndex)
680-
681-
return false, "failed to check item data of ["
682-
.. self.key .. "] err:" .. err
675+
if data_valid and self.checker then
676+
data_valid, err = self.checker(res.value)
677+
if not data_valid then
678+
log.error("failed to check item data of [", self.key,
679+
"] err:", err, " ,val: ", json.delay_encode(res.value))
683680
end
684681
end
685682
end
686683

684+
-- the modifiedIndex tracking should be updated regardless of the validity of the config
687685
self:upgrade_version(res.modifiedIndex)
688686

687+
if not data_valid then
688+
-- do not update the config cache when the data is invalid
689+
-- invalid data should only cancel this config item update, not discard
690+
-- the remaining events, use continue instead of loop break and return
691+
goto CONTINUE
692+
end
693+
689694
if res.dir then
690695
if res.value then
691696
return false, "todo: support for parsing `dir` response "
@@ -758,6 +763,8 @@ local function sync_data(self)
758763
end
759764

760765
self.conf_version = self.conf_version + 1
766+
767+
::CONTINUE::
761768
end
762769

763770
return self.values

t/cli/test_etcd_sync_event_handle.sh

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
. ./t/cli/common.sh
21+
22+
# check etcd while enable auth
23+
git checkout conf/config.yaml
24+
25+
# Make new routes
26+
etcdctl --endpoints=127.0.0.1:2379 del --prefix /apisix/routes/
27+
etcdctl --endpoints=127.0.0.1:2379 put /apisix/routes/ init_dir
28+
etcdctl --endpoints=127.0.0.1:2379 put /apisix/routes/1 '{"uri":"/1","plugins":{}}'
29+
etcdctl --endpoints=127.0.0.1:2379 put /apisix/routes/2 '{"uri":"/2","plugins":{}}'
30+
etcdctl --endpoints=127.0.0.1:2379 put /apisix/routes/3 '{"uri":"/3","plugins":{}}'
31+
etcdctl --endpoints=127.0.0.1:2379 put /apisix/routes/4 '{"uri":"/4","plugins":{}}'
32+
etcdctl --endpoints=127.0.0.1:2379 put /apisix/routes/5 '{"uri":"/5","plugins":{}}'
33+
34+
# Connect by unauthenticated
35+
echo '
36+
deployment:
37+
role: traditional
38+
role_traditional:
39+
config_provider: etcd
40+
etcd:
41+
host:
42+
- http://127.0.0.1:2379
43+
prefix: /apisix
44+
nginx_config:
45+
error_log_level: info
46+
worker_processes: 1
47+
' > conf/config.yaml
48+
49+
# Initialize and start APISIX without password
50+
make init
51+
make run
52+
53+
# Test request
54+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/1 | grep 503 || (echo "failed: Round 1 Request 1 unexpected"; exit 1)
55+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/2 | grep 503 || (echo "failed: Round 1 Request 2 unexpected"; exit 1)
56+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/3 | grep 503 || (echo "failed: Round 1 Request 3 unexpected"; exit 1)
57+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/4 | grep 503 || (echo "failed: Round 1 Request 4 unexpected"; exit 1)
58+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/5 | grep 503 || (echo "failed: Round 1 Request 5 unexpected"; exit 1)
59+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/6 | grep 404 || (echo "failed: Round 1 Request 6 unexpected"; exit 1)
60+
61+
# Enable auth to block APISIX connect
62+
export ETCDCTL_API=3
63+
etcdctl version
64+
etcdctl --endpoints=127.0.0.1:2379 user add "root:apache-api6-sync"
65+
etcdctl --endpoints=127.0.0.1:2379 role add root
66+
etcdctl --endpoints=127.0.0.1:2379 user grant-role root root
67+
etcdctl --endpoints=127.0.0.1:2379 user get root
68+
etcdctl --endpoints=127.0.0.1:2379 auth enable
69+
sleep 3
70+
71+
# Restart etcd services to make sure that APISIX cannot be synchronized
72+
project_compose_ci=ci/pod/docker-compose.common.yml make ci-env-stop
73+
project_compose_ci=ci/pod/docker-compose.common.yml make ci-env-up
74+
75+
# Make some changes when APISIX cannot be synchronized
76+
# Authentication ensures that only etcdctl can access etcd at this time
77+
etcdctl --endpoints=127.0.0.1:2379 --user=root:apache-api6-sync put /apisix/routes/1 '{"uri":"/1","plugins":{"fault-injection":{"abort":{"http_status":204}}}}'
78+
etcdctl --endpoints=127.0.0.1:2379 --user=root:apache-api6-sync put /apisix/routes/2 '{"uri":"/2"}' ## set incorrect configuration
79+
etcdctl --endpoints=127.0.0.1:2379 --user=root:apache-api6-sync put /apisix/routes/3 '{"uri":"/3","plugins":{"fault-injection":{"abort":{"http_status":204}}}}'
80+
etcdctl --endpoints=127.0.0.1:2379 --user=root:apache-api6-sync put /apisix/routes/4 '{"uri":"/4","plugins":{"fault-injection":{"abort":{"http_status":204}}}}'
81+
etcdctl --endpoints=127.0.0.1:2379 --user=root:apache-api6-sync put /apisix/routes/5 '{"uri":"/5","plugins":{"fault-injection":{"abort":{"http_status":204}}}}'
82+
83+
# Resume APISIX synchronization by disable auth
84+
# Since APISIX will not be able to access etcd until authentication is disable,
85+
# watch will be temporarily disabled, so when authentication is disable,
86+
# the backlog events will be sent at once at an offset from when APISIX disconnects.
87+
# When APISIX resumes the connection, it still has not met its mandatory full
88+
# synchronization condition, so it will be "watch" that resumes, not "readdir".
89+
etcdctl --endpoints=127.0.0.1:2379 --user=root:apache-api6-sync auth disable
90+
etcdctl --endpoints=127.0.0.1:2379 user delete root
91+
etcdctl --endpoints=127.0.0.1:2379 role delete root
92+
sleep 5 # wait resync by watch
93+
94+
# Test request
95+
# All but the intentionally incoming misconfigurations should be applied,
96+
# and non-existent routes will remain non-existent.
97+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/1 | grep 204 || (echo "failed: Round 2 Request 1 unexpected"; exit 1)
98+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/2 | grep 503 || (echo "failed: Round 2 Request 2 unexpected"; exit 1)
99+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/3 | grep 204 || (echo "failed: Round 2 Request 3 unexpected"; exit 1)
100+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/4 | grep 204 || (echo "failed: Round 2 Request 4 unexpected"; exit 1)
101+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/5 | grep 204 || (echo "failed: Round 2 Request 5 unexpected"; exit 1)
102+
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:9080/6 | grep 404 || (echo "failed: Round 2 Request 6 unexpected"; exit 1)
103+
104+
# Check logs
105+
## Case1: Ensure etcd is disconnected
106+
cat logs/error.log | grep "watchdir err: has no healthy etcd endpoint available" || (echo "Log case 1 unexpected"; exit 1)
107+
108+
## Case2: Ensure events are sent in bulk after connection is restored
109+
## It is extracted from the structure of following type
110+
## result = {
111+
## events = { {
112+
## {
113+
## kv = {
114+
## key = "/apisix/routes/1",
115+
## ...
116+
## }
117+
#### }, {
118+
## kv = {
119+
## key = "/apisix/routes/2",
120+
## ...
121+
## }
122+
## },
123+
## ...
124+
## } },
125+
## header = {
126+
## ...
127+
## }
128+
## }
129+
## After check, it only appears when watch recovers and returns events in bulk.
130+
cat logs/error.log | grep "}, {" || (echo "failed: Log case 2 unexpected"; exit 1)
131+
132+
## Case3: Ensure that the check schema error is actually triggered.
133+
cat logs/error.log | grep "failed to check item data" || (echo "failed: Log case 3 unexpected"; exit 1)

0 commit comments

Comments
 (0)