Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle json parsing errors in rpc check #1086

Merged
merged 10 commits into from
Jul 9, 2024
25 changes: 18 additions & 7 deletions core/schains/rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import json
import logging
import time

from tools.configs import ALLOWED_TIMESTAMP_DIFF
from tools.configs.schains import DEFAULT_RPC_CHECK_TIMEOUT, RPC_CHECK_TIMEOUT_STEP
from tools.helper import post_request


logger = logging.getLogger(__name__)


def make_rpc_call(http_endpoint, method, params=None, timeout=None) -> bool:
params = params or []
return post_request(
Expand All @@ -47,10 +52,16 @@ def check_endpoint_alive(http_endpoint, timeout=None):

def check_endpoint_blocks(http_endpoint):
    """Tell whether the latest block served by the endpoint is recent enough.

    Requests the latest block via ``eth_getBlockByNumber`` and compares its
    hex-encoded ``timestamp`` with the local clock.

    :param http_endpoint: URL of the JSON-RPC endpoint to query.
    :returns: ``True`` when the absolute difference between the block
        timestamp and the current time is below ``ALLOWED_TIMESTAMP_DIFF``;
        ``False`` when the response is falsy, cannot be parsed, or the
        block is too far from the local time.
    """
    res = make_rpc_call(http_endpoint, 'eth_getBlockByNumber', ['latest', False])
    if not res:
        logger.warning('Empty response from skaled')
        return False

    try:
        res_data = res.json()
        # TypeError covers a JSON ``null`` result (or a non-mapping payload),
        # which would otherwise escape the health check as an exception.
        latest_schain_timestamp = int(res_data['result']['timestamp'], 16)
    except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e:
        logger.warning('Failed to parse response, error: %s', e)
        return False

    admin_timestamp = int(time.time())
    return abs(latest_schain_timestamp - admin_timestamp) < ALLOWED_TIMESTAMP_DIFF
105 changes: 60 additions & 45 deletions tests/schains/monitor/rpc_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
import datetime
import json
import mock
from time import sleep

import freezegun
import requests

from core.schains.monitor.rpc import handle_failed_schain_rpc
from core.schains.runner import get_container_info
from core.schains.rpc import check_endpoint_blocks
from tools.configs.containers import SCHAIN_CONTAINER

from web.models.schain import SChainRecord

CURRENT_TIMESTAMP = 1594903080
CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP)


def test_handle_failed_schain_rpc_no_container(schain_db, dutils, skaled_status):
schain_record = SChainRecord.get_by_name(schain_db)
Expand All @@ -15,21 +24,17 @@ def test_handle_failed_schain_rpc_no_container(schain_db, dutils, skaled_status)
schain={'name': schain_db},
schain_record=schain_record,
skaled_status=skaled_status,
dutils=dutils
dutils=dutils,
)
assert not dutils.is_container_exists(container_name)


def test_handle_failed_schain_rpc_exit_time_reached(
schain_db,
dutils,
cleanup_schain_containers,
skaled_status_exit_time_reached
schain_db, dutils, cleanup_schain_containers, skaled_status_exit_time_reached
):
schain_record = SChainRecord.get_by_name(schain_db)

image_name, container_name, _, _ = get_container_info(
SCHAIN_CONTAINER, schain_db)
image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)

dutils.run_container(image_name=image_name, name=container_name, entrypoint='bash -c "exit 0"')
sleep(7)
Expand All @@ -42,7 +47,7 @@ def test_handle_failed_schain_rpc_exit_time_reached(
schain={'name': schain_db},
schain_record=schain_record,
skaled_status=skaled_status_exit_time_reached,
dutils=dutils
dutils=dutils,
)
assert dutils.is_container_exists(container_name)

Expand All @@ -51,20 +56,14 @@ def test_handle_failed_schain_rpc_exit_time_reached(


def test_monitor_schain_downloading_snapshot(
schain_db,
dutils,
cleanup_schain_containers,
skaled_status_downloading_snapshot
schain_db, dutils, cleanup_schain_containers, skaled_status_downloading_snapshot
):
schain_record = SChainRecord.get_by_name(schain_db)

image_name, container_name, _, _ = get_container_info(
SCHAIN_CONTAINER, schain_db)
image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)

dutils.run_container(
image_name=image_name,
name=container_name,
entrypoint='bash -c "sleep 100"'
image_name=image_name, name=container_name, entrypoint='bash -c "sleep 100"'
)
sleep(7)
schain_record.set_failed_rpc_count(100)
Expand All @@ -76,25 +75,19 @@ def test_monitor_schain_downloading_snapshot(
schain={'name': schain_db},
schain_record=schain_record,
skaled_status=skaled_status_downloading_snapshot,
dutils=dutils
dutils=dutils,
)
container_info = dutils.get_info(container_name)
assert container_info['stats']['State']['FinishedAt'] == finished_at


def test_handle_failed_schain_rpc_stuck_max_retries(
schain_db,
dutils,
skaled_status,
cleanup_schain_containers
schain_db, dutils, skaled_status, cleanup_schain_containers
):
schain_record = SChainRecord.get_by_name(schain_db)
image_name, container_name, _, _ = get_container_info(
SCHAIN_CONTAINER, schain_db)
image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)
dutils.run_container(
image_name=image_name,
name=container_name,
entrypoint='bash -c "sleep 100"'
image_name=image_name, name=container_name, entrypoint='bash -c "sleep 100"'
)

schain_record.set_failed_rpc_count(100)
Expand All @@ -107,20 +100,17 @@ def test_handle_failed_schain_rpc_stuck_max_retries(
schain={'name': schain_db},
schain_record=schain_record,
skaled_status=skaled_status,
dutils=dutils
dutils=dutils,
)
container_info = dutils.get_info(container_name)
assert container_info['stats']['State']['FinishedAt'] == finished_at


def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers, skaled_status):
schain_record = SChainRecord.get_by_name(schain_db)
image_name, container_name, _, _ = get_container_info(
SCHAIN_CONTAINER, schain_db)
image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)
dutils.run_container(
image_name=image_name,
name=container_name,
entrypoint='bash -c "exit 100;"'
image_name=image_name, name=container_name, entrypoint='bash -c "exit 100;"'
)

schain_record.set_failed_rpc_count(100)
Expand All @@ -134,26 +124,20 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers,
schain={'name': schain_db},
schain_record=schain_record,
skaled_status=skaled_status,
dutils=dutils
dutils=dutils,
)
assert schain_record.restart_count == 0
container_info = dutils.get_info(container_name)
assert container_info['stats']['State']['FinishedAt'] == finished_at


def test_handle_failed_schain_rpc_stuck(
schain_db,
dutils,
cleanup_schain_containers,
skaled_status
schain_db, dutils, cleanup_schain_containers, skaled_status
):
schain_record = SChainRecord.get_by_name(schain_db)
image_name, container_name, _, _ = get_container_info(
SCHAIN_CONTAINER, schain_db)
image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)
dutils.run_container(
image_name=image_name,
name=container_name,
entrypoint='bash -c "sleep 100"'
image_name=image_name, name=container_name, entrypoint='bash -c "sleep 100"'
)

schain_record.set_failed_rpc_count(100)
Expand All @@ -167,8 +151,39 @@ def test_handle_failed_schain_rpc_stuck(
schain={'name': schain_db},
schain_record=schain_record,
skaled_status=skaled_status,
dutils=dutils
dutils=dutils,
)
assert schain_record.restart_count == 1
container_info = dutils.get_info(container_name)
assert container_info['stats']['State']['FinishedAt'] != finished_at


@mock.patch('tools.helper.requests.post')
@freezegun.freeze_time(CURRENT_DATETIME)
def test_check_endpoint_blocks(post_request_mock):
    """check_endpoint_blocks is False on bad responses, True on a fresh block.

    ``requests.post`` is patched and the clock is frozen at
    ``CURRENT_DATETIME`` so the timestamp comparison is deterministic.
    """
    url = 'http://127.0.0.1:10003'

    # The HTTP call itself fails.
    post_request_mock.side_effect = requests.exceptions.RequestException('Test error')
    assert check_endpoint_blocks(url) is False
    post_request_mock.side_effect = None

    fake_response = mock.Mock()
    post_request_mock.return_value = fake_response

    # (replacement for response.json, expected verdict), checked in order.
    scenarios = [
        # Body without a 'result' key.
        (mock.Mock(return_value={}), False),
        # Body that is not valid JSON.
        (
            mock.Mock(side_effect=json.JSONDecodeError('Test error', doc='doc', pos=1)),
            False,
        ),
        # Timestamp that is not a hex number.
        (mock.Mock(return_value={'result': {'timestamp': '0xhhhhh'}}), False),
        # Well-formed timestamp far from the frozen time.
        (mock.Mock(return_value={'result': {'timestamp': '0x1'}}), False),
        # Timestamp one second after the frozen time.
        (
            mock.Mock(return_value={'result': {'timestamp': hex(CURRENT_TIMESTAMP + 1)}}),
            True,
        ),
    ]
    for json_stub, expected in scenarios:
        fake_response.json = json_stub
        assert check_endpoint_blocks(url) is expected
Loading