Skip to content

Commit 305b014

Browse files
committed
kubernetes: Add timeouts to prevent I/O hangs
If we don't have a timeout, in some situations networking issues might cause the process to hang forever on k8s calls. Signed-off-by: Denys Fedoryshchenko <denys.f@collabora.com>
1 parent a4bb594 commit 305b014

1 file changed

Lines changed: 20 additions & 2 deletions

File tree

kernelci/runtime/kubernetes.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,15 @@ class Kubernetes(Runtime):
2222
"""
2323

2424
JOB_NAME_CHARACTERS = string.ascii_lowercase + string.digits
25+
# Default timeout for Kubernetes API calls in seconds
26+
# This prevents indefinite hangs on network issues
27+
DEFAULT_API_TIMEOUT = 30
2528

2629
def __init__(self, *args, **kwargs):
2730
super().__init__(*args, **kwargs)
2831
self.kcontext = None
32+
# Allow timeout to be configured, otherwise use default
33+
self.api_timeout = getattr(self.config, 'api_timeout', self.DEFAULT_API_TIMEOUT)
2934

3035
@classmethod
3136
def _get_job_file_name(cls, params):
@@ -56,11 +61,21 @@ def _fetch_load(self, ctxname):
5661
pods = None
5762
for _ in range(3):
5863
try:
59-
pods = core_v1.list_namespaced_pod(namespace='default')
64+
# Add client-side timeout to prevent indefinite hangs on network issues
65+
# _request_timeout is passed to urllib3 and controls socket timeout
66+
pods = core_v1.list_namespaced_pod(
67+
namespace='default',
68+
timeout_seconds=60, # Server-side timeout
69+
_request_timeout=self.api_timeout # Client-side timeout
70+
)
6071
break
6172
except kubernetes.client.rest.ApiException as error:
6273
print(f'Error listing pods in {ctxname}: {error}')
6374
continue
75+
except Exception as error: # pylint: disable=broad-except
76+
# Catch timeout and other exceptions (e.g., urllib3.exceptions.ReadTimeoutError)
77+
print(f'Exception listing pods in {ctxname}: {error}')
78+
continue
6479

6580
if not pods:
6681
print(f'No pods found in {ctxname}, returning 1000')
@@ -104,7 +119,10 @@ def wait(self, job_object):
104119
core_v1 = kubernetes.client.CoreV1Api()
105120
job_name = job_object[0][0].metadata.labels['job-name']
106121
for event in watch.stream(
107-
func=core_v1.list_namespaced_pod, namespace='default'):
122+
func=core_v1.list_namespaced_pod,
123+
namespace='default',
124+
timeout_seconds=3600, # Server-side timeout (1 hour)
125+
_request_timeout=self.api_timeout): # Client-side timeout
108126
if event['type'] != 'MODIFIED':
109127
continue
110128
if job_name not in event['object'].metadata.name:

0 commit comments

Comments
 (0)