Skip to content

Commit 51c8d36

Browse files
xoviat and Michael Howitz authored
handle worker crash (#158)
Rerun test on crash with xdist.

If a test crashes the worker while rerunfailures is running with a supported xdist, then the test will now be rerun. This new behavior:

1. requires pytest 6 or newer and a supported xdist
2. doesn't increase the number of times a worker can crash
3. allows rerunning a timed-out test when combined with pytest-timeout

Co-authored-by: Michael Howitz <mh@gocept.com>
Co-authored-by: xoviat <xoviat@users.noreply.github.com>
1 parent 948e273 commit 51c8d36

6 files changed

Lines changed: 234 additions & 14 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pytest_rerunfailures.egg-info/
44
.Python
55
.cache/
66
.idea/
7+
.vscode/
78
.python-version
89
.tox*
910
bin/

CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Changelog
44
10.2 (unreleased)
55
-----------------
66

7+
Features
8+
++++++++
9+
10+
- Allow recovery from crashed tests with pytest-xdist.
711
- Add support for Python 3.10 (as of Python 3.10.rc2).
812
(Thanks to `@hugovk <https://github.com/hugovk>`_ for the PR.)
913

README.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ You will need the following prerequisites in order to use pytest-rerunfailures:
2222
- Python 3.6, up to 3.10, or PyPy3
2323
- pytest 5.3 or newer
2424

25+
This plugin can recover from a hard crash with the following optional
26+
prerequisites:
27+
28+
- pytest-xdist 2.3.0 or newer
29+
2530
This package is currently tested against the last 5 minor pytest releases. In
2631
case you work with an older version of pytest you should consider updating or
2732
use one of the earlier versions of this package.
@@ -35,6 +40,17 @@ To install pytest-rerunfailures:
3540
3641
$ pip install pytest-rerunfailures
3742
43+
Recover from hard crashes
44+
-------------------------
45+
46+
If one or more tests trigger a hard crash (for example: segfault), this plugin
47+
will ordinarily be unable to rerun the test. However, if a compatible version of
48+
pytest-xdist is installed, and the tests are run within pytest-xdist using the ``-n``
49+
flag, this plugin will be able to rerun crashed tests, assuming the workers and
50+
controller are on the same LAN (this assumption is valid for almost all cases
51+
because most of the time the workers and controller are on the same computer).
52+
If this assumption does not hold, this functionality may not work.
53+
3854
Re-run all failures
3955
-------------------
4056

pytest_rerunfailures.py

Lines changed: 181 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1+
import hashlib
12
import os
23
import platform
34
import re
5+
import socket
46
import sys
7+
import threading
58
import time
69
import traceback
710
import warnings
11+
from contextlib import suppress
812

913
import pytest
1014
from _pytest.outcomes import fail
@@ -23,9 +27,16 @@
2327
# We have a pytest >= 6.1
2428
pass
2529

30+
try:
31+
from xdist.newhooks import pytest_handlecrashitem
2632

27-
PYTEST_GTE_54 = parse_version(pytest.__version__) >= parse_version("5.4")
33+
HAS_PYTEST_HANDLECRASHITEM = True
34+
del pytest_handlecrashitem
35+
except ImportError:
36+
HAS_PYTEST_HANDLECRASHITEM = False
2837

38+
39+
PYTEST_GTE_54 = parse_version(pytest.__version__) >= parse_version("5.4")
2940
PYTEST_GTE_63 = parse_version(pytest.__version__) >= parse_version("6.3.0.dev")
3041

3142

@@ -78,16 +89,6 @@ def pytest_addoption(parser):
7889
)
7990

8091

81-
def pytest_configure(config):
82-
# add flaky marker
83-
config.addinivalue_line(
84-
"markers",
85-
"flaky(reruns=1, reruns_delay=0): mark test to re-run up "
86-
"to 'reruns' times. Add a delay of 'reruns_delay' seconds "
87-
"between re-runs.",
88-
)
89-
90-
9192
def _get_resultlog(config):
9293
if not HAS_RESULTLOG:
9394
return None
@@ -302,6 +303,167 @@ def _should_not_rerun(item, report, reruns):
302303
)
303304

304305

306+
def is_master(config):
    """Return True when *config* does not look like an xdist worker config.

    A config counts as a worker's when it carries a ``workerinput`` or a
    ``slaveinput`` attribute; any config lacking both is treated as the
    master.
    """
    worker_attrs = ("workerinput", "slaveinput")
    return not any(hasattr(config, attr) for attr in worker_attrs)
308+
309+
310+
def pytest_configure(config):
    """Register the ``flaky`` marker and attach a failures db to *config*.

    The db stored on ``config.failures_db`` depends on the environment:
    a no-op ``StatusDB`` when the xdist crash hook is unavailable, a
    ``ServerStatusDB`` on the master, and a ``ClientStatusDB`` (connected to
    the port found in ``config.workerinput["sock_port"]``) otherwise.
    """
    # add flaky marker
    marker_help = (
        "flaky(reruns=1, reruns_delay=0): mark test to re-run up "
        "to 'reruns' times. Add a delay of 'reruns_delay' seconds "
        "between re-runs."
    )
    config.addinivalue_line("markers", marker_help)

    if not HAS_PYTEST_HANDLECRASHITEM:
        config.failures_db = StatusDB()  # no-op db
        return

    if is_master(config):
        config.failures_db = ServerStatusDB()
    else:
        config.failures_db = ClientStatusDB(config.workerinput["sock_port"])
326+
327+
328+
if HAS_PYTEST_HANDLECRASHITEM:

    def pytest_configure_node(node):
        """xdist hook: hand the failures db port to the node's worker input."""
        failures_db = node.config.failures_db
        node.workerinput["sock_port"] = failures_db.sock_port

    def pytest_handlecrashitem(crashitem, report, sched):
        """xdist hook: reschedule a crashed test while it has reruns left.

        When the failures recorded for *crashitem* are still below its
        configured number of reruns, the item is marked pending again on
        *sched* and *report* is relabelled as a ``"rerun"``. The crash is
        recorded as a failure in every case.
        """
        db = sched.config.failures_db
        allowed_reruns = db.get_test_reruns(crashitem)
        failures_so_far = db.get_test_failures(crashitem)
        if failures_so_far < allowed_reruns:
            sched.mark_test_pending(crashitem)
            report.outcome = "rerun"

        db.add_test_failure(crashitem)
345+
346+
347+
# An in-memory db residing in the master that records
# the number of reruns (set before test setup)
# and failures (set after each failure or crash)
# accessible from both the master and worker
class StatusDB:
    """No-op base implementation of the rerun/failure counters.

    Public methods translate a test name into a short hash and delegate to
    ``_set`` / ``_get``. In this base class nothing is stored and every
    lookup yields 0; subclasses override ``_set`` / ``_get``.
    """

    def __init__(self):
        self.delim = b"\n"
        # cache: test name -> 10-character hash
        self.hmap = {}

    def _hash(self, crashitem: str) -> str:
        """Return the first 10 hex digits of the SHA-1 of *crashitem*, cached."""
        try:
            return self.hmap[crashitem]
        except KeyError:
            digest = hashlib.sha1(crashitem.encode()).hexdigest()
            short = self.hmap[crashitem] = digest[:10]
            return short

    def add_test_failure(self, crashitem):
        """Increment the failure counter of *crashitem* by one."""
        key = self._hash(crashitem)
        self._set(key, "f", self._get(key, "f") + 1)

    def get_test_failures(self, crashitem):
        """Return the failure counter of *crashitem*."""
        return self._get(self._hash(crashitem), "f")

    def set_test_reruns(self, crashitem, reruns):
        """Store *reruns* as the rerun count of *crashitem*."""
        self._set(self._hash(crashitem), "r", reruns)

    def get_test_reruns(self, crashitem):
        """Return the stored rerun count of *crashitem*."""
        return self._get(self._hash(crashitem), "r")

    # i is a hash of the test name, t_f.py::test_t
    # k is f for failures or r for reruns
    # v is the number of failures or reruns (an int)
    def _set(self, i: str, k: str, v: int):
        """Discard the value; the base db stores nothing."""
        return None

    def _get(self, i: str, k: str) -> int:
        """Always report 0; the base db stores nothing."""
        return 0
390+
391+
392+
class SocketDB(StatusDB):
    """Shared base of the socket-backed dbs.

    Owns a blocking TCP socket (``self.sock``) and implements the message
    framing: each message is UTF-8 text terminated by ``self.delim``.
    """

    def __init__(self):
        super().__init__()
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setblocking(1)

    def _sock_recv(self, conn) -> str:
        """Read one delimiter-terminated message from *conn*.

        Returns:
            The decoded message without the trailing delimiter.

        Raises:
            ConnectionError: if the peer closes the connection before a
                complete message has been received.
        """
        buf = bytearray()
        while True:
            b = conn.recv(1)
            if not b:
                # recv() returns b"" once the peer has closed the connection
                # (e.g. a crashed worker). That never equals the delimiter,
                # so without this check the loop would spin forever.
                raise ConnectionError(
                    "connection closed before a complete message was received"
                )
            if b == self.delim:
                break
            buf += b

        return buf.decode()

    def _sock_send(self, conn, msg: str):
        """Send *msg* followed by the delimiter over *conn*."""
        # sendall() keeps writing until the whole message is out, whereas
        # send() may transmit only part of it.
        conn.sendall(msg.encode() + self.delim)
410+
411+
412+
class ServerStatusDB(SocketDB):
    """Master-side db: keeps the counters in memory and serves them over TCP.

    A daemon thread accepts connections; each connection is handled in its
    own daemon thread that understands two ``|``-separated requests:
    ``set|<i>|<k>|<v>`` and ``get|<i>|<k>|``.
    """

    def __init__(self):
        super().__init__()
        # Clients only ever connect to "localhost", so listen on loopback
        # instead of exposing this unauthenticated port on every interface.
        self.sock.bind(("localhost", 0))
        # NOTE(review): SO_REUSEADDR is set after bind(), so it presumably has
        # no effect on this socket; order kept as in the original - confirm.
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

        # {hash of test name: {"f" or "r": int}}
        self.rerunfailures_db = {}
        t = threading.Thread(target=self.run_server, daemon=True)
        t.start()

    @property
    def sock_port(self):
        """Port number the listening socket is bound to."""
        return self.sock.getsockname()[1]

    def run_server(self):
        """Accept connections forever, one handler thread per connection."""
        self.sock.listen()
        while True:
            conn, _ = self.sock.accept()
            t = threading.Thread(target=self.run_connection, args=(conn,), daemon=True)
            t.start()

    def run_connection(self, conn):
        """Serve ``set`` / ``get`` requests on *conn* until it fails.

        A ``ConnectionError`` ends the handler silently. The accepted socket
        is closed on the way out so it is not leaked.
        """
        with conn, suppress(ConnectionError):
            while True:
                op, i, k, v = self._sock_recv(conn).split("|")
                if op == "set":
                    self._set(i, k, int(v))
                elif op == "get":
                    self._sock_send(conn, str(self._get(i, k)))

    def _set(self, i: str, k: str, v: int):
        """Store *v* under key *k* of entry *i*, creating the entry if needed."""
        # setdefault() creates the entry in one step, so handler threads
        # cannot overwrite each other's freshly created entry.
        self.rerunfailures_db.setdefault(i, {})[k] = v

    def _get(self, i: str, k: str) -> int:
        """Return the value stored under key *k* of entry *i*, or 0 if absent."""
        try:
            return self.rerunfailures_db[i][k]
        except KeyError:
            return 0
452+
453+
454+
class ClientStatusDB(SocketDB):
    """Worker-side db that forwards every read and write over its socket."""

    def __init__(self, sock_port):
        super().__init__()
        server_address = ("localhost", sock_port)
        self.sock.connect(server_address)

    def _set(self, i: str, k: str, v: int):
        """Send a ``set`` request for key *k* of entry *i*; no reply is read."""
        request = "|".join(["set", i, k, str(v)])
        self._sock_send(self.sock, request)

    def _get(self, i: str, k: str) -> int:
        """Send a ``get`` request and return the integer reply."""
        # the trailing empty field keeps the request at four "|"-separated parts
        request = "|".join(["get", i, k, ""])
        self._sock_send(self.sock, request)
        reply = self._sock_recv(self.sock)
        return int(reply)
465+
466+
305467
def pytest_runtest_protocol(item, nextitem):
306468
"""
307469
Run the test protocol.
@@ -319,8 +481,14 @@ def pytest_runtest_protocol(item, nextitem):
319481
# first item if necessary
320482
check_options(item.session.config)
321483
delay = get_reruns_delay(item)
322-
parallel = hasattr(item.config, "slaveinput") or hasattr(item.config, "workerinput")
323-
item.execution_count = 0
484+
parallel = not is_master(item.config)
485+
item_location = (item.location[0] + "::" + item.location[2]).replace("\\", "/")
486+
db = item.session.config.failures_db
487+
item.execution_count = db.get_test_failures(item_location)
488+
db.set_test_reruns(item_location, reruns)
489+
490+
if item.execution_count > reruns:
491+
return True
324492

325493
need_to_run = True
326494
while need_to_run:

test_pytest_rerunfailures.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import pytest
66
from pkg_resources import parse_version
77

8+
from pytest_rerunfailures import HAS_PYTEST_HANDLECRASHITEM
9+
810

911
pytest_plugins = "pytester"
1012

1113
PYTEST_GTE_60 = parse_version(pytest.__version__) >= parse_version("6.0")
12-
1314
PYTEST_GTE_61 = parse_version(pytest.__version__) >= parse_version("6.1")
15+
has_xdist = HAS_PYTEST_HANDLECRASHITEM and PYTEST_GTE_61
1416

1517

1618
def temporary_failure(count=1):
@@ -23,6 +25,17 @@ def temporary_failure(count=1):
2325
raise Exception('Failure: {{0}}'.format(count))"""
2426

2527

28+
def temporary_crash(count=1):
29+
return f"""
30+
import py
31+
import os
32+
path = py.path.local(__file__).dirpath().ensure('test.res')
33+
count = path.read() or 1
34+
if int(count) <= {count}:
35+
path.write(int(count) + 1)
36+
os._exit(1)"""
37+
38+
2639
def check_outcome_field(outcomes, field_name, expected_value):
2740
field_value = outcomes.get(field_name, 0)
2841
assert field_value == expected_value, (
@@ -168,6 +181,23 @@ def test_pass():
168181
assert_outcomes(result, passed=1, rerun=1)
169182

170183

184+
@pytest.mark.skipif(not has_xdist, reason="requires xdist with crashitem")
185+
def test_rerun_passes_after_temporary_test_crash(testdir):
186+
# note: we need two tests because there is a bug where xdist
187+
# cannot rerun the last test if it crashes. the bug exists only
188+
# in xdist; there is no error in this plugin that causes it.
189+
testdir.makepyfile(
190+
f"""
191+
def test_crash():
192+
{temporary_crash()}
193+
194+
def test_pass():
195+
pass"""
196+
)
197+
result = testdir.runpytest("-n", "1", "--reruns", "1", "-r", "R")
198+
assert_outcomes(result, passed=2, rerun=1)
199+
200+
171201
def test_rerun_passes_after_temporary_test_failure_with_flaky_mark(testdir):
172202
testdir.makepyfile(
173203
f"""

tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ minversion = 3.17.1
1818
[testenv]
1919
commands = pytest test_pytest_rerunfailures.py {posargs}
2020
deps =
21+
pytest-xdist
2122
pytest53: pytest==5.3.*
2223
pytest54: pytest==5.4.*
2324
pytest60: pytest==6.0.*

0 commit comments

Comments
 (0)