a7ex · a7ex · Jun 30, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Version 2.1.0 - 2026-06-24
+### CHANGES:
+- Detect flaky/mixed tests (failed on a first attempt but passed on retry) from the per-repetition results in the xcresult bundle, including repeated test cases, per-device runs, and parameterized test arguments.
+- In `txt`, `cli`, `html`, and `md` output, flaky tests are shown with a distinct flaky icon (🟠 in markdown, ⚠︎ otherwise) instead of the failure icon.
+- In `junit` output, a flaky test keeps its overall result and gains a `flaky="true"` attribute; when that result is a failure, each failure message also gets a `[FLAKY]` prefix.
+
 ## Version 2.0.0 - 2026-03-14
 ### CHANGES:
 - Fix issue #65: 'Session-level issues' / 'Issues recorded without an associated test or suite' are now listed in test results

diff --git a/CommandlineTool/main.swift b/CommandlineTool/main.swift
@@ -9,7 +9,7 @@ import ArgumentParser
 import Foundation
 import XcresultparserLib
 
-private let marketingVersion = "2.0.1"
+private let marketingVersion = "2.1.0"
 
 struct xcresultparser: ParsableCommand {
     static let configuration = CommandConfiguration(

diff --git a/Package.swift b/Package.swift
@@ -58,7 +58,8 @@ let package = Package(
                 .copy("TestAssets/sonarTestExecutionWithProjectRootRelative.xml"),
                 .copy("TestAssets/parametrized.xcresult"),
                 .copy("TestAssets/session_level_failure.xcresult"),
-                .copy("TestAssets/junit_session_level_failure.xml")
+                .copy("TestAssets/junit_session_level_failure.xml"),
+                .copy("TestAssets/Test-FlakyFixture.xcresult")
             ]
         )
     ]

diff --git a/README.md b/README.md
@@ -28,6 +28,12 @@ All JSON parsing is done with native `Codable` models in this project.
 - For `txt`, `cli`, `html`, and `md`, expected failures are represented as a distinct test state.
 - For `junit` and sonar test execution `xml`, expected failures are emitted as regular passing test cases for schema compatibility.
 
+## Flaky Test Detection
+A test that failed on a first attempt but passed on retry (a "flaky"/"mixed" result) is detected from the per-repetition results in the xcresult bundle. This works for repeated test cases, per-device runs, and parameterized test arguments.
+- For `txt`, `cli`, `html`, and `md`, a flaky test is shown with a distinct flaky icon (🟠 in markdown, ⚠︎ otherwise) instead of the failure icon.
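+- For `junit`, the test keeps its overall result and the `<testcase>` gains a `flaky="true"` attribute: a failed test keeps its `<failure>` children, each with a `[FLAKY]` prefix on its message, while a test that Xcode aggregates as passed after the retry has no `<failure>` children and is distinguished from a clean pass only by the attribute.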
+- Flaky tests still count as failures for roll-up, counts, and `--failed-tests-only`; they are only labeled differently in the rendered output.
+
 <details>
   <summary>More on converting code coverage data</summary>
 

diff --git a/Sources/xcresultparser/DataProviders/JunitXML/JunitXMLDataProviding.swift b/Sources/xcresultparser/DataProviders/JunitXML/JunitXMLDataProviding.swift
@@ -48,6 +48,10 @@ struct JunitTest {
     let duration: Double?
     let isFailed: Bool
     let isSkipped: Bool
+    /// `true` when the test failed on a first attempt but passed on retry.
+    /// The overall result (failed or, if the retry recovered, passed) is kept;
+    /// the test is only additionally labeled as flaky.
+    var isFlaky: Bool = false
 }
 
 struct JunitFailureSummary {

diff --git a/Sources/xcresultparser/DataProviders/JunitXML/XCResultToolJunitXMLDataProvider.swift b/Sources/xcresultparser/DataProviders/JunitXML/XCResultToolJunitXMLDataProvider.swift
@@ -138,7 +138,8 @@ struct XCResultToolJunitXMLDataProvider: JunitXMLDataProviding {
                     name: mappedArgumentTest.name,
                     duration: mappedArgumentTest.duration,
                     isFailed: mappedArgumentTest.result == .failed,
-                    isSkipped: mappedArgumentTest.result == .skipped
+                    isSkipped: mappedArgumentTest.result == .skipped,
+                    isFlaky: mappedArgumentTest.isFlaky
                 )
             }
         )
@@ -173,7 +174,8 @@ struct XCResultToolJunitXMLDataProvider: JunitXMLDataProviding {
             name: node.name,
             duration: node.durationInSeconds,
             isFailed: result == .failed,
-            isSkipped: result == .skipped
+            isSkipped: result == .skipped,
+            isFlaky: node.isFlaky
         )
     }
 

diff --git a/Sources/xcresultparser/JunitXML.swift b/Sources/xcresultparser/JunitXML.swift
@@ -297,9 +297,30 @@ public struct JunitXML: XmlSerializable {
         } else if test.isSkipped {
             testcase.addChild(skippedWithoutSummary)
         }
+        if test.isFlaky {
+            markTestcaseAsFlaky(testcase)
+        }
         return testcase
     }
 
+    // Labels a `<testcase>` as flaky (failed on first attempt, passed on retry)
+    // without changing its pass/fail semantics: it gains a `flaky="true"` marker.
+    //
+    // - When the overall result is failed, the `<testcase>` keeps its `<failure>`
+    //   children so it still counts as a failure, and each failure message gains
+    //   a `[FLAKY]` prefix (a failure with no message gets "[FLAKY] passed on retry").
+    // - When the overall result is passed (the retry recovered and Xcode
+    //   aggregated it as a pass), it has no `<failure>` children, so only the
+    //   `flaky="true"` marker distinguishes it from a clean pass.
+    private func markTestcaseAsFlaky(_ testcase: XMLElement) {
+        testcase.addAttribute(name: "flaky", stringValue: "true")
+        for case let failure as XMLElement in testcase.children ?? [] where failure.name == "failure" {
+            let labeled = "[FLAKY] " + (failure.attribute(forName: "message")?.stringValue ?? "passed on retry")
+            failure.removeAttribute(forName: "message")
+            failure.addAttribute(name: "message", stringValue: labeled)
+        }
+    }
+
     private var failureWithoutSummary: XMLElement {
         return XMLElement(name: "failure")
     }

diff --git a/Sources/xcresultparser/Models/XCResultToolModels/XCTestNode+Extensions.swift b/Sources/xcresultparser/Models/XCResultToolModels/XCTestNode+Extensions.swift
@@ -11,6 +11,7 @@ extension XCTestNode {
         let name: String
         let duration: TimeInterval?
         let result: XCTestResult
+        let isFlaky: Bool
     }
 
     func mapArgumentTest(argument: XCTestNode, testClassName: String?) -> MappedArgumentTest {
@@ -23,9 +24,31 @@ extension XCTestNode {
             identifier: baseIdentifier.formatWithParameter(argument.name),
             name: name.formatWithParameter(argument.name),
             duration: argument.durationInSeconds ?? durationInSeconds,
-            result: argument.result ?? result ?? .unknown
+            result: argument.result ?? result ?? .unknown,
+            isFlaky: argument.isFlaky
         )
     }
+
+    /// The results of all `Repetition` nodes nested below this node.
+    ///
+    /// Descends through intermediate nodes (e.g. `Test Case Run` for per-device
+    /// runs) until it reaches a `Repetition`, however deeply Xcode nests it;
+    /// a repetition that carries no result is reported as `.unknown`.
+    var repetitionResults: [XCTestResult] {
+        (children ?? []).flatMap { child -> [XCTestResult] in
+            child.nodeType == .repetition
+                ? [child.result ?? .unknown]
+                : child.repetitionResults
+        }
+    }
+
+    /// `true` when the repetitions nested below this node disagree: at least one
+    /// passed and at least one failed, in any order and at any depth. Such a
+    /// test is "flaky"/"mixed" rather than a clean pass or a clean failure.
+    var isFlaky: Bool {
+        let results = repetitionResults
+        return results.contains(.passed) && results.contains(.failed)
+    }
 }
 
 extension [XCTestNode] {

diff --git a/Sources/xcresultparser/OutputFormatting/Formatters/Markdown/MDResultFormatter.swift b/Sources/xcresultparser/OutputFormatting/Formatters/Markdown/MDResultFormatter.swift
@@ -19,6 +19,9 @@ public struct MDResultFormatter: XCResultFormatting {
     public var testExpectedFailureIcon: String {
         return forGithub ? "🟡 " : "🟡&nbsp;&nbsp;"
     }
+    public var testFlakyIcon: String {
+        return forGithub ? "🟠 " : "🟠&nbsp;&nbsp;"
+    }
 
     private let forGithub: Bool
 

diff --git a/Sources/xcresultparser/OutputFormatting/Formatters/XCResultFormatting.swift b/Sources/xcresultparser/OutputFormatting/Formatters/XCResultFormatting.swift
@@ -28,6 +28,7 @@ public protocol XCResultFormatting {
     var testPassIcon: String { get }
     var testSkipIcon: String { get }
     var testExpectedFailureIcon: String { get }
+    var testFlakyIcon: String { get }
 
     func codeCoverageTargetSummary(_ item: String) -> String
     func codeCoverageFileSummary(_ item: String) -> String
@@ -50,4 +51,8 @@ public extension XCResultFormatting {
     var testExpectedFailureIcon: String {
         return "!"
     }
+
+    var testFlakyIcon: String {
+        return "⚠︎"
+    }
 }
diff --git a/Sources/xcresultparser/XCResultFormatter.swift b/Sources/xcresultparser/XCResultFormatter.swift
@@ -30,7 +30,7 @@ public struct XCResultFormatter {
         let subtestGroups: [FormattedTestGroup]
 
         var hasFailedTests: Bool {
-            if subtests.contains(where: \.isFailed) {
+            if subtests.contains(where: \.countsAsFailure) {
                 return true
             }
             if subtestGroups.contains(where: \.hasFailedTests) {
@@ -49,6 +49,7 @@ public struct XCResultFormatter {
         case failed
         case skipped
         case expectedFailure
+        case flaky
     }
 
     private struct FormattedTest {
@@ -61,6 +62,17 @@ public struct XCResultFormatter {
             status == .failed
         }
 
+        var isFlaky: Bool {
+            status == .flaky
+        }
+
+        // A flaky test recovered on retry but is still treated as a failure for
+        // roll-up, counting and `--failed-tests-only` purposes - it is only
+        // labeled differently in the rendered output.
+        var countsAsFailure: Bool {
+            status == .failed || status == .flaky
+        }
+
         var isSkipped: Bool {
             status == .skipped
         }
@@ -401,7 +413,7 @@ public struct XCResultFormatter {
             lines.append(outputFormatter.accordionOpenTag)
         }
         for thisTest in group.subtests {
-            if !failedTestsOnly || thisTest.isFailed {
+            if !failedTestsOnly || thisTest.countsAsFailure {
                 lines.append(
                     actionTestFileStatusString(
                         for: thisTest,
@@ -456,6 +468,10 @@ public struct XCResultFormatter {
             return outputFormatter.testSkipIcon
         }
 
+        if testData.isFlaky {
+            return outputFormatter.testFlakyIcon
+        }
+
         return outputFormatter.testFailIcon
     }
 
@@ -616,7 +632,7 @@ public struct XCResultFormatter {
                     identifier: mappedArgumentTest.identifier,
                     name: mappedArgumentTest.name,
                     duration: mappedArgumentTest.duration,
-                    status: testStatus(for: mappedArgumentTest.result)
+                    status: mappedArgumentTest.isFlaky ? .flaky : testStatus(for: mappedArgumentTest.result)
                 )
             }
         )
@@ -646,7 +662,7 @@ public struct XCResultFormatter {
             identifier: identifier,
             name: node.name,
             duration: node.durationInSeconds,
-            status: testStatus(for: result)
+            status: node.isFlaky ? .flaky : testStatus(for: result)
         )
     }