diff --git a/.gitignore b/.gitignore
index 21cecc927..97859a175 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@ tests/SIL.Machine.Tests/Corpora/TestData/usfm/target/*
 tests/SIL.Machine.Tests/Corpora/TestData/project/*
 tests/SIL.Machine.Tests/Corpora/TestData/pretranslations.json
 .idea
+.worktrees
 
 # Local-only HermitCrab benchmark fixtures (real Sena/Indonesian grammars + word lists, used
 # for ad hoc perf/allocation testing) + FieldWorks project backups. Large and/or not licensed
diff --git a/docs/hermitcrab-parse-algorithm-analysis.md b/docs/hermitcrab-parse-algorithm-analysis.md
new file mode 100644
index 000000000..1149ebfea
--- /dev/null
+++ b/docs/hermitcrab-parse-algorithm-analysis.md
@@ -0,0 +1,360 @@
+# Where the 15 million steps go: an algorithmic dissection of HermitCrab parsing
+
+This document dissects, empirically and against the literature, why a single legitimate Sena
+word costs the HermitCrab engine millions of rule applications, and identifies the specific
+redundancies that could be removed **without constraining the grammar and without losing valid
+parses**. It is the analysis companion to `complexity-cap.md` (which bounds the damage) and
+`docs/hermitcrab-grammar-performance.md` (which helps grammar authors avoid the damage). This
+document is about making the engine itself stop doing provably repeated work.
+
+All numbers below are from the real Sena grammar (`samples/data/sena-hc.xml`, ~33k lines, two
+`morphologicalRuleOrder="unordered"` strata, 25 morphological rules + ~19 multi-slot affix
+templates in the main stratum), measured with an instrumented harness that replicates
+`Morpher.ParseWordCore` exactly and swaps in a behavior-identical, counting clone of the
+analysis cascade. A "step" is one rule-application attempt (`ParseContext.Step`), the same unit
+`MaxParseSteps` budgets.
+
+## 1. The headline data
+
+Two worst-case words, dissected end to end:
+
+| | `atawirambo` (parses, 2 results) | `cinacemerwa` (fails, 0 results) |
+|---|---|---|
+| Total steps | 14,905,517 | 37,543,196 |
+| Analysis phase steps | 14,202,364 (95.3%) | 29,494,226 (78.6%) |
+| Analysis candidates produced | 41 | 41 |
+| — of which reach any lexical root | 4 | 5 |
+| — of which yield a parse | 2 | 0 |
+| Synthesis phase steps | 703,153 | 8,048,970 |
+| Synthesis inputs after `ExpandAlternatives` | 17,699 | 218,847 |
+| Cascade node expansions (main stratum) | 158,227 | 523,773 |
+| Unique states — (shape, rule-multiset) | 1,626 | 12,168 |
+| Unique states — + syntacticFS in key | 2,546 | (not measured) |
+| **Redundant re-expansions** | **98.4% of tree** | **97.7% of tree** |
+
+Three facts jump out:
+
+1. **The cost is analysis, not synthesis.** 79–95% of all steps are spent unapplying
+   morphological rules to hypothesize underlying forms — producing just 41 candidates, of which
+   only 4–5 ever match a lexical root.
+
+2. **The analysis tree is ~98% transpositions.** The cascade re-expands states it has already
+   fully explored. One state (`shape='t'`, a 12-affix multiset) was re-expanded **7,200 times**
+   for `atawirambo`. These are order-variants: unapplying prefix `a-` then suffix `-mbo` vs.
+   `-mbo` then `a-` reaches the same (shape, remaining-rules) state, and the engine explores the
+   entire subtree below it again each time.
+
+3. **Nothing prunes hopeless work.** The most expensive word in the corpus (`cinacemerwa`,
+   37.5M steps) returns *zero* parses: 218,847 fully-synthesized candidate words, every one
+   failing at the end-of-pipeline checks (surface match / `IsWordValid`). The engine has no
+   notion of "this branch can no longer succeed."
+
+## 2. The combinatorial structure, precisely
+
+### 2.1 Analysis: all *orderings*, deduped only at the end
+
+For `unordered` strata (both Sena strata), analysis morphology runs through
+`CombinationRuleCascade` with `multiApp: true`
+(`AnalysisStratumRule.cs:50-71` → `CombinationRuleCascade.cs:32-54`). In that mode the recursion
+restarts at rule index 0 on **every** level: the search enumerates all ordered sequences (with
+repetition, bounded per-rule by `MaxApplicationCount`, default 1) of rule unapplications. For a
+word where k independent affixes can strip, that is O(k!) paths to the same end state, not
+O(2^k) states.
+
+Each node expansion attempts the **entire rule battery** — visible in the per-rule diagnostics
+as bands of rules with *identical* attempt counts (14 prefix rules × 319,267 attempts, 30 rules
+× 158,227 attempts = one attempt per rule per node). Every attempt costs one step plus, if the
+rule's syntactic gates pass, a full-shape anchored FST match per allomorph
+(`AnalysisAffixProcessRule.cs:61-64`, `MatchingMethod.Unification`, `AllSubmatches: true`), and
+every successful unapplication deep-clones the `Word` including its `Shape`
+(`AnalysisAffixProcessAllomorphRuleSpec.ApplyRhs`).
+
+Deduplication exists but fires **after the work is done**:
+
+- Each cascade's terminal `HashSet<Word>` collapses equal results — but
+  `Word.ValueEquals` (`Word.cs:583-600`) includes the `_mruleApps` **sequence**, so two
+  orderings of the same affix set are *not* equal and are both kept, and in any case the
+  HashSet dedups storage, not the recomputation that produced the duplicate.
+- `MergeEquivalentAnalyses` (`AnalysisStratumRule.cs:140-178`) merges by **shape only**, at the
+  stratum output boundary — after the tree has been fully walked. The merged variants are
+  stashed in `Word.Alternatives`… and then `ExpandAlternatives` (`Word.cs:452-494`)
+  re-materializes every one of them as a separate synthesis input. Merging defers the
+  explosion; it does not remove it (16,330 alternatives for one candidate of `atawirambo`;
+  98,197 for one candidate of `cinacemerwa`).
+
+On top of the cascade, templates and morphological rules mutually recurse
+(`AnalysisStratumRule.cs:188-230`): every cascade output gets the full template battery applied,
+and every template output re-enters the full cascade — again with no memoization, which is why
+total analysis steps (14.2M) are ~3.6× the cascade-internal rule attempts (3.96M).
+
+### 2.2 Synthesis: a directed replay that still scans the whole battery
+
+Synthesis is *not* a search — each analysis trail dictates the exact rule sequence, gated by
+`IsMorphologicalRuleApplicable` (`Word.cs:269-276`: the next pending rule must equal the rule
+being tried). But the `CombinationRuleCascade` used for unordered synthesis
+(`SynthesisStratumRule.cs:35`) still **attempts all ~40 rules at every node** and lets the gate
+reject 39 of them, one step each: every rule shows exactly 17,877 synthesis attempts for
+`atawirambo`'s 17,699 synthesis inputs. The engine already knows the one rule that can apply
+(`_mruleApps[_mruleAppIndex]`); it looks for it by exhaustive scan.
+
+And the expensive correctness checks run dead last: allomorph environments, allomorph/morpheme
+co-occurrence, disjunctive allomorph selection, and the surface-form match are all evaluated
+only after the entire synthesis cascade has produced a finished word
+(`Allomorph.IsWordValid`, `Morpher.IsWordValid`, `Morpher.IsMatch` — `Morpher.cs:711-753`).
+`cinacemerwa` synthesized 218,847 complete words and threw away every single one at that final
+stage.
+
+## 3. What the literature says
+
+The most striking finding is internal: **HermitCrab's founding paper already solved this
+problem, by packing rather than forking.** Maxwell (1994) — the original Hermit Crab design
+(Michael Maxwell's, not David Weber's; Weber's tools are AMPLE/STAMP) — avoids exponential
+analysis explicitly *"by encoding into the form being parsed the ambiguities which arise
+during parsing"*: rule unapplication uninstantiates features and marks undone
+deletions/epentheses `[+optional]`, producing **one underspecified shape that denotes the whole
+candidate set**, with lexical lookup as unification against it. The .NET implementation keeps
+this for phonology (`AnalysisRewriteRule` mutates one shape in place, which is why phonological
+rules are invisible in the step counters) but forks a concrete `Word` per choice at the
+morphological level — losing the design's central invariant exactly where Bantu grammars
+multiply. Maxwell quotes Anderson (1988): with realistic rule depth, "simply undoing the
+effects of the rules… [is] quite impractical" if candidates multiply. The measured
+98%-transposition tree is that prediction come true. (Historically, Hermit Crab benchmarked
+within ~3× of PC-KIMMO when ambiguity stayed *in the form* rather than in the agenda.)
+
+The rest of the (verified) literature converges on the same handful of completeness-safe
+mechanisms:
+
+1. **The complexity is real but local.** Two-level morphological recognition is NP-complete in
+   general, PSPACE-complete with unrestricted deletion (Barton 1986; Barton, Berwick & Ristad
+   1987) — so no restructuring gives a polynomial worst case, and the budget/soft-stop outer
+   net stays. But the hardness is driven by *"local rather than global ambiguity"*, and
+   Koskenniemi & Church (1988) locate the exponent precisely: parse cost is linear in word
+   length and exponential in the number of **unresolved choice points that coexist before the
+   first lexical anchor** — regressive-harmony prefixes in their data; subject/tense/object
+   prefix slots before the verb root in Sena.
+
+2. **"Overanalysis" and its two published cures.** Unapply-everything-then-look-up is what
+   Karttunen & Beesley call the overanalysis problem. Cure (a): **interleave lexical lookup
+   with analysis** (Koskenniemi's tandem lookup "does not pursue analyses that have no matching
+   lexical path"); sound whenever the lexical filter over-approximates the lexicon. Cure (b):
+   compose lexicon and rules at compile time — the FST endgame, out of scope here. Notably,
+   rule *composition alone does not help*: "the ambiguity remains" (Karttunen & Beesley) —
+   only lexicon information and state merging shrink the candidate set.
+
+3. **Memoization is compatible with exact all-parses output.** Memoizing a backtracking parser
+   keyed on state yields chart-parser complexity (Norvig 1991); Earley deduction / tabling
+   (Johnson 1995; Shieber et al. 1995) gives the answer-complete discipline for it (memo entry
+   = subscribers + answers; converging searches subscribe instead of recomputing). The exact
+   model-counting literature (Sang et al. 2004; Bacchus et al. 2009) proves caching coexists
+   with exhaustive (not just best-first) semantics. The game-search literature contributes the
+   key-design discipline (Kishimoto & Müller 2004: keys must contain exactly what the remaining
+   computation reads — a full-path key blew up searches 1000×).
+
+4. **Dead-end pruning is unambiguously sound.** Nogood recording / UNSAT-component caching
+   (Dechter & Mateescu 2007; Sang et al. 2004): discarding states proven to yield zero
+   completions can never lose a parse. The boolean residue of A* heuristics — precomputed
+   necessary conditions for *any* completion to exist — is the admissible-pruning transfer
+   (Klein & Manning 2003); best-first *ordering* itself buys nothing when running to
+   exhaustion.
+
+5. **Packed representations are guaranteed to exist.** Rewrite-rule cascades denote regular
+   relations (Johnson 1972; Kaplan & Kay 1994), so for a fixed surface word the candidate set
+   is a regular language — representable as a lattice/DAG where each rule applies once to the
+   whole structure (polynomial in lattice size), instead of once per enumerated path. Shared
+   forests with tail sharing (Billot & Lang 1989; Tomita-style local-ambiguity packing) are the
+   grammar-level version; AND/OR search with context-based merging (Dechter & Mateescu 2007)
+   the search-level one. HFST optimized-lookup demonstrates the endpoint: cost bounded by
+   distinct (position, state) pairs, not derivation paths.
+
+6. **What does NOT transfer:** classical dominance pruning and symmetry breaking keep one
+   representative per equivalence orbit — sound only for optimization/best-parse, unsound for
+   literally-all-parses unless the merged items are provably output-identical (which is just
+   deduplication); Viterbi-style weighted DP is best-parse machinery.
+
+7. **Field precedent.** The FLEx mailing list documents this exact pain (Awetí words at ~9-20
+   minutes), fixed until now only by hand-editing grammars (Andy Black's audit took a word from
+   ~9 min to ~100 s). Maxwell (1998) shows IA (listed-allomorph) and IP (rule) descriptions are
+   mechanically interconvertible — precedent for precompiling cheap rules into listed
+   allomorphs. No published engine-side fix exists; this analysis + `GrammarAnalyzer` would be
+   the first citable treatment.
+
+## 4. Concrete opportunities, ranked
+
+Ranking merges the empirical measurements (§1–§2) with the literature's soundness analysis
+(§3). The first three are engine changes with no formalism impact and no lost parses; the
+later ones are progressively larger architectural moves.
+
+### 4.1 Transposition table over analysis states (~50–100× on the dominant phase)
+
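+Key: `(shape, per-rule unapplication counts, SyntacticFeatureStruct, stratum)` — measured
+98.4% hit rate on `atawirambo` with the FS in the key, and 97.7% on `cinacemerwa` with the
+(shape, rule-multiset) key only (the FS-keyed count was not measured for that word; see §1).
+Two designs: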
+
+- **Conservative (output-identical):** memo value = the set of (result `Word`, trail-suffix)
+  continuations discovered below the state; on a revisit, replay the continuations onto the
+  new prefix trail (cheap list operations — no FST matching, no shape cloning). Produces
+  byte-identical output including all order-variant trails and traces.
+- **Aggressive (canonical trails):** for `unordered` strata, record the trail as a canonical
+  multiset and stop generating order-variants entirely; synthesis gates on multiset membership
+  instead of sequence position. Semantically defensible — "unordered" means order is not
+  linguistically meaningful — and collapses `ExpandAlternatives` too, but changes trace output
+  and needs corpus-level verification that parse *results* are unchanged.
+
+The conservative design alone converts the 158,227-expansion tree into a 2,546-expansion DAG.
+
+Key-design discipline from the literature (the "GHI problem" in game search): the memo key must
+contain **exactly** what the remaining computation can read — here that means the shape, the
+per-rule unapplication counts (they gate `MaxApplicationCount`), the syntactic FS (it gates
+`OutSyntacticFeatureStruct.IsUnifiable`), and for compounding the non-head state; but *not* the
+trail order. Keying on too much (e.g. the full trail) silently degrades the hit rate back to
+zero. The measured 1,626 → 2,546 state growth when adding the FS to the key shows the FS
+splits few states in practice — cheap to include, and required for soundness.
+
+The **cheapest first slice** of this is a *nogood cache* only: record states whose subtree
+yielded zero results, skip them on revisit. No continuation replay, no trail bookkeeping,
+trivially sound (discarding a zero-completion branch can never lose a parse). Since failure is
+the overwhelmingly common case (only 4/41 candidates ever reach the lexicon), most of the
+98.4% redundancy is *failed* subtrees re-searched — a nogood-only table captures most of the
+win for a fraction of the implementation risk. The tabling literature's discipline applies on
+upgrade to a full memo: a memo entry holds subscribers + answers, and a search converging on an
+in-flight entry subscribes rather than recomputing.
+
+### 4.2 Early lexical intersection — "tandem lookup" (the literature's decisive fix)
+
+37 of 41 `atawirambo` candidates never matched any lexical root, and the tree that produced
+them is the entire cost. This is Karttunen & Beesley's "overanalysis" problem, and the
+published cure that doesn't require FST compilation is Koskenniemi's tandem lookup: consult the
+lexicon *during* analysis and refuse to pursue hypotheses no lexical path can complete.
+Soundness condition: the filter must **over-approximate** the lexicon (only kill hypotheses
+that could never survive lookup), which tolerates underspecified segments conservatively.
+
+Concretely: if every rule that can still be unapplied only strips edge material (true for ordinary
+affix rules — verifiable statically per grammar by `GrammarAnalyzer`), then a candidate can
+only ever reach roots already present inside its current shape. Precompute a substring index
+over root allomorphs (Aho-Corasick / suffix automaton, matching at the natural-class level so
+underspecified nodes over-approximate); prune any branch whose shape contains no possible
+root. This attacks the exponent the literature identifies — unresolved prefix choice points
+stacking up *before the search ever touches the root* — and AMPLE's dictionary-first
+architecture is the existence proof that the same grammar content can be searched
+lexicon-anchored.
+
+### 4.3 Direct rule indexing in synthesis (~40× on synthesis steps)
+
+Unordered synthesis knows the single rule that can apply next; replace the scan-all-rules
+cascade with a `Dictionary<IMorphologicalRule, IRule<Word,int>>` lookup (compounding-rule
+`null` entries fall back to the scan). Behavior-identical by construction: the 39 skipped
+attempts are exactly the ones `IsMorphologicalRuleApplicable` rejects today. Turns
+`cinacemerwa`'s 8.0M synthesis steps into ~200K.
+
+### 4.4 Early constraint checking in synthesis
+
+Allomorph environment and co-occurrence constraints that are already decidable mid-derivation
+(the environment's context is fully inside an already-built portion of the word, morphemes
+already placed) could fail candidates before the rest of the cascade runs, instead of at
+`IsWordValid`. Requires care with material later phonological rules could still change; the
+statically-safe subset is identifiable per grammar (`GrammarAnalyzer` again).
+
+### 4.5 Rule-battery prefiltering in analysis (constant factor)
+
+At every analysis node all 25+ rules are attempted; most fail their anchored FST match
+immediately. An index from edge-segment natural classes to the affix rules whose patterns could
+possibly match (AMPLE-style position/anchor indexing) skips guaranteed-miss attempts without
+changing semantics.
+
+### 4.6 Cross-word memoization (corpus-scale extension)
+
+The transposition state contains no reference to the original surface word — states like
+`('t', {12 affixes})` recur across *words*. A bounded (LRU) cross-word memo could make
+"Parse All Words" batch runs dramatically sublinear in practice. Interaction with per-parse
+`ParseContext` budgets needs design; flagged as an extension, not a first step.
+
+### 4.7 Packed candidate representation (the endgame short of full FST)
+
+Restore Maxwell's original invariant at the morphological level: represent the analysis
+candidate set as a shared lattice/DAG (guaranteed to exist — the candidate set of a
+rewrite-rule cascade over a fixed surface form is a regular language, Kaplan & Kay 1994),
+where each rule stage applies once to the whole structure and equal states merge (the foma/HFST
+habit of determinize-minimize between stages, transplanted). `Word.Alternatives` +
+`ExpandAlternatives` is a half-built version of this — it packs (by shape, at stratum
+boundaries) but then fully unpacks before synthesis. Making synthesis verify *lattice nodes*
+instead of expanded candidates is the biggest win and the biggest change; it converges with
+the separate FST effort and should be weighed against it rather than built independently.
+
+### Priorities
+
+1. **4.1 nogood slice** — cheapest, trivially sound, captures most of the measured 98%.
+2. **4.1 full memo + 4.3 synthesis rule indexing** — mechanical, output-identical.
+3. **4.2 tandem lexical intersection** — the decisive fix per the literature; needs the
+   `GrammarAnalyzer` edge-stripper check.
+4. **4.4 / 4.5 invariants and prefilters** — constant factors, fit the existing lint.
+5. **4.6 / 4.7** — corpus-scale and architectural endgames, coordinate with the FST effort.
+
+The complexity cap (`complexity-cap.md`) stays regardless: the worst case is NP-complete
+(PSPACE-complete with unrestricted deletion), so a budget outer net is formally motivated, and
+Barton's "bounded nulls" + Maxwell's own "unapply a deletion rule only N times" sanction the
+existing `DeletionReapplications`/`MaxAnalysisShapeGrowth` knobs as part of the formalism, not
+an apology.
+
+## 5. Sources
+
+Primary sources verified against fetched text by the research pass (adversarial spot-checks
+6/6 confirmed):
+
+- M. Maxwell (1994), *Parsing Using Linearly Ordered Phonological Rules* — the original Hermit
+  Crab: packing ambiguity into underspecified forms. https://arxiv.org/abs/cmp-lg/9411015
+- M. Maxwell (1991), *Phonological Analysis and Opaque Rule Orders*, IWPT-2.
+  https://aclanthology.org/1991.iwpt-1.13/ (overgeneration bound; full text not yet retrieved)
+- M. Maxwell (1998), *Two Theories of Morphology, One Implementation*, SILEWP 1998-001.
+  https://www.sil.org/resources/publications/entry/7814
+- G.E. Barton (1986), *Computational Complexity in Two-Level Morphology*, ACL.
+  https://aclanthology.org/P86-1009.pdf; and *Constraint Propagation in KIMMO Systems*, ACL.
+  https://aclanthology.org/P86-1008.pdf; Barton, Berwick & Ristad (1987), *Computational
+  Complexity and Natural Language*, MIT Press.
+- K. Koskenniemi & K. Church (1988), *Complexity, Two-Level Morphology and Finnish*, COLING.
+  https://aclanthology.org/C88-1069.pdf
+- L. Karttunen & K. Beesley (2005), *Twenty-Five Years of Finite-State Morphology*.
+  https://web.stanford.edu/group/cslipublications/cslipublications/koskenniemi-festschrift/8-karttunen-beesley.pdf
+- L. Karttunen, R. Kaplan & A. Zaenen (1992), *Two-Level Morphology with Composition*, COLING.
+  https://aclanthology.org/C92-1025.pdf
+- R. Kaplan & M. Kay (1994), *Regular Models of Phonological Rule Systems*, CL 20(3).
+  https://aclanthology.org/J94-3001.pdf
+- P. Norvig (1991), *Techniques for Automatic Memoization with Applications to Context-Free
+  Parsing*, CL 17(1). https://aclanthology.org/J91-1004/
+- M. Johnson (1995), *Memoization in Top-Down Parsing*, CL 21(3).
+  https://aclanthology.org/J95-3005.pdf
+- S. Shieber, Y. Schabes & F. Pereira (1995), *Principles and Implementation of Deductive
+  Parsing*. https://arxiv.org/abs/cmp-lg/9404008
+- S. Billot & B. Lang (1989), *The Structure of Shared Forests in Ambiguous Parsing*, ACL.
+  https://aclanthology.org/P89-1018.pdf
+- D. Klein & C. Manning (2003), *A\* Parsing: Fast Exact Viterbi Parse Selection*, HLT-NAACL.
+  https://nlp.stanford.edu/pubs/klein2003astar.pdf; (2001) *Parsing and Hypergraphs*, IWPT.
+- T. Sang, F. Bacchus, P. Beame, H. Kautz & T. Pitassi (2004), *Combining Component Caching and
+  Clause Learning for Effective Model Counting*, SAT.
+  http://www.cs.toronto.edu/~fbacchus/Papers/SangetalSAT2004.pdf
+- R. Dechter & R. Mateescu (2007), *AND/OR Search Spaces for Graphical Models*, AIJ.
+  https://ics.uci.edu/~dechter/publications/r147.pdf
+- A. Kishimoto & M. Müller (2004), *A General Solution to the Graph History Interaction
+  Problem*, AAAI. https://cdn.aaai.org/AAAI/2004/AAAI04-102.pdf
+- M. Mohri & R. Sproat (1996), *An Efficient Compiler for Weighted Rewrite Rules*, ACL.
+  https://aclanthology.org/P96-1031.pdf; L. Karttunen (1995), *The Replace Operator*, ACL.
+  https://arxiv.org/pdf/cmp-lg/9504032; W. Skut et al. (2004), bimachines.
+  https://arxiv.org/pdf/cs/0407046
+- M. Mohri, F. Pereira & M. Riley (2002), *Weighted Finite-State Transducers in Speech
+  Recognition*, CS&L. https://cs.nyu.edu/~mohri/pub/csl01.pdf; OpenFst.
+  https://cs.nyu.edu/~mohri/pub/fst.pdf
+- M. Hulden (2009), *Foma: a Finite-State Compiler and Library*, EACL.
+  https://aclanthology.org/E09-2008.pdf; HFST optimized-lookup.
+  https://github.com/hfst/hfst/wiki/OptimizedLookupFormat
+- M. Silfverberg & K. Lindén (2009), HFST runtime lookup (67k–308k words/s).
+- E. Antworth, PC-KIMMO v2 morphological parsing (chart over morphemes).
+  https://software.sil.org/pc-kimmo/morphological-parsing/
+- D. Weber, H.A. Black & S. McConnel (1988), *AMPLE: A Tool for Exploring Morphology*, SIL
+  OPAC 12. https://www.sil.org/resources/archives/5761
+- FLEx field evidence: flex-list "parsing broke down" thread (Awetí, ~9 min → ~100 s by manual
+  grammar audit). https://groups.google.com/g/flex-list/c/pkxCwIxIktg
+- Negative results consulted: Ibaraki (1977) dominance pruning (optimality-only guarantee);
+  Crawford et al. (1996) symmetry breaking (one-representative-per-orbit, unsound for
+  all-parses); Anders et al. (2024). https://arxiv.org/abs/2407.04419
+
+## 6. Corpus context
+
+*(top-N step counts per corpus — completed when the full-corpus scan lands)*
diff --git a/src/SIL.Machine.Morphology.HermitCrab.Tool/BatchCommand.cs b/src/SIL.Machine.Morphology.HermitCrab.Tool/BatchCommand.cs
new file mode 100644
index 000000000..2e4561873
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab.Tool/BatchCommand.cs
@@ -0,0 +1,241 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using ManyConsole;
+
+namespace SIL.Machine.Morphology.HermitCrab;
+
+/// <summary>
+/// Phase 0 of parse-optimization.md: parses every word in a word list and records a per-word result
+/// signature plus elapsed time, so two runs (before/after an engine change) can be diffed to confirm
+/// parse results are unchanged. Flushed per word and crash-resumable via --start, since some corpus
+/// words are expensive enough (100+ seconds, and one has been observed to crash a process outright) that
+/// losing partial progress on a multi-hour corpus run is unacceptable.
+/// </summary>
+internal class BatchCommand : ConsoleCommand
+{
+    private readonly HCContext _context;
+    private int _startIndex;
+    private string _ruleStatsPath;
+    private bool _parallel;
+    private int _parallelDegree = -1;
+
+    /// <summary>
+    /// Registers the <c>batch</c> command with ManyConsole: two positional arguments (word list,
+    /// output TSV) plus the <c>--start</c>, <c>--rule-stats</c> and <c>--parallel</c> options.
+    /// </summary>
+    /// <param name="context">Tool context supplying the Morpher and the output writer used by this command.</param>
+    public BatchCommand(HCContext context)
+    {
+        _context = context;
+
+        IsCommand(
+            "batch",
+            "Parses every word in a word list, recording a result signature and timing per word (see parse-optimization.md Phase 0)"
+        );
+        SkipsCommandSummaryBeforeRunning();
+        HasAdditionalArguments(2, "<wordlist-file> <output-tsv>");
+        // Numeric option values are machine-readable command-line input, so they are parsed with the
+        // invariant culture rather than whatever culture the current thread happens to run under.
+        HasOption(
+            "start=",
+            "0-based line index to resume at (for crash recovery; ignored with --parallel)",
+            v => _startIndex = int.Parse(v, System.Globalization.CultureInfo.InvariantCulture)
+        );
+        HasOption(
+            "rule-stats=",
+            "accumulate per-rule firing stats (category/stem/allomorph/environment buckets, with example "
+                + "words) across the whole run and write a report to {FILE} -- run with --sequential, the "
+                + "counters are not thread-safe",
+            v => _ruleStatsPath = v
+        );
+        // "parallel:" takes an optional value: the bare flag turns parallel mode on and leaves the
+        // degree at -1; "--parallel:N" additionally records N as the requested degree.
+        HasOption(
+            "parallel:",
+            "parse words concurrently across a load-balanced, longest-word-first Parallel.ForEach "
+                + "(parse-optimization.md Phase 8a) -- requires the Morpher itself to be --sequential for "
+                + "the per-word memo tables to engage; degree defaults to Environment.ProcessorCount, or "
+                + "{N} if given; trades --start crash-resume for speed (output is buffered and written "
+                + "index-ordered at the end)",
+            v =>
+            {
+                _parallel = true;
+                if (!string.IsNullOrEmpty(v))
+                    _parallelDegree = int.Parse(v, System.Globalization.CultureInfo.InvariantCulture);
+            }
+        );
+    }
+
+    /// <summary>
+    /// Loads the word list, validates the option combination and dispatches to the sequential or
+    /// parallel runner.
+    /// </summary>
+    /// <param name="remainingArguments">Positional arguments: word-list path, then output TSV path.</param>
+    /// <returns>
+    /// The runner's exit code, or -1 when <c>--rule-stats</c> is combined with <c>--parallel</c> or a
+    /// negative <c>--start</c> is given for a sequential run.
+    /// </returns>
+    public override int Run(string[] remainingArguments)
+    {
+        string wordListPath = remainingArguments[0];
+        string outputPath = remainingArguments[1];
+
+        // Entries are trimmed and blank lines dropped BEFORE indexing, so --start and the index column
+        // of the output both refer to positions in this filtered list, not raw file line numbers.
+        string[] words = File.ReadAllLines(wordListPath).Select(w => w.Trim()).Where(w => w.Length > 0).ToArray();
+
+        if (_ruleStatsPath != null)
+        {
+            if (_parallel)
+            {
+                _context.Out.WriteLine(
+                    "ERROR: --rule-stats and --parallel cannot be combined (counters are not thread-safe)."
+                );
+                return -1;
+            }
+            if (_context.Morpher.MaxDegreeOfParallelism != 1)
+            {
+                _context.Out.WriteLine(
+                    "WARNING: --rule-stats requested without --sequential; per-rule counters are not "
+                        + "thread-safe and will be unreliable under within-word parallelism."
+                );
+            }
+            _context.Morpher.AccumulateRuleStats = true;
+        }
+
+        if (_parallel)
+        {
+            if (_context.Morpher.MaxDegreeOfParallelism != 1)
+            {
+                _context.Out.WriteLine(
+                    "WARNING: --parallel requested without --sequential; the per-word memo tables "
+                        + "(parse-optimization.md Phases 2/3/3b) only engage on the sequential cascade, so "
+                        + "this run will not get their benefit."
+                );
+            }
+            if (_startIndex > 0)
+            {
+                _context.Out.WriteLine("WARNING: --start is ignored under --parallel; running the full word list.");
+            }
+            return RunParallel(words, outputPath);
+        }
+
+        // A negative resume index would make the sequential runner open the output in truncate mode
+        // (append is only chosen for a positive index) and then fail indexing the word array, wiping
+        // the results of an earlier run. Reject it before any output file is touched.
+        if (_startIndex < 0)
+        {
+            _context.Out.WriteLine("ERROR: --start must be zero or greater (got {0}).", _startIndex);
+            return -1;
+        }
+
+        return RunSequential(words, outputPath);
+    }
+
+    /// <summary>
+    /// Parses the words one at a time starting at the resume index, writing a sentinel row and then a
+    /// result row per word to an auto-flushed TSV, and reporting progress every 100 indices.
+    /// </summary>
+    /// <param name="words">Word list to parse.</param>
+    /// <param name="outputPath">TSV file to write; appended to when the resume index is positive.</param>
+    /// <returns>Always 0.</returns>
+    private int RunSequential(string[] words, string outputPath)
+    {
+        bool resuming = _startIndex > 0;
+        using var output = new StreamWriter(outputPath, append: resuming) { AutoFlush = true };
+        var runTimer = Stopwatch.StartNew();
+        long parsedCount = 0;
+        long skippedCount = 0;
+        int total = words.Length;
+
+        for (int index = _startIndex; index < total; index++)
+        {
+            string current = words[index];
+
+            // The sentinel goes out before parsing starts, so if this word takes the process down the
+            // final line of the file tells a wrapper script where to pick up again (the
+            // run_sena_shards.ps1 precedent).
+            output.WriteLine($"{index}\t{current}\tSTARTED");
+            var outcome = ParseOneWord(current);
+            output.WriteLine($"{index}\t{current}\t{outcome.elapsedMs}\t{outcome.status}\t{outcome.signature}");
+
+            switch (outcome.status)
+            {
+                case "SKIPPED":
+                    skippedCount++;
+                    break;
+                default:
+                    parsedCount++;
+                    break;
+            }
+
+            if (index % 100 != 0)
+                continue;
+
+            _context.Out.WriteLine("[{0}/{1}]", index, total);
+            // The stats report is overwritten in full at each checkpoint (never appended), so a crash
+            // on a pathological word later in the run still leaves a report covering the words so far.
+            if (_ruleStatsPath != null)
+                WriteRuleStatsReport();
+        }
+
+        runTimer.Stop();
+        if (_ruleStatsPath != null)
+            WriteRuleStatsReport();
+
+        _context.Out.WriteLine(
+            "batch complete: {0} words parsed ({1} skipped), {2}ms total",
+            parsedCount,
+            skippedCount,
+            runTimer.ElapsedMilliseconds
+        );
+        return 0;
+    }
+
+    // Phase 8a: the earlier per-word AutoFlush writer is not thread-safe and crash-resume has no meaning
+    // once words are handed out out-of-order, so rows are buffered per index and written once at the end.
+    // Ordering the work queue longest-word-first, combined with the load-balanced (chunked, not static
+    // range) partitioner below, is what closes the 2.9x gap between wall clock and the perfect-packing
+    // bound measured on 2026-07-03 -- heavy words no longer cluster onto a few threads.
+    /// <summary>
+    /// Parses all words concurrently and writes one result row per word, in word-list order, once the
+    /// loop has finished.
+    /// </summary>
+    /// <param name="words">Word list to parse; the resume index is not consulted here.</param>
+    /// <param name="outputPath">TSV file to (over)write with the buffered rows.</param>
+    /// <returns>Always 0; an exception thrown by a worker propagates after completed rows are saved.</returns>
+    private int RunParallel(string[] words, string outputPath)
+    {
+        // Fail fast on an unusable output path instead of discovering it only after the whole corpus
+        // has been parsed. OpenOrCreate does not truncate, so results already in the file survive
+        // until this run actually has rows to write.
+        using (new FileStream(outputPath, FileMode.OpenOrCreate, FileAccess.Write)) { }
+
+        var rows = new string[words.Length];
+        int[] order = Enumerable.Range(0, words.Length).OrderByDescending(i => words[i].Length).ToArray();
+
+        var totalSw = Stopwatch.StartNew();
+        long parsed = 0,
+            skipped = 0;
+        long completed = 0;
+
+        // A non-positive degree leaves ParallelOptions at its default.
+        var parallelOptions = new ParallelOptions();
+        if (_parallelDegree > 0)
+            parallelOptions.MaxDegreeOfParallelism = _parallelDegree;
+
+        try
+        {
+            Parallel.ForEach(
+                Partitioner.Create(order, loadBalance: true),
+                parallelOptions,
+                i =>
+                {
+                    string word = words[i];
+                    (string status, long elapsedMs, string signature) = ParseOneWord(word);
+                    // Each worker writes only its own slot, so the row buffer needs no locking.
+                    rows[i] = $"{i}\t{word}\t{elapsedMs}\t{status}\t{signature}";
+                    if (status == "SKIPPED")
+                        Interlocked.Increment(ref skipped);
+                    else
+                        Interlocked.Increment(ref parsed);
+                    long n = Interlocked.Increment(ref completed);
+                    // NOTE(review): assumes _context.Out tolerates concurrent WriteLine calls (true for
+                    // Console.Out) -- confirm for other writers.
+                    if (n % 100 == 0)
+                        _context.Out.WriteLine("[{0}/{1}]", n, words.Length);
+                }
+            );
+        }
+        finally
+        {
+            totalSw.Stop();
+
+            // Written in a finally so that an exception escaping one word does not throw away every
+            // row already computed. Slots still null belong to words that never completed and are
+            // omitted; on a clean run every slot is filled and the file matches the index order.
+            using (var writer = new StreamWriter(outputPath, append: false))
+            {
+                foreach (string row in rows)
+                {
+                    if (row != null)
+                        writer.WriteLine(row);
+                }
+            }
+        }
+
+        _context.Out.WriteLine(
+            "batch complete: {0} words parsed ({1} skipped), {2}ms total",
+            parsed,
+            skipped,
+            totalSw.ElapsedMilliseconds
+        );
+        return 0;
+    }
+
+    /// <summary>
+    /// Parses a single word with the context's Morpher and times the call.
+    /// </summary>
+    /// <param name="word">Surface form to parse.</param>
+    /// <returns>
+    /// <c>("ok", elapsed milliseconds, result signature)</c> on success, or <c>("SKIPPED", 0, "-")</c>
+    /// when an <see cref="InvalidShapeException"/> is thrown.
+    /// </returns>
+    private (string status, long elapsedMs, string signature) ParseOneWord(string word)
+    {
+        var timer = Stopwatch.StartNew();
+        try
+        {
+            Word[] parses = _context.Morpher.ParseWord(word, out _).ToArray();
+            timer.Stop();
+            // The elapsed time is read before the signature is built, so it covers the parse only.
+            long elapsed = timer.ElapsedMilliseconds;
+            string signature = BuildSignature(parses);
+            return ("ok", elapsed, signature);
+        }
+        catch (InvalidShapeException)
+        {
+            // Reported as skipped, with no timing and a placeholder signature, rather than as a failure.
+            return ("SKIPPED", 0, "-");
+        }
+    }
+
+    /// <summary>
+    /// Overwrites the rule-stats file with the Morpher's current analysis and synthesis statistics.
+    /// </summary>
+    private void WriteRuleStatsReport()
+    {
+        using (var report = new StreamWriter(_ruleStatsPath, append: false))
+        {
+            RuleStatsReport.Write(report, "Analysis", _context.Morpher.AnalysisRuleStats);
+            RuleStatsReport.Write(report, "Synthesis", _context.Morpher.SynthesisRuleStats);
+        }
+    }
+
+    // The per-parse entries are sorted so the signature does not depend on the order in which the
+    // engine happened to produce the parses: two runs finding the same parses compare equal. Any
+    // difference in this signature therefore means parse RESULTS changed, which every phase in
+    // parse-optimization.md is required not to do.
+    /// <summary>
+    /// Builds a comparable text signature for a set of parse results.
+    /// </summary>
+    /// <param name="results">Parses to summarize; enumerated once.</param>
+    /// <returns>
+    /// <c>"-"</c> for no results; otherwise the ordinal-sorted entries joined by <c>";"</c>, each entry
+    /// being the <c>"+"</c>-joined morpheme ids, a <c>"|"</c>, and the shape's regex string.
+    /// </returns>
+    private static string BuildSignature(IEnumerable<Word> results)
+    {
+        var entries = new List<string>();
+        foreach (Word parse in results)
+        {
+            var morphemeIds = string.Join("+", parse.AllomorphsInMorphOrder.Select(a => a.Morpheme.Id));
+            var shapeText = parse.Shape.ToRegexString(parse.Stratum.CharacterDefinitionTable, true);
+            entries.Add(morphemeIds + "|" + shapeText);
+        }
+
+        if (entries.Count == 0)
+            return "-";
+
+        entries.Sort(StringComparer.Ordinal);
+        return string.Join(";", entries);
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab.Tool/HCContext.cs b/src/SIL.Machine.Morphology.HermitCrab.Tool/HCContext.cs
index e6c8866c0..42179d46c 100644
--- a/src/SIL.Machine.Morphology.HermitCrab.Tool/HCContext.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab.Tool/HCContext.cs
@@ -8,9 +8,9 @@ internal class HCContext(Language language, TextWriter outWriter)
     private Morpher _morpher;
     private readonly TextWriter _outWriter = outWriter;
 
-    public void Compile()
+    public void Compile(bool sequential = false)
     {
-        _morpher = new Morpher(new TraceManager(), _language);
+        _morpher = new Morpher(new TraceManager(), _language, sequential ? 1 : -1);
     }
 
     public Language Language
diff --git a/src/SIL.Machine.Morphology.HermitCrab.Tool/Program.cs b/src/SIL.Machine.Morphology.HermitCrab.Tool/Program.cs
index ff8e86bcc..3d30ef513 100644
--- a/src/SIL.Machine.Morphology.HermitCrab.Tool/Program.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab.Tool/Program.cs
@@ -20,6 +20,8 @@ public static int Main(string[] args)
         string scriptFile = null;
         bool showHelp = false;
         bool quitOnError = true;
+        bool sequential = false;
+        bool lexicalGate = false;
 
         var p = new OptionSet
         {
@@ -31,6 +33,20 @@ public static int Main(string[] args)
                 "continues when an error occurs while loading the configuration",
                 value => quitOnError = value == null
             },
+            {
+                "sequential",
+                "parse single-threaded (maxDegreeOfParallelism: 1) -- the mode a caller that "
+                    + "parallelizes across words itself (e.g. batch corpus runs) should use; also the "
+                    + "only mode the analysis nogood cache (parse-optimization.md Phase 2) currently covers",
+                value => sequential = value != null
+            },
+            {
+                "lexical-gate",
+                "enable Morpher.EnableLexicalGating (parse-optimization.md Phase 5) -- default off, "
+                    + "highest-risk optimization; use for A/B corpus verification against a run without "
+                    + "this flag",
+                value => lexicalGate = value != null
+            },
             { "h|help", "show this help message and exit", value => showHelp = value != null },
         };
 
@@ -66,7 +82,8 @@ public static int Main(string[] args)
 
             context = new HCContext(language, output ?? Console.Out);
             Console.Write("Compiling rules... ");
-            context.Compile();
+            context.Compile(sequential);
+            context.Morpher.EnableLexicalGating = lexicalGate;
             Console.WriteLine("done.");
             Console.WriteLine("{0} loaded.", language.Name);
             Console.WriteLine();
@@ -92,6 +109,7 @@ public static int Main(string[] args)
             new TracingCommand(context),
             new TestCommand(context),
             new StatsCommand(context),
+            new BatchCommand(context),
         };
 
         string input;
diff --git a/src/SIL.Machine.Morphology.HermitCrab.Tool/RuleStatsReport.cs b/src/SIL.Machine.Morphology.HermitCrab.Tool/RuleStatsReport.cs
new file mode 100644
index 000000000..bb6349515
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab.Tool/RuleStatsReport.cs
@@ -0,0 +1,56 @@
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using SIL.Machine.Rules;
+
+namespace SIL.Machine.Morphology.HermitCrab;
+
+/// <summary>
+/// Formats the InstrumentedRule tree (see Morpher.AccumulateRuleStats) as a flat, grep-able text report:
+/// one line per rule with its totals, followed by its bucket breakdowns sorted so the rarest (most
+/// suspicious) buckets are easy to spot against the common case -- that's the "300 times vs 4 times, are
+/// the 4 wrong?" comparison this whole feature exists for.
+/// </summary>
+internal static class RuleStatsReport
+{
+    public static void Write(TextWriter writer, string label, InstrumentedRule<Word, int> root)
+    {
+        writer.WriteLine($"==== {label} ====");
+        if (root == null)
+        {
+            writer.WriteLine("(no rule tree)");
+            return;
+        }
+        WriteRule(writer, root, "");
+        writer.WriteLine();
+    }
+
+    private static void WriteRule(TextWriter writer, InstrumentedRule<Word, int> rule, string path)
+    {
+        if (rule == null)
+            return;
+
+        string name = rule.Name ?? "?"; // same placeholder at every depth, not only for the root
+        string fullPath = string.IsNullOrEmpty(path) ? name : $"{path} > {name}";
+        if (rule.InputCount > 0 || rule.BucketGroups.Count > 0)
+        {
+            double elapsedMs = rule.ElapsedTime * 1000.0 / Stopwatch.Frequency; // Stopwatch ticks -> ms
+            writer.WriteLine(
+                $"{fullPath}\tinputs={rule.InputCount}\tsuccesses={rule.SuccessCount}\toutputs={rule.OutputCount}\telapsedMs={elapsedMs:F0}"
+            );
+
+            foreach (var group in rule.BucketGroups.OrderBy(g => g.Key))
+            {
+                writer.WriteLine($"  [{group.Key}]");
+                foreach (var bucket in group.Value.OrderByDescending(b => b.Value.Count)) // commonest first
+                {
+                    string examples = string.Join(" | ", bucket.Value.Examples);
+                    writer.WriteLine($"    {bucket.Key}: {bucket.Value.Count}\te.g. {examples}");
+                }
+            }
+        }
+
+        foreach (var sub in rule.SubRules)
+            WriteRule(writer, sub, fullPath);
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs
index f401ce0fa..f93fec49c 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs
@@ -10,7 +10,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
-    internal class AnalysisAffixTemplateRule : IRule<Word, int>
+    internal class AnalysisAffixTemplateRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly AffixTemplate _template;
@@ -18,6 +18,7 @@ internal class AnalysisAffixTemplateRule : IRule<Word, int>
 
         public AnalysisAffixTemplateRule(Morpher morpher, AffixTemplate template)
         {
+            Name = template.Name;
             _morpher = morpher;
             _template = template;
             _rules = new List<IRule<Word, int>>(
@@ -27,9 +28,10 @@ public AnalysisAffixTemplateRule(Morpher morpher, AffixTemplate template)
                     FreezableEqualityComparer<Word>.Default
                 ))
             );
+            AddSubRules(_rules);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_template))
                 return Enumerable.Empty<Word>();
@@ -41,7 +43,11 @@ public IEnumerable<Word> Apply(Word input)
             if (_morpher.TraceManager.IsTracing)
                 _morpher.TraceManager.BeginUnapplyTemplate(_template, input);
 
-            Word inWord = input.Clone();
+            // Shape-sharing clone (parse-optimization.md Phase 10a): this clone is frozen on the next
+            // line and nothing between clone and freeze touches the shape -- slot rules only ever READ
+            // it (FST matching), and their outputs are separate deep clones. Falls back to a deep copy
+            // automatically when input's shape isn't frozen yet (e.g. unmemoized/tracing paths).
+            Word inWord = input.CloneShareFrozenShape();
             inWord.Freeze();
 
             var output = new HashSet<Word>(FreezableEqualityComparer<Word>.Default);
@@ -60,6 +66,7 @@ public IEnumerable<Word> Apply(Word input)
                 sfs.Add(fs);
                 outWord.SyntacticFeatureStruct = sfs;
             }
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs
index 4bdd3c959..68bc7e5e4 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs
@@ -6,7 +6,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
-    internal class AnalysisLanguageRule : IRule<Word, int>
+    internal class AnalysisLanguageRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly List<Stratum> _strata;
@@ -14,12 +14,14 @@ internal class AnalysisLanguageRule : IRule<Word, int>
 
         public AnalysisLanguageRule(Morpher morpher, Language language)
         {
+            Name = "Analysis";
             _morpher = morpher;
             _strata = language.Strata.Reverse().ToList();
             _rules = _strata.Select(stratum => stratum.CompileAnalysisRule(morpher)).ToList();
+            AddSubRules(_rules);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             var inputSet = new HashSet<Word>(FreezableEqualityComparer<Word>.Default) { input };
             var tempSet = new HashSet<Word>(FreezableEqualityComparer<Word>.Default);
@@ -45,6 +47,7 @@ public IEnumerable<Word> Apply(Word input)
                 inputSet = outputSet;
             }
 
+            AddRuleStats(results.Count);
             return results;
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisScope.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisScope.cs
new file mode 100644
index 000000000..6eab999af
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisScope.cs
@@ -0,0 +1,88 @@
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+
+namespace SIL.Machine.Morphology.HermitCrab
+{
+    /// <summary>
+    /// Per-parse cache carrier threaded through <see cref="Word"/> clones exactly like
+    /// <see cref="Word.CurrentTrace"/> -- reference-shared, excluded from
+    /// <c>Word.FreezeImpl</c>/<c>Word.ValueEquals</c> so existing dedup semantics are unchanged. Holds the
+    /// analysis-cascade memo table from parse-optimization.md Phases 2 (nogoods) and 3 (positive memo with
+    /// trail replay) -- see <see cref="MemoizedCombinationRuleCascade"/>.
+    /// One instance per <see cref="Morpher.ParseWord(string, out object)"/> call: entries are state facts
+    /// about a specific parse (a state key does not encode the target surface word), so sharing this
+    /// across concurrent parses of different words would be unsound without also scoping the key to the
+    /// word -- that cross-word extension is explicitly deferred (parse-optimization.md §4.6).
+    /// Thread-safe because a single word's analysis can itself run in parallel
+    /// (<c>ParallelCombinationRuleCascade</c>, used when <see cref="Morpher.MaxDegreeOfParallelism"/> > 1) --
+    /// though today only the sequential cascade actually reads/writes this (see
+    /// <see cref="MemoizedCombinationRuleCascade"/>'s doc comment).
+    /// </summary>
+    internal sealed class AnalysisScope
+    {
+        public AnalysisScope(bool lexicalGatingActive)
+        {
+            LexicalGatingActive = lexicalGatingActive;
+        }
+
+        // parse-optimization.md Phase 5: precomputed once per parse (Morpher.ParseWord already knows
+        // whether tracing/guessRoot/the grammar's own qualification allow the gate for this call) rather
+        // than re-checked per candidate -- cheap, and keeps MemoizedCombinationRuleCascade from needing a
+        // direct Morpher reference just to read three unrelated conditions.
+        public bool LexicalGatingActive { get; }
+
+        // OOM guard (parse-optimization.md Phase 3): cinacemerwa-class words have crashed a test host on
+        // memory before budgets existed to stop them. Both Memo and TemplateMemo hold actual Word lists,
+        // so the cap is one combined budget over both (see HasMemoCapacity). Past the cap, new entries are
+        // simply not memoized -- correctness is unaffected, only the hit rate degrades.
+        private const int MaxMemoEntries = 100_000;
+
+        public ConcurrentDictionary<AnalysisStateKey, MemoEntry> Memo { get; } =
+            new ConcurrentDictionary<AnalysisStateKey, MemoEntry>();
+
+        // Same discipline, different battery: template unapplication (AnalysisStratumRule.ApplyTemplates)
+        // reads exactly what AnalysisStateKey captures (shape, syntactic FS, stratum, non-heads) and
+        // nothing trail-order-dependent, so equal-keyed arrivals get equal template outputs modulo the
+        // trail prefix -- the same Word.ReplayOnto graft Phase 3 uses. Kept as a separate table from
+        // Memo because the two record different computations over the same key space (a state's mrule
+        // subtree vs. its one-level template outputs); merging them would conflate a "no mrule results"
+        // nogood with "no template outputs". Measured motivation (2026-07-03, atawirambo): the template
+        // battery was invoked 38,840 times against ~2,581 unique keys -- 93% of total parse wall time --
+        // because Phases 2/3 memoized only the mrule cascade and templates sat outside it.
+        public ConcurrentDictionary<AnalysisStateKey, MemoEntry> TemplateMemo { get; } =
+            new ConcurrentDictionary<AnalysisStateKey, MemoEntry>();
+
+        // Keys currently under expansion on some call stack -- guards the in-flight re-entry case (a
+        // multiApp cascade can reach the same state again before its own first expansion has completed,
+        // e.g. via a self-loop). A hit here must fall through to plain, unmemoized expansion rather than
+        // read a nonexistent/partial entry or deadlock; see MemoizedCombinationRuleCascade.ApplyRules.
+        public ConcurrentDictionary<AnalysisStateKey, byte> InProgress { get; } =
+            new ConcurrentDictionary<AnalysisStateKey, byte>();
+
+        public bool HasMemoCapacity => Memo.Count + TemplateMemo.Count < MaxMemoEntries;
+    }
+
+    /// <summary>
+    /// A memoized analysis-cascade subtree (parse-optimization.md Phase 3). <see cref="Results"/> empty =
+    /// the Phase 2 "nogood" case (subtree proved to yield nothing); non-empty = the Phase 3 positive case,
+    /// replayable onto a differently-ordered arrival at the same <see cref="AnalysisStateKey"/> via
+    /// <see cref="Word.ReplayOnto"/>, using <see cref="MruleTrailPrefixLength"/>/
+    /// <see cref="NonHeadPrefixLength"/> to split each stored result's trail/non-heads into the
+    /// (discarded, replaced) prefix and the (kept) subtree-local suffix. There is no "budget exhausted /
+    /// incomplete" flag: this branch has no step/time budget infrastructure (see parse-optimization.md
+    /// "Branch context"), so every recorded subtree was explored to full completion.
+    /// </summary>
+    internal sealed class MemoEntry
+    {
+        public IReadOnlyList<Word> Results { get; }
+        public int MruleTrailPrefixLength { get; }
+        public int NonHeadPrefixLength { get; }
+
+        public MemoEntry(IReadOnlyList<Word> results, int mruleTrailPrefixLength, int nonHeadPrefixLength)
+        {
+            MruleTrailPrefixLength = mruleTrailPrefixLength;
+            NonHeadPrefixLength = nonHeadPrefixLength;
+            Results = results;
+        }
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStateKey.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStateKey.cs
new file mode 100644
index 000000000..fe48f977d
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStateKey.cs
@@ -0,0 +1,117 @@
+using System;
+using System.Collections.Generic;
+using SIL.Machine.Annotations;
+using SIL.Machine.FeatureModel;
+
+namespace SIL.Machine.Morphology.HermitCrab
+{
+    /// <summary>
+    /// Order-independent identity of an analysis-cascade node, used by the memo cache in
+    /// <see cref="AnalysisStratumRule"/>'s Unordered-mode morphological rule cascade (see
+    /// parse-optimization.md Phases 2-3). Two Words with an equal key are guaranteed to make identical
+    /// decisions in every analysis-side morphological rule that cascade can invoke -- verified by
+    /// inspecting what each one reads from its input:
+    /// <list type="bullet">
+    /// <item><see cref="MorphologicalRules.AnalysisAffixProcessRule"/>: Shape (FST pattern match) and
+    /// <see cref="Word.SyntacticFeatureStruct"/> (unifiability gate) plus a per-rule unapplication count.</item>
+    /// <item><see cref="MorphologicalRules.AnalysisCompoundingRule"/>: adds <see cref="Word.NonHeadCount"/>
+    /// (<c>MaxStemCount</c> gate) -- it never reads the non-heads' own content, only the count.</item>
+    /// <item><see cref="MorphologicalRules.AnalysisRealizationalAffixProcessRule"/>: adds
+    /// <see cref="Word.RealizationalFeatureStruct"/>.</item>
+    /// </list>
+    /// but never the ORDER those rules were unapplied in -- exactly the redundancy this key collapses.
+    /// Deliberately excludes fields <see cref="Word.ValueEquals"/> includes for a different purpose
+    /// (result dedup): the unapplication trail as an ordered SEQUENCE (replaced here by an
+    /// order-independent multiset) and <c>_isLastAppliedRuleFinal</c>/<c>IsPartial</c>, which are not read
+    /// by any analysis-side rule (grep-verified against every file matching
+    /// <c>MorphologicalRules/Analysis*.cs</c> and <c>PhonologicalRules/Analysis*.cs</c>).
+    /// </summary>
+    internal readonly struct AnalysisStateKey : IEquatable<AnalysisStateKey>
+    {
+        private readonly Shape _shape;
+        private readonly Stratum _stratum;
+        private readonly FeatureStruct _syntacticFS;
+        private readonly FeatureStruct _realizationalFS;
+        private readonly int _nonHeadCount;
+        private readonly IReadOnlyDictionary<IMorphologicalRule, int> _ruleCounts;
+        private readonly int _hashCode;
+
+        public AnalysisStateKey(Word word)
+        {
+            _shape = word.Shape;
+            _stratum = word.Stratum;
+            _syntacticFS = word.SyntacticFeatureStruct;
+            _realizationalFS = word.RealizationalFeatureStruct;
+            _nonHeadCount = word.NonHeadCount;
+            _ruleCounts = word.UnappliedRuleCounts; // held by reference; NOTE(review): confirm never mutated later
+
+            // Defensive, not incidental: AnalysisAffixTemplateRule.Apply reassigns SyntacticFeatureStruct
+            // to a freshly-cloned, unfrozen FeatureStruct AFTER the owning Word is already frozen (its
+            // setter has no CheckFrozen() guard, unlike RealizationalFeatureStruct's -- a pre-existing
+            // quirk nothing previously surfaced, since Word's own FreezeImpl/ValueEquals deliberately
+            // exclude SyntacticFeatureStruct). Freeze() is idempotent (FeatureStruct.cs: "if (IsFrozen)
+            // return") and every write site in this codebase clones before mutating (never mutates a
+            // FeatureStruct another reference might still be holding), so freezing here on read is safe.
+            _shape.Freeze();
+            _syntacticFS.Freeze();
+            _realizationalFS.Freeze();
+
+            int hash = 17; // NOTE(review): relies on unchecked (default) overflow wrapping -- confirm
+            hash = hash * 31 + _shape.GetFrozenHashCode();
+            hash = hash * 31 + (_stratum?.GetHashCode() ?? 0);
+            hash = hash * 31 + _syntacticFS.GetFrozenHashCode();
+            hash = hash * 31 + _realizationalFS.GetFrozenHashCode();
+            hash = hash * 31 + _nonHeadCount;
+            if (_ruleCounts != null)
+            {
+                // XOR, not the usual *31 rolling combine: the multiset is unordered, so the combination
+                // must be commutative -- two dictionaries with the same entries built up in different
+                // unapplication orders must hash identically.
+                int multisetHash = 0;
+                foreach (KeyValuePair<IMorphologicalRule, int> kvp in _ruleCounts)
+                    multisetHash ^= (kvp.Key.GetHashCode() * 397) ^ kvp.Value;
+                hash = hash * 31 + multisetHash;
+            }
+            _hashCode = hash;
+        }
+
+        public override int GetHashCode() => _hashCode;
+
+        public override bool Equals(object obj) => obj is AnalysisStateKey other && Equals(other);
+
+        public bool Equals(AnalysisStateKey other)
+        {
+            if (_hashCode != other._hashCode) // precomputed in the constructor: cheap early reject
+                return false;
+            if (_nonHeadCount != other._nonHeadCount || !ReferenceEquals(_stratum, other._stratum))
+                return false;
+            if (!_shape.ValueEquals(other._shape))
+                return false;
+            if (
+                !_syntacticFS.ValueEquals(other._syntacticFS)
+                || !_realizationalFS.ValueEquals(other._realizationalFS)
+            )
+                return false;
+            return RuleCountsEqual(_ruleCounts, other._ruleCounts);
+        }
+
+        private static bool RuleCountsEqual(
+            IReadOnlyDictionary<IMorphologicalRule, int> a,
+            IReadOnlyDictionary<IMorphologicalRule, int> b
+        )
+        {
+            int aCount = a?.Count ?? 0;
+            int bCount = b?.Count ?? 0;
+            if (aCount != bCount)
+                return false;
+            if (aCount == 0) // a null dictionary and an empty one compare equal
+                return true;
+            foreach (KeyValuePair<IMorphologicalRule, int> kvp in a)
+            {
+                if (!b.TryGetValue(kvp.Key, out int otherCount) || otherCount != kvp.Value)
+                    return false;
+            }
+            return true;
+        }
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs
index aadef0838..318730151 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs
@@ -8,7 +8,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
-    internal class AnalysisStratumRule : IRule<Word, int>
+    internal class AnalysisStratumRule : InstrumentedRule<Word, int>
     {
         private readonly IRule<Word, int> _mrulesRule;
         private readonly IRule<Word, int> _prulesRule;
@@ -18,6 +18,7 @@ internal class AnalysisStratumRule : IRule<Word, int>
 
         public AnalysisStratumRule(Morpher morpher, Stratum stratum)
         {
+            Name = stratum.Name;
             _stratum = stratum;
             _morpher = morpher;
             _prulesRule = new LinearRuleCascade<Word, int>(
@@ -47,14 +48,16 @@ public AnalysisStratumRule(Morpher morpher, Stratum stratum)
                     break;
                 case MorphologicalRuleOrder.Unordered:
                     // Single-threaded when the caller caps within-word parallelism (e.g. it
-                    // parallelizes across words itself); parallel cascade otherwise.
+                    // parallelizes across words itself); parallel cascade otherwise. The sequential
+                    // cascade additionally memoizes analysis states (parse-optimization.md Phases 2-3) --
+                    // the parallel one does not yet, see MemoizedCombinationRuleCascade's doc comment.
                     _mrulesRule =
                         morpher.MaxDegreeOfParallelism == 1
                             ? (IRule<Word, int>)
-                                new CombinationRuleCascade<Word, int>(
+                                new MemoizedCombinationRuleCascade(
                                     mrules,
-                                    true,
-                                    FreezableEqualityComparer<Word>.Default
+                                    FreezableEqualityComparer<Word>.Default,
+                                    morpher
                                 )
                             : new ParallelCombinationRuleCascade<Word, int>(
                                 mrules,
@@ -68,6 +71,9 @@ public AnalysisStratumRule(Morpher morpher, Stratum stratum)
                             };
                     break;
             }
+            AddSubRule(_prulesRule);
+            AddSubRule(_templatesRule);
+            AddSubRule(_mrulesRule);
         }
 
         private IRule<Word, int> CompileAffixTemplate(AffixTemplate template, Morpher morpher)
@@ -106,8 +112,9 @@ private IRule<Word, int> CompilePhonologicalRule(IPhonologicalRule prule, Morphe
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
+            long startTime = Stopwatch.GetTimestamp();
             if (_morpher.TraceManager.IsTracing)
                 _morpher.TraceManager.BeginUnapplyStratum(_stratum, input);
 
@@ -117,6 +124,29 @@ public IEnumerable<Word> Apply(Word input)
 
             _prulesRule.Apply(input);
             input.Freeze();
+
+            // parse-optimization.md Phase 4 Gate B: once phonological unapplication has grown this
+            // candidate past the longest form the grammar's own lexicon+rules could ever produce (root +
+            // every rule's own max insertion, see GrammarAnalyzer), no morphological unapplication from
+            // here can ever recover a valid analysis -- affix rules only ever shrink during analysis (they
+            // reverse synthesis's insertions), so the length can only go down from here. Skip the
+            // (potentially exponential) morphological cascade entirely rather than let it search a subtree
+            // that is already provably dead. Null MaxAnalysisLength means the grammar couldn't be measured
+            // this exactly (a compounding rule, or a phonological pattern with quantifiers/groups) -- see
+            // GrammarAnalyzer's remarks -- so the gate is off, matching today's unbounded behavior exactly.
+            // Bypassed while tracing (ground rule 1): the early return below would also skip this word's
+            // EndUnapplyStratum trace event further down.
+            if (
+                !_morpher.TraceManager.IsTracing
+                && _morpher.MaxAnalysisLength is int maxLength
+                && input.Shape.SegmentCount() > maxLength
+            )
+            {
+                ElapsedTime += Stopwatch.GetTimestamp() - startTime;
+                AddRuleStats(0);
+                return Enumerable.Empty<Word>();
+            }
+
             IDictionary<Shape, Word> shapeWord = null;
             // Don't merge if tracing because it messes up the tracing.
             bool mergeEquivalentAnalyses = _morpher.MergeEquivalentAnalyses && !_morpher.TraceManager.IsTracing;
@@ -151,9 +181,16 @@ public IEnumerable<Word> Apply(Word input)
                 if (_morpher.MaxUnapplications > 0 && output.Count >= _morpher.MaxUnapplications)
                     break;
             }
+            ElapsedTime += Stopwatch.GetTimestamp() - startTime;
+            AddRuleStats(output.Count);
             return output;
         }
 
+        // Test hook: incremented on every template-memo replay (see ApplyTemplateBattery). The
+        // equivalence test that covers the replay path asserts this is nonzero so it can never go
+        // vacuous -- a memo that silently stops firing would otherwise look exactly like a passing test.
+        internal static long DiagTemplateMemoHits;
+
         private IEnumerable<Word> ApplyMorphologicalRules(Word input)
         {
             foreach (Word mruleOutWord in _mrulesRule.Apply(input))
@@ -173,9 +210,47 @@ private IEnumerable<Word> ApplyMorphologicalRules(Word input)
             }
         }
 
+        // Runs the template battery for `input`, memoized by AnalysisStateKey (parse-optimization.md
+        // Phase 3, extended 2026-07-03). Template unapplication reads only what the key captures --
+        // shape, syntactic FS, stratum, non-heads -- never the trail's ORDER, so an equal-keyed arrival
+        // gets the stored outputs replayed (Word.ReplayOnto grafts the new arrival's own trail/non-head
+        // prefix, identical to the mrule-cascade memo). Measured motivation: on Sena's `atawirambo`, the
+        // template battery ran 38,840 times against ~2,581 unique keys (93% of parse wall time).
+        // Sequential only and skipped while tracing (scope is null then), matching the mrule memo's
+        // scoping; no in-flight re-entry guard is needed because _templatesRule.Apply is eager and
+        // self-contained (the template<->mrule recursion lives in this class's enumerators, outside the
+        // memoized call). The hit counter is static, hence shared by every Morpher on every thread, so
+        // it is bumped atomically: callers may run several sequential-mode parses concurrently.
+        private IEnumerable<Word> ApplyTemplateBattery(Word input)
+        {
+            AnalysisScope scope = input.AnalysisScope;
+            if (scope == null || _morpher.MaxDegreeOfParallelism != 1)
+                return _templatesRule.Apply(input);
+
+            var key = new AnalysisStateKey(input);
+            if (scope.TemplateMemo.TryGetValue(key, out MemoEntry entry))
+            {
+                var replayed = new List<Word>(entry.Results.Count);
+                System.Threading.Interlocked.Increment(ref DiagTemplateMemoHits);
+                foreach (Word stored in entry.Results)
+                    replayed.Add(stored.ReplayOnto(input, entry.MruleTrailPrefixLength, entry.NonHeadPrefixLength));
+                return replayed;
+            }
+
+            var results = new List<Word>(_templatesRule.Apply(input));
+            if (scope.HasMemoCapacity)
+            {
+                scope.TemplateMemo.TryAdd(
+                    key,
+                    new MemoEntry(results, input.MorphologicalRuleTrailLength, input.NonHeadCount)
+                );
+            }
+            return results;
+        }
+
         private IEnumerable<Word> ApplyTemplates(Word input)
         {
-            foreach (Word tempOutWord in _templatesRule.Apply(input))
+            foreach (Word tempOutWord in ApplyTemplateBattery(input))
             {
                 switch (_stratum.MorphologicalRuleOrder)
                 {
diff --git a/src/SIL.Machine.Morphology.HermitCrab/GrammarAnalyzer.cs b/src/SIL.Machine.Morphology.HermitCrab/GrammarAnalyzer.cs
new file mode 100644
index 000000000..f64282303
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab/GrammarAnalyzer.cs
@@ -0,0 +1,199 @@
+using System.Collections.Generic;
+using System.Linq;
+using SIL.Machine.Matching;
+using SIL.Machine.Morphology.HermitCrab.MorphologicalRules;
+using SIL.Machine.Morphology.HermitCrab.PhonologicalRules;
+
+namespace SIL.Machine.Morphology.HermitCrab
+{
+    /// <summary>
+    /// Computes a grammar-wide, word-independent bound on how long an underlying (analysis) form can
+    /// validly be, relative to the lexicon and rule set actually declared (parse-optimization.md Phase 4's
+    /// "Gate B" -- Gate A, a mirror-image synthesis-side bound, was attempted and reverted; see the note
+    /// in <see cref="Morpher.SynthesizeAnalysis"/>). The bound is a deliberately loose over-approximation
+    /// -- summed across every rule's own already-declared reapplication limit
+    /// (<see cref="Morphology.HermitCrab.MorphologicalRules.AffixProcessRule.MaxApplicationCount"/>,
+    /// <see cref="Morpher.DeletionReapplications"/>), never estimated -- so it can prune a candidate only
+    /// when NO combination of rules in the grammar could ever produce something that long, regardless of
+    /// which specific root or derivation path is under consideration. Returns null (meaning "no admissible
+    /// bound, gate off") the moment any rule's shape falls outside what this class knows how to measure
+    /// exactly (quantifiers/groups/alternations in a phonological Lhs/Rhs, or a compounding rule present
+    /// at all, since compounding combines multiple full root lengths rather than adding a bounded affix)
+    /// -- per the plan's own rule: skipping only costs pruning opportunity, an admissible bound must never
+    /// be guessed.
+    /// </summary>
+    public static class GrammarAnalyzer
+    {
+        /// <summary>
+        /// The longest possible underlying form (in real segments) any analysis candidate could validly
+        /// represent: each stratum's longest root allomorph, plus every affix/realizational rule's own
+        /// maximum possible net insertion (its allomorphs' <see cref="InsertSegments"/>/
+        /// <see cref="InsertSimpleContext"/> actions, summed and multiplied by
+        /// <see cref="Morphology.HermitCrab.MorphologicalRules.AffixProcessRule.MaxApplicationCount"/>),
+        /// plus every phonological deletion-type subrule's maximum possible net restoration. Null if any rule
+        /// can't be measured this way (see class remarks) or the total does not fit in an <c>int</c>.
+        /// </summary>
+        /// <remarks>
+        /// The phonological term is compounding, not additive: <c>AnalysisRewriteRule</c>'s Deletion
+        /// reapply loop runs <see cref="Morpher.DeletionReapplications"/> + 1 passes, and each pass is a
+        /// <c>SimultaneousPhonologicalPatternRule</c> sweep that can restore EVERY non-overlapping match
+        /// site in the current shape at once, not just one -- a real case (<c>RewriteRuleTests
+        /// .MultipleDeletionRules</c>: an 8-segment root deletes two independent "ii" clusters down to a
+        /// 4-segment surface form in one pass) needs more than "count of subrules" restored segments per
+        /// pass. Bounding the number of sites by the current running length (itself already an
+        /// over-approximation of the true pre-phonology length at this point) keeps this sound: real growth
+        /// can never exceed <c>runningLength * subruleDelta</c> per pass, since a simultaneous sweep cannot
+        /// match more sites than there are segments to match against.
+        /// </remarks>
+        public static int? ComputeMaxAnalysisLength(Language language, int deletionReapplications)
+        {
+            // 64-bit accumulator: the phonological term multiplies the running bound on every pass, so
+            // 32-bit arithmetic could wrap into a small/negative "bound" and prune valid candidates.
+            long bound = 0;
+            long passes = (long)deletionReapplications + 1;
+            foreach (Stratum stratum in language.Strata)
+            {
+                if (stratum.MorphologicalRules.OfType<CompoundingRule>().Any())
+                    return null;
+
+                bound += stratum.Entries.SelectMany(e => e.Allomorphs).Select(SegmentCount).DefaultIfEmpty(0).Max();
+                foreach (AffixProcessRule rule in stratum.MorphologicalRules.OfType<AffixProcessRule>())
+                    bound += (long)MaxAllomorphInsertion(rule.Allomorphs) * rule.MaxApplicationCount;
+                foreach (var rule in stratum.MorphologicalRules.OfType<RealizationalAffixProcessRule>())
+                    bound += MaxAllomorphInsertion(rule.Allomorphs);
+
+                long phonoGrowthRate = 0;
+                foreach (RewriteRule rule in stratum.PhonologicalRules.OfType<RewriteRule>())
+                {
+                    if (!TryGetFlatSegmentCount(rule.Lhs, out int lhsCount))
+                        return null;
+                    foreach (RewriteSubrule sr in rule.Subrules)
+                    {
+                        if (!TryGetFlatSegmentCount(sr.Rhs, out int rhsCount))
+                            return null;
+                        if (lhsCount > rhsCount)
+                            phonoGrowthRate += lhsCount - rhsCount;
+                    }
+                }
+                for (long pass = 0; pass < passes && phonoGrowthRate > 0 && bound > 0 && bound <= int.MaxValue; pass++)
+                    bound += bound * phonoGrowthRate;
+                if (bound > int.MaxValue)
+                    return null; // not representable: report "no admissible bound" rather than a wrapped value
+            }
+            return (int)bound;
+        }
+
+        /// <summary>
+        /// parse-optimization.md Phase 5's edge-stripper qualification: true only if every affix rule in
+        /// the grammar is a pure "copy a contiguous span of the input, optionally with material inserted
+        /// only before/after it" transform, and no stratum has a <see cref="CompoundingRule"/> or a
+        /// <see cref="MetathesisRule"/>. This is the soundness precondition for
+        /// <see cref="Morpher.EnableLexicalGating"/>: <see cref="RootAllomorphTrie.ContainsRootAnywhere"/>
+        /// assumes a root that exists in the lexicon must still appear as an intact contiguous window in
+        /// any not-yet-fully-analyzed candidate. Reduplication (the same input span copied more than once)
+        /// and infixation (material inserted BETWEEN two copied spans, splitting one span's material from
+        /// another's) both break that assumption -- the true root window would be split or duplicated, so
+        /// a real root could be invisible to a contiguous-window search. Compounding combines multiple
+        /// independent root windows, and metathesis physically reorders segments -- both are also outside
+        /// what a contiguous-window search over the ORIGINAL lexicon strings can safely reason about.
+        /// This is a single whole-language verdict, not per-stratum: simpler and strictly safer than the
+        /// per-stratum granularity the plan sketches (a grammar with one unqualified stratum disables the
+        /// gate everywhere rather than only where it's actually unsafe).
+        /// </summary>
+        public static bool IsEdgeStripperQualified(Language language)
+        {
+            foreach (Stratum stratum in language.Strata)
+            {
+                // Compounding or metathesis anywhere disqualifies the whole language outright.
+                bool combinesOrReorders =
+                    stratum.MorphologicalRules.OfType<CompoundingRule>().Any()
+                    || stratum.PhonologicalRules.OfType<MetathesisRule>().Any();
+                if (combinesOrReorders)
+                    return false;
+                // Otherwise every allomorph of every affix-type rule must itself be an edge stripper.
+                bool affixRulesQualify = stratum
+                    .MorphologicalRules.OfType<AffixProcessRule>()
+                    .All(rule => rule.Allomorphs.All(a => IsEdgeStripperAllomorph(a)));
+                bool allAffixRulesQualify =
+                    affixRulesQualify
+                    && stratum
+                        .MorphologicalRules.OfType<RealizationalAffixProcessRule>()
+                        .All(rule => rule.Allomorphs.All(a => IsEdgeStripperAllomorph(a)));
+                if (!allAffixRulesQualify)
+                    return false;
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// An allomorph qualifies if its Rhs, scanned in order, looks like
+        /// <c>[insert]* [copy]+ [insert]*</c> with every copied part name appearing at most once: all
+        /// copied-from-input material forms one contiguous block (no insertion sandwiched between two
+        /// copy actions -- that would be infixation, splitting the input material apart), and no part is
+        /// copied twice (that would be reduplication).
+        /// </summary>
+        private static bool IsEdgeStripperAllomorph(AffixProcessAllomorph allomorph)
+        {
+            var seenParts = new HashSet<string>();
+            bool sawCopy = false;
+            bool sawInsertAfterCopy = false;
+            foreach (MorphologicalOutputAction action in allomorph.Rhs)
+            {
+                string copiedPart;
+                if (action is CopyFromInput copyFromInput)
+                    copiedPart = copyFromInput.PartName;
+                else if (action is ModifyFromInput modifyFromInput)
+                    copiedPart = modifyFromInput.PartName;
+                else if (action is InsertSegments || action is InsertSimpleContext)
+                {
+                    // An insertion only matters once a copy has been seen: it then closes the copy block.
+                    sawInsertAfterCopy |= sawCopy;
+                    continue;
+                }
+                else
+                    return false; // unrecognized action kind: cannot prove the edge-stripper shape
+                // A copy after a trailing insertion is infixation; a repeated part name is reduplication.
+                if (sawInsertAfterCopy || !seenParts.Add(copiedPart))
+                    return false;
+                sawCopy = true;
+            }
+            // [copy]+ : with no copy at all, none of the input (the root) survives into the output.
+            return sawCopy;
+        }
+
+        private static int SegmentCount(RootAllomorph allomorph) => allomorph.Segments.Shape.SegmentCount();
+
+        /// <summary>
+        /// The largest number of newly inserted segments among <paramref name="allomorphs"/>: for each
+        /// allomorph, every <see cref="InsertSegments"/> action contributes its real segment count and
+        /// every <see cref="InsertSimpleContext"/> action contributes one. Returns 0 for an empty sequence.
+        /// </summary>
+        private static int MaxAllomorphInsertion(IEnumerable<AffixProcessAllomorph> allomorphs)
+        {
+            int largest = 0;
+            foreach (AffixProcessAllomorph allomorph in allomorphs)
+            {
+                int inserted = 0;
+                foreach (MorphologicalOutputAction step in allomorph.Rhs)
+                {
+                    // CopyFromInput/ModifyFromInput carry forward material already matched from the input
+                    // (counted via the root length elsewhere), so only the two insert actions add segments;
+                    // any other action kind contributes nothing to this total.
+                    if (step is InsertSegments literal)
+                        inserted += literal.Segments.Shape.SegmentCount();
+                    else if (step is InsertSimpleContext)
+                        inserted++;
+                }
+                // Keep the running maximum across allomorphs.
+                largest = inserted > largest ? inserted : largest;
+            }
+            return largest;
+        }
+
+        private static bool TryGetFlatSegmentCount(Pattern<Word, int> pattern, out int count)
+        {
+            count = pattern.Children.Count;
+            return !pattern.Children.Any(n => !(n is Constraint<Word, int> s) || s.Type() != HCFeatureSystem.Segment);
+        }
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs b/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs
index 5cf2ad5af..aa42e29a8 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs
@@ -142,6 +142,23 @@ internal static void SetDeleted(this ShapeNode node, bool deleted)
             );
         }
 
+        /// <summary>
+        /// Counts real phonetic segments -- excludes boundary/anchor markers and nodes marked
+        /// <see cref="IsDeleted(ShapeNode)"/> (see parse-optimization.md's Phase 3 note on why deletion
+        /// marks rather than removes a node). Used by <see cref="GrammarAnalyzer"/>'s Phase 4 length-bound
+        /// gates, which reason about how many real segments a candidate could ever justify.
+        /// </summary>
+        internal static int SegmentCount(this Shape shape)
+        {
+            int total = 0;
+            foreach (ShapeNode current in shape)
+            {
+                bool isLiveSegment = current.Type() == HCFeatureSystem.Segment && !current.IsDeleted();
+                total += isLiveSegment ? 1 : 0;
+            }
+            return total;
+        }
+
         internal static bool IsIterative(this ShapeNode node)
         {
             return node.Annotation.Data != null;
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MemoizedCombinationRuleCascade.cs b/src/SIL.Machine.Morphology.HermitCrab/MemoizedCombinationRuleCascade.cs
new file mode 100644
index 000000000..7dc392b7f
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab/MemoizedCombinationRuleCascade.cs
@@ -0,0 +1,145 @@
+using System.Collections.Generic;
+using SIL.Machine.Rules;
+
+namespace SIL.Machine.Morphology.HermitCrab
+{
+    /// <summary>
+    /// Drop-in replacement for the sequential <see cref="CombinationRuleCascade{TData,TOffset}"/> on
+    /// Unordered-order analysis strata (parse-optimization.md Phases 2-3). Before expanding a node, checks
+    /// whether an earlier expansion elsewhere in the same word's analysis -- reached via a different
+    /// unapplication order, but with an equal <see cref="AnalysisStateKey"/> -- already searched this
+    /// exact state:
+    /// <list type="bullet">
+    /// <item>proved empty (Phase 2's "nogood" case) -> skip straight to "no results";</item>
+    /// <item>produced results (Phase 3) -> replay them (<see cref="Word.ReplayOnto"/>) instead of
+    /// re-searching: clone each stored result and graft the CURRENT arrival's own trail/non-head prefix
+    /// onto the stored subtree-local suffix (see <see cref="MemoEntry"/> and <see cref="Word.ReplayOnto"/>
+    /// for why only the prefix -- never the suffix -- needs replacing).</item>
+    /// </list>
+    /// Measured on the real Sena grammar's worst word (cinacemerwa), Phase 2 alone (nogoods only): 523,774
+    /// node expansions, 91.1% nogood-cache hits, 254s -> 192s wall clock (~24%) -- real but well below the
+    /// node-count reduction, because a nogood hit is a cheap dictionary lookup while the remaining ~9% of
+    /// productive nodes still paid full FST-match + clone cost on every revisit. Phase 3 targets exactly
+    /// that remaining cost: a state that DOES yield results is typically revisited hundreds to thousands of
+    /// times (the single worst atawirambo state was re-expanded 7,200x), and each revisit was a full
+    /// re-search before this change.
+    ///
+    /// Scoped to the sequential cascade only. The parallel cascade
+    /// (<c>ParallelCombinationRuleCascade</c>, used when <see cref="Morpher.MaxDegreeOfParallelism"/> > 1)
+    /// is a level-by-level breadth-first walk with no natural "this subtree is fully expanded" moment to
+    /// hang a memo write on, so it is left unmemoized for now -- callers that want this optimization
+    /// should construct their <see cref="Morpher"/> with <c>maxDegreeOfParallelism: 1</c> (the mode the
+    /// constructor's own doc comment already recommends for a caller that parallelizes across words
+    /// itself, which describes exactly the corpus-batch workloads this optimization targets).
+    /// </summary>
+    internal class MemoizedCombinationRuleCascade : RuleCascade<Word, int>
+    {
+        private readonly Morpher _morpher;
+
+        public MemoizedCombinationRuleCascade(
+            IEnumerable<IRule<Word, int>> rules,
+            IEqualityComparer<Word> comparer,
+            Morpher morpher
+        )
+            : base(rules, true, comparer)
+        {
+            _morpher = morpher;
+        }
+
+        public override IEnumerable<Word> Apply(Word input)
+        {
+            var collected = new HashSet<Word>(Comparer);
+            ApplyRules(input, collected);
+            AddRuleStats(collected.Count);
+            return collected;
+        }
+
+        // Returns every result produced strictly within the subtree rooted at `input` (i.e. by applying one
+        // or more rules starting from `input`, at any depth) -- NOT including `input` itself. This is both
+        // the return value callers use and the value memoized against `input`'s AnalysisStateKey once the
+        // subtree finishes, so a later differently-ordered arrival at the same state can replay it via
+        // Word.ReplayOnto instead of re-searching.
+        private List<Word> ApplyRules(Word input, HashSet<Word> output)
+        {
+            AnalysisScope scope = input.AnalysisScope;
+            if (scope == null)
+            {
+                // No scope while tracing (Morpher.ParseWord skips allocating one) or for words never
+                // routed through ParseWord (e.g. rule-level unit tests): expand without the memo rather
+                // than throw, so tracing stays byte-identical to the unmemoized engine.
+                return ApplyRulesRaw(input, output);
+            }
+
+            var stateKey = new AnalysisStateKey(input);
+            if (scope.Memo.TryGetValue(stateKey, out MemoEntry cached))
+            {
+                var replays = new List<Word>(cached.Results.Count);
+                foreach (Word stored in cached.Results)
+                {
+                    Word grafted = stored.ReplayOnto(input, cached.MruleTrailPrefixLength, cached.NonHeadPrefixLength);
+                    output.Add(grafted);
+                    replays.Add(grafted);
+                }
+                return replays;
+            }
+
+            // In-flight re-entry guard: the same state can be reached again while its first expansion
+            // is still on the stack. Such an arrival is expanded without the memo -- correctness-neutral,
+            // it only forgoes memoization for that one re-entrant call.
+            bool claimed = scope.InProgress.TryAdd(stateKey, 0);
+            if (!claimed)
+                return ApplyRulesRaw(input, output);
+
+            List<Word> subtree;
+            try
+            {
+                subtree = ApplyRulesRaw(input, output);
+            }
+            finally
+            {
+                scope.InProgress.TryRemove(stateKey, out _);
+            }
+
+            // OOM guard (parse-optimization.md Phase 3): past the cap, keep searching, stop growing the table.
+            if (scope.HasMemoCapacity)
+            {
+                var entry = new MemoEntry(subtree, input.MorphologicalRuleTrailLength, input.NonHeadCount);
+                scope.Memo.TryAdd(stateKey, entry);
+            }
+            return subtree;
+        }
+
+        private List<Word> ApplyRulesRaw(Word input, HashSet<Word> output)
+        {
+            var subtree = new List<Word>();
+
+            for (int ruleIndex = 0; ruleIndex < Rules.Count; ruleIndex++)
+            {
+                foreach (Word produced in ApplyRule(Rules[ruleIndex], ruleIndex, input))
+                {
+                    // Recording a result is cheap, and deeper strata/templates may still transform it,
+                    // so every result is emitted unconditionally; only DESCENT (the potentially
+                    // exponential part) is conditional below.
+                    subtree.Add(produced);
+                    output.Add(produced);
+
+                    // avoid infinite loop -- same guard CombinationRuleCascade uses
+                    if (Comparer.Equals(input, produced))
+                        continue;
+
+                    // parse-optimization.md Phase 5: while lexical gating is active for this parse, do
+                    // not descend into a result for which Morpher.HasReachableRoot reports no reachable
+                    // root -- such a result can never bottom out in a valid analysis, however the rest
+                    // of the cascade proceeds.
+                    AnalysisScope scope = produced.AnalysisScope;
+                    bool prunedByGate =
+                        scope != null && scope.LexicalGatingActive && !_morpher.HasReachableRoot(produced);
+                    if (!prunedByGate)
+                        subtree.AddRange(ApplyRules(produced, output));
+                }
+            }
+
+            return subtree;
+        }
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs
index 10cdc45c6..222953051 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs
@@ -16,12 +16,37 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
+    /// <remarks>
+    /// <para>
+    /// <b>Corpus-batch hosts running Server GC MUST set a heap hard limit</b> (e.g.
+    /// <c>DOTNET_GCHeapHardLimit</c> or <c>GCHeapHardLimitPercent</c>) when parallelizing across
+    /// words (parse-optimization.md Phase 8, see also <see cref="Morpher(ITraceManager, Language,
+    /// int)"/>'s <c>maxDegreeOfParallelism</c> remarks). Measured 2026-07-03: 16-way concurrency
+    /// with Server GC and no limit reached 45GB on a 64GB host and had to be killed; the same
+    /// workload with <c>DOTNET_GCHeapHardLimit=0x600000000</c> (24GB) completed. A follow-up
+    /// measurement (13 of the heaviest known Sena words, all running concurrently at once --
+    /// a harder case than a real mixed corpus, where lighter words finish early and relieve
+    /// pressure) found this is not always free: wall-clock rose ~30-45% under the same limit
+    /// versus unlimited (e.g. one word went 96.9s → 130.8s) even though every word still
+    /// completed and results stayed byte-identical. The blowup is <em>not</em> the per-parse memo
+    /// tables (<see cref="AnalysisScope.Memo"/>/<see cref="AnalysisScope.TemplateMemo"/>) retaining
+    /// too much -- measured at 6K-8K and 35K-58K stored <see cref="Word"/> instances respectively
+    /// for the heaviest known words, tens of MB at most given <see cref="Shape"/>'s and
+    /// <see cref="FeatureStruct"/>'s copy-on-write sharing -- it is Server GC deferring collection
+    /// of the much larger volume of transient search/replay garbage for throughput, under
+    /// concurrent heavy-word pressure. Set a limit sized to what the host can spare, and expect a
+    /// real (not cosmetic) throughput/memory trade-off under sustained all-heavy concurrent load;
+    /// do not assume the limit is a free safety net on every workload shape.
+    /// </para>
+    /// </remarks>
     public class Morpher : IMorphologicalAnalyzer, IMorphologicalGenerator
     {
         private readonly Language _lang;
         private readonly IRule<Word, int> _analysisRule;
         private readonly IRule<Word, int> _synthesisRule;
         private readonly Dictionary<Stratum, RootAllomorphTrie> _allomorphTries;
+        private readonly Dictionary<Stratum, List<RootAllomorphTrie>> _reachabilityTries;
+        private readonly bool _lexicalGatingQualified;
         private readonly ITraceManager _traceManager;
         private readonly ReadOnlyObservableCollection<Morpheme> _morphemes;
         private readonly IList<RootAllomorph> _lexicalPatterns = new List<RootAllomorph>();
@@ -63,8 +88,23 @@ public Morpher(ITraceManager traceManager, Language lang, int maxDegreeOfParalle
                 morphemes.AddRange(stratum.MorphologicalRules.OfType<AffixProcessRule>());
                 morphemes.AddRange(stratum.AffixTemplates.SelectMany(t => t.Slots).SelectMany(s => s.Rules).Distinct());
             }
+            // parse-optimization.md Phase 5: for each stratum, the tries of itself and every stratum
+            // "deeper" than it -- deeper meaning closer to the root, i.e. earlier in Language.Strata's own
+            // (root-most-first) order, since AnalysisLanguageRule walks strata in the OPPOSITE order
+            // (Reverse(), surface-first) and a candidate currently at stratum S can still be transformed
+            // by every stratum S has yet to reach on its way to the root.
+            _reachabilityTries = new Dictionary<Stratum, List<RootAllomorphTrie>>();
+            var soFar = new List<RootAllomorphTrie>();
+            foreach (Stratum stratum in _lang.Strata)
+            {
+                soFar.Add(_allomorphTries[stratum]);
+                _reachabilityTries[stratum] = new List<RootAllomorphTrie>(soFar);
+            }
+            _lexicalGatingQualified = GrammarAnalyzer.IsEdgeStripperQualified(_lang);
+
             _analysisRule = lang.CompileAnalysisRule(this);
             _synthesisRule = lang.CompileSynthesisRule(this);
+            ((InstrumentedRule<Word, int>)_synthesisRule).Name = "Synthesis";
             MaxStemCount = 2;
             MaxUnapplications = 0;
             MergeEquivalentAnalyses = true;
@@ -81,6 +121,56 @@ public ITraceManager TraceManager
 
         public int DeletionReapplications { get; set; }
 
+        private int? _maxAnalysisLengthOverride;
+        private bool _maxAnalysisLengthOverrideSet;
+
+        /// <summary>
+        /// The longest underlying form (in real segments, i.e. <see cref="HermitCrabExtensions.SegmentCount"/>)
+        /// any analysis candidate can be before it is pruned as unreachable (parse-optimization.md Phase 4's
+        /// "Gate B") -- auto-derived from the grammar (<see cref="GrammarAnalyzer.ComputeMaxAnalysisLength"/>:
+        /// the longest lexicon root plus every rule's own maximum possible insertion) unless explicitly set.
+        /// Setting this (including to <c>null</c>, which disables the gate entirely) overrides the
+        /// auto-derived value; re-derived fresh from the current grammar and <see cref="DeletionReapplications"/>
+        /// on every read otherwise, so it never goes stale if either changes after construction. Auto-derives
+        /// to <c>null</c> (gate off) when the grammar contains a compounding rule or a phonological rule shape
+        /// this analysis can't measure exactly -- see <see cref="GrammarAnalyzer"/>'s remarks.
+        /// </summary>
+        public int? MaxAnalysisLength
+        {
+            get
+            {
+                if (_maxAnalysisLengthOverrideSet)
+                    return _maxAnalysisLengthOverride;
+                return GrammarAnalyzer.ComputeMaxAnalysisLength(_lang, DeletionReapplications);
+            }
+            set
+            {
+                _maxAnalysisLengthOverride = value;
+                _maxAnalysisLengthOverrideSet = true;
+            }
+        }
+
+        /// <summary>
+        /// parse-optimization.md Phase 5: prune an analysis subtree before descending into it when no root
+        /// in the current stratum (or any stratum deeper than it) can match ANY contiguous window of the
+        /// candidate's current shape -- see <see cref="GrammarAnalyzer.IsEdgeStripperQualified"/> and
+        /// <see cref="RootAllomorphTrie.ContainsRootAnywhere"/>. <b>Default off</b>, as the plan requires:
+        /// even when set, the gate only actually activates for a given parse when the grammar itself
+        /// qualifies (<see cref="GrammarAnalyzer.IsEdgeStripperQualified"/>, checked once at construction)
+        /// and the call isn't tracing or root-guessing (<see cref="ParseWord(string, out object, bool)"/>'s
+        /// <c>guessRoot</c> synthesizes from lexical PATTERNS, bypassing the real lexicon entirely, so a
+        /// real-lexicon reachability gate would be unsound applied to it). This is the plan's own
+        /// highest-risk phase; turn on only after the corpus A/B protocol in parse-optimization.md's Phase
+        /// 5 section holds for your grammar.
+        /// </summary>
+        public bool EnableLexicalGating { get; set; }
+
+        /// <summary>Reachability check backing <see cref="EnableLexicalGating"/> -- see its remarks.</summary>
+        internal bool HasReachableRoot(Word word)
+        {
+            return _reachabilityTries[word.Stratum].Exists(candidate => candidate.ContainsRootAnywhere(word.Shape));
+        }
+
         public int MaxStemCount { get; set; }
 
         /// <summary>
@@ -110,6 +200,21 @@ public Language Language
             get { return _lang; }
         }
 
+        /// <summary>
+        /// When true, ParseWord does not clear rule stats (InstrumentedRule.InputCount/OutputCount/
+        /// ElapsedTime/BucketGroups) at the start of each parse, so they accumulate across an entire corpus
+        /// batch instead of reflecting only the most recent word. Off by default: existing single-word
+        /// callers (e.g. an interactive "why didn't this parse" UI) expect ClearStats every call. The rule
+        /// tree is shared across every ParseWord call on this Morpher, so a caller enabling this on a Morpher
+        /// used from multiple threads is responsible for keeping calls single-threaded (see
+        /// MemoizedCombinationRuleCascade's doc comment on maxDegreeOfParallelism: 1 for corpus-batch runs).
+        /// </summary>
+        public bool AccumulateRuleStats { get; set; }
+
+        public InstrumentedRule<Word, int> AnalysisRuleStats => _analysisRule as InstrumentedRule<Word, int>;
+
+        public InstrumentedRule<Word, int> SynthesisRuleStats => _synthesisRule as InstrumentedRule<Word, int>;
+
         /// <summary>
         /// Parses the specified surface form.
         /// </summary>
@@ -133,11 +238,30 @@ public IEnumerable<Word> ParseWord(string word, out object trace, bool guessRoot
             Shape shape = _lang.SurfaceStratum.CharacterDefinitionTable.Segment(word);
 
             var input = new Word(_lang.SurfaceStratum, shape);
+            // Skipped while tracing: the nogood cascade this backs skips expansions outright on a hit,
+            // which would also skip the trace events those expansions fire (parse-optimization.md Phase 2
+            // ground rules -- traces must stay byte-identical to the unmemoized engine).
+            if (!_traceManager.IsTracing)
+            {
+                // Phase 5's lexical gate is unsound under guessRoot (it synthesizes from lexical PATTERNS,
+                // bypassing the real lexicon the gate's reachability index is built from) -- this check
+                // covers the whole parse, not just guessRoot's own fallback branch further down, since the
+                // gate would otherwise have already pruned candidates during _analysisRule.Apply below,
+                // before guessRoot's branch ever runs.
+                bool lexicalGatingActive = EnableLexicalGating && _lexicalGatingQualified && !guessRoot;
+                input.AnalysisScope = new AnalysisScope(lexicalGatingActive);
+            }
             input.Freeze();
             if (_traceManager.IsTracing)
                 _traceManager.AnalyzeWord(_lang, input);
             trace = input.CurrentTrace;
 
+            if (!AccumulateRuleStats)
+            {
+                AnalysisRuleStats?.ClearStats();
+                SynthesisRuleStats?.ClearStats();
+            }
+
             // Unapply rules
             IList<Word> analyses = _analysisRule.Apply(input).ToList();
 
@@ -340,6 +464,15 @@ private IEnumerable<Word> Synthesize(string word, IList<Word> analyses)
 
         private IEnumerable<Word> SynthesizeAnalysis(string word, Word analysisWord)
         {
+            // Gate A from parse-optimization.md's Phase 4 sketch (pre-phonology length-vs-target pruning)
+            // was attempted and reverted here: `alternative` at this point is still essentially the bare
+            // root allomorph -- the pending affix trail's own insertions haven't been applied yet, they
+            // happen inside _synthesisRule.Apply below alongside phonology -- so comparing its length to
+            // the target surface length without also accounting for that trail's own insertions produced
+            // false rejections (confirmed against the unit suite: CompoundingRuleTests/MetathesisRuleTests
+            // regressed). A correct version would need to sum each pending trail rule's own max insertion
+            // (GrammarAnalyzer already computes this per-rule for Gate B) rather than compare bare-root
+            // length directly -- left as follow-up, not attempted this pass.
             foreach (Word synthesisWord in LexicalLookup(analysisWord))
             {
                 foreach (Word alternative in synthesisWord.ExpandAlternatives())
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs
index b9f6d4acc..82881fb36 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs
@@ -7,7 +7,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules
 {
-    public class AnalysisAffixProcessRule : IRule<Word, int>
+    public class AnalysisAffixProcessRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly AffixProcessRule _rule;
@@ -15,6 +15,7 @@ public class AnalysisAffixProcessRule : IRule<Word, int>
 
         public AnalysisAffixProcessRule(Morpher morpher, AffixProcessRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -37,7 +38,7 @@ public AnalysisAffixProcessRule(Morpher morpher, AffixProcessRule rule)
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -75,11 +76,21 @@ public IEnumerable<Word> Apply(Word input)
                         _morpher.TraceManager.MorphologicalRuleUnapplied(_rule, i, input, outWord);
                     output.Add(outWord);
                     unapplied = true;
+
+                    if (_morpher.AccumulateRuleStats)
+                    {
+                        string example = RuleStatsHelper.Example(input);
+                        RecordBucket(RuleStatsHelper.AllomorphGroup, i.ToString(), example);
+                        RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                        RecordBucket(RuleStatsHelper.StemNameGroup, RuleStatsHelper.StemName(input), example);
+                        RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                    }
                 }
 
                 if (_morpher.TraceManager.IsTracing && !unapplied)
                     _morpher.TraceManager.MorphologicalRuleNotUnapplied(_rule, i, input);
             }
+            AddRuleStats(output.Count);
             return output;
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs
index b5013d4ee..eac07f24f 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs
@@ -7,7 +7,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules
 {
-    public class AnalysisCompoundingRule : IRule<Word, int>
+    public class AnalysisCompoundingRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly CompoundingRule _rule;
@@ -15,6 +15,7 @@ public class AnalysisCompoundingRule : IRule<Word, int>
 
         public AnalysisCompoundingRule(Morpher morpher, CompoundingRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -37,7 +38,7 @@ public AnalysisCompoundingRule(Morpher morpher, CompoundingRule rule)
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -146,12 +147,25 @@ RootAllomorph allo in _morpher.SearchRootAllomorphs(_rule.Stratum, outWord.Curre
                         _morpher.TraceManager.MorphologicalRuleUnapplied(_rule, i, input, outWord);
                     output.Add(outWord);
                     unapplied = true;
+
+                    if (_morpher.AccumulateRuleStats)
+                    {
+                        string example = RuleStatsHelper.Example(input);
+                        RecordBucket(RuleStatsHelper.AllomorphGroup, i.ToString(), example);
+                        RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                        RecordBucket(
+                            RuleStatsHelper.NonHeadCategoryGroup,
+                            RuleStatsHelper.Category(outWord.CurrentNonHead),
+                            example
+                        );
+                    }
                 }
 
                 if (_morpher.TraceManager.IsTracing && !unapplied)
                     _morpher.TraceManager.MorphologicalRuleNotUnapplied(_rule, i, input);
             }
 
+            AddRuleStats(output.Count);
             return output;
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs
index 031c6fbad..298e31aa6 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs
@@ -7,7 +7,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules
 {
-    public class AnalysisRealizationalAffixProcessRule : IRule<Word, int>
+    public class AnalysisRealizationalAffixProcessRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly RealizationalAffixProcessRule _rule;
@@ -15,6 +15,7 @@ public class AnalysisRealizationalAffixProcessRule : IRule<Word, int>
 
         public AnalysisRealizationalAffixProcessRule(Morpher morpher, RealizationalAffixProcessRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -37,7 +38,7 @@ public AnalysisRealizationalAffixProcessRule(Morpher morpher, RealizationalAffix
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -60,11 +61,21 @@ public IEnumerable<Word> Apply(Word input)
                         _morpher.TraceManager.MorphologicalRuleUnapplied(_rule, i, input, outWord);
                     output.Add(outWord);
                     unapplied = true;
+
+                    if (_morpher.AccumulateRuleStats)
+                    {
+                        string example = RuleStatsHelper.Example(input);
+                        RecordBucket(RuleStatsHelper.AllomorphGroup, i.ToString(), example);
+                        RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                        RecordBucket(RuleStatsHelper.StemNameGroup, RuleStatsHelper.StemName(input), example);
+                        RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                    }
                 }
 
                 if (_morpher.TraceManager.IsTracing && !unapplied)
                     _morpher.TraceManager.MorphologicalRuleNotUnapplied(_rule, i, input);
             }
+            AddRuleStats(output.Count);
             return output;
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs
index 98a3895d0..7c6c53648 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs
@@ -8,7 +8,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules
 {
-    public class SynthesisAffixProcessRule : IRule<Word, int>
+    public class SynthesisAffixProcessRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly AffixProcessRule _rule;
@@ -16,6 +16,7 @@ public class SynthesisAffixProcessRule : IRule<Word, int>
 
         public SynthesisAffixProcessRule(Morpher morpher, AffixProcessRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
             _rules = new List<PatternRule<Word, int>>();
@@ -38,7 +39,7 @@ public SynthesisAffixProcessRule(Morpher morpher, AffixProcessRule rule)
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!input.IsMorphologicalRuleApplicable(_rule))
                 return Enumerable.Empty<Word>();
@@ -215,6 +216,15 @@ public IEnumerable<Word> Apply(Word input)
                         _morpher.TraceManager.MorphologicalRuleApplied(_rule, i, input, outWord);
                     output.Add(outWord);
 
+                    if (_morpher.AccumulateRuleStats)
+                    {
+                        string example = RuleStatsHelper.Example(input);
+                        RecordBucket(RuleStatsHelper.AllomorphGroup, i.ToString(), example);
+                        RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                        RecordBucket(RuleStatsHelper.StemNameGroup, RuleStatsHelper.StemName(input), example);
+                        RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                    }
+
                     // return all word syntheses that match subrules that are constrained by environments,
                     // HC violates the disjunctive property of allomorphs here because it cannot check the
                     // environmental constraints until it has a surface form, we will enforce the disjunctive
@@ -237,6 +247,7 @@ public IEnumerable<Word> Apply(Word input)
                 }
             }
 
+            AddRuleStats(output.Count);
             return output;
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs
index 29e3bd5f3..9ab42884e 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs
@@ -9,7 +9,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules
 {
-    public class SynthesisCompoundingRule : IRule<Word, int>
+    public class SynthesisCompoundingRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly CompoundingRule _rule;
@@ -17,6 +17,7 @@ public class SynthesisCompoundingRule : IRule<Word, int>
 
         public SynthesisCompoundingRule(Morpher morpher, CompoundingRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
             _subruleMatchers = new List<Tuple<Matcher<Word, int>, Matcher<Word, int>>>();
@@ -42,7 +43,7 @@ private Matcher<Word, int> BuildMatcher(IEnumerable<Pattern<Word, int>> lhs)
             );
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!input.IsMorphologicalRuleApplicable(_rule))
                 return Enumerable.Empty<Word>();
@@ -209,6 +210,18 @@ public IEnumerable<Word> Apply(Word input)
                             _morpher.TraceManager.MorphologicalRuleApplied(_rule, i, input, outWord);
 
                         output.Add(outWord);
+
+                        if (_morpher.AccumulateRuleStats)
+                        {
+                            string example = RuleStatsHelper.Example(input);
+                            RecordBucket(RuleStatsHelper.AllomorphGroup, i.ToString(), example);
+                            RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                            RecordBucket(
+                                RuleStatsHelper.NonHeadCategoryGroup,
+                                RuleStatsHelper.Category(input.CurrentNonHead),
+                                example
+                            );
+                        }
                         break;
                     }
                     if (_morpher.TraceManager.IsTracing)
@@ -228,6 +241,7 @@ public IEnumerable<Word> Apply(Word input)
                 }
             }
 
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs
index bd1717f82..e9821ec3f 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs
@@ -9,7 +9,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules
 {
-    public class SynthesisRealizationalAffixProcessRule : IRule<Word, int>
+    public class SynthesisRealizationalAffixProcessRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly RealizationalAffixProcessRule _rule;
@@ -17,6 +17,7 @@ public class SynthesisRealizationalAffixProcessRule : IRule<Word, int>
 
         public SynthesisRealizationalAffixProcessRule(Morpher morpher, RealizationalAffixProcessRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
             _rules = new List<PatternRule<Word, int>>();
@@ -38,7 +39,7 @@ public SynthesisRealizationalAffixProcessRule(Morpher morpher, RealizationalAffi
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -146,6 +147,15 @@ public IEnumerable<Word> Apply(Word input)
 
                     output.Add(outWord);
 
+                    if (_morpher.AccumulateRuleStats)
+                    {
+                        string example = RuleStatsHelper.Example(input);
+                        RecordBucket(RuleStatsHelper.AllomorphGroup, i.ToString(), example);
+                        RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                        RecordBucket(RuleStatsHelper.StemNameGroup, RuleStatsHelper.StemName(input), example);
+                        RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                    }
+
                     // return all word syntheses that match subrules that are constrained by environments,
                     // HC violates the disjunctive property of allomorphs here because it cannot check the
                     // environmental constraints until it has a surface form, we will enforce the disjunctive
@@ -168,6 +178,7 @@ public IEnumerable<Word> Apply(Word input)
                 }
             }
 
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs
index 5d160243f..f17fd671b 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs
@@ -8,7 +8,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules
 {
-    public class AnalysisMetathesisRule : IRule<Word, int>
+    public class AnalysisMetathesisRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly MetathesisRule _rule;
@@ -16,6 +16,7 @@ public class AnalysisMetathesisRule : IRule<Word, int>
 
         public AnalysisMetathesisRule(Morpher morpher, MetathesisRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -35,7 +36,7 @@ public AnalysisMetathesisRule(Morpher morpher, MetathesisRule rule)
             _patternRule = new IterativePhonologicalPatternRule(ruleSpec, settings);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -48,11 +49,20 @@ public IEnumerable<Word> Apply(Word input)
             {
                 if (_morpher.TraceManager.IsTracing)
                     _morpher.TraceManager.PhonologicalRuleUnapplied(_rule, -1, origInput, input);
+
+                if (_morpher.AccumulateRuleStats)
+                {
+                    string example = RuleStatsHelper.Example(input);
+                    RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                    RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                }
+                AddRuleStats(1);
                 return input.ToEnumerable();
             }
 
             if (_morpher.TraceManager.IsTracing)
                 _morpher.TraceManager.PhonologicalRuleNotUnapplied(_rule, -1, input);
+            AddRuleStats(0);
             return Enumerable.Empty<Word>();
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs
index e691b4c0a..1e311fad2 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs
@@ -10,7 +10,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules
 {
-    public class AnalysisRewriteRule : IRule<Word, int>
+    public class AnalysisRewriteRule : InstrumentedRule<Word, int>
     {
         private enum ReapplyType
         {
@@ -25,6 +25,7 @@ private enum ReapplyType
 
         public AnalysisRewriteRule(Morpher morpher, RewriteRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -118,7 +119,7 @@ private static bool IsUnifiable(Constraint<Word, int> constraint, Pattern<Word,
             return true;
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -173,6 +174,14 @@ public IEnumerable<Word> Apply(Word input)
                     if (_morpher.TraceManager.IsTracing)
                         _morpher.TraceManager.PhonologicalRuleUnapplied(_rule, i, origInput, input);
                     applied = true;
+
+                    if (_morpher.AccumulateRuleStats)
+                    {
+                        string example = RuleStatsHelper.Example(input);
+                        RecordBucket(RuleStatsHelper.SubruleGroup, i.ToString(), example);
+                        RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                        RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                    }
                 }
                 else if (_morpher.TraceManager.IsTracing)
                 {
@@ -180,6 +189,7 @@ public IEnumerable<Word> Apply(Word input)
                 }
             }
 
+            AddRuleStats(applied ? 1 : 0);
             if (applied)
                 return input.ToEnumerable();
             return Enumerable.Empty<Word>();
diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs
index 2d8c3af5a..70bb1abe6 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs
@@ -7,7 +7,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules
 {
-    public class SynthesisMetathesisRule : IRule<Word, int>
+    public class SynthesisMetathesisRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly MetathesisRule _rule;
@@ -15,6 +15,7 @@ public class SynthesisMetathesisRule : IRule<Word, int>
 
         public SynthesisMetathesisRule(Morpher morpher, MetathesisRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -32,7 +33,7 @@ public SynthesisMetathesisRule(Morpher morpher, MetathesisRule rule)
             _patternRule = new IterativePhonologicalPatternRule(ruleSpec, settings);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
@@ -45,11 +46,20 @@ public IEnumerable<Word> Apply(Word input)
             {
                 if (_morpher.TraceManager.IsTracing)
                     _morpher.TraceManager.PhonologicalRuleApplied(_rule, -1, origInput, input);
+
+                if (_morpher.AccumulateRuleStats)
+                {
+                    string example = RuleStatsHelper.Example(input);
+                    RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                    RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+                }
+                AddRuleStats(1);
                 return input.ToEnumerable();
             }
 
             if (_morpher.TraceManager.IsTracing)
                 _morpher.TraceManager.PhonologicalRuleNotApplied(_rule, -1, input, FailureReason.Pattern, null);
+            AddRuleStats(0);
             return Enumerable.Empty<Word>();
         }
     }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs
index ecf84a7dc..827ed5cae 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs
@@ -8,7 +8,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules
 {
-    public class SynthesisRewriteRule : IRule<Word, int>
+    public class SynthesisRewriteRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly RewriteRule _rule;
@@ -16,6 +16,7 @@ public class SynthesisRewriteRule : IRule<Word, int>
 
         public SynthesisRewriteRule(Morpher morpher, RewriteRule rule)
         {
+            Name = rule.Name;
             _morpher = morpher;
             _rule = rule;
 
@@ -48,17 +49,17 @@ public SynthesisRewriteRule(Morpher morpher, RewriteRule rule)
             }
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_rule))
                 return Enumerable.Empty<Word>();
 
             Word origInput = null;
+            bool collectResults = _morpher.TraceManager.IsTracing || _morpher.AccumulateRuleStats;
             if (_morpher.TraceManager.IsTracing)
-            {
                 origInput = input.Clone();
+            if (collectResults)
                 input.CurrentRuleResults = new Dictionary<int, Tuple<FailureReason, object>>();
-            }
 
             bool applied = _patternRule.Apply(input).Any();
 
@@ -81,8 +82,29 @@ public IEnumerable<Word> Apply(Word input)
                         _morpher.TraceManager.PhonologicalRuleNotApplied(_rule, i, input, FailureReason.Pattern, null);
                     }
                 }
-                input.CurrentRuleResults = null;
             }
+
+            if (applied && _morpher.AccumulateRuleStats)
+            {
+                string example = RuleStatsHelper.Example(input);
+                for (int i = 0; i < _rule.Subrules.Count; i++)
+                {
+                    if (
+                        input.CurrentRuleResults.TryGetValue(i, out Tuple<FailureReason, object> reason)
+                        && reason.Item1 == FailureReason.None
+                    )
+                    {
+                        RecordBucket(RuleStatsHelper.SubruleGroup, i.ToString(), example);
+                        break;
+                    }
+                }
+                RecordBucket(RuleStatsHelper.CategoryGroup, RuleStatsHelper.Category(input), example);
+                RecordBucket(RuleStatsHelper.RootDirectGroup, RuleStatsHelper.IsRootDirect(input), example);
+            }
+            if (collectResults)
+                input.CurrentRuleResults = null;
+
+            AddRuleStats(applied ? 1 : 0);
             if (applied)
                 return input.ToEnumerable();
             return Enumerable.Empty<Word>();
diff --git a/src/SIL.Machine.Morphology.HermitCrab/RootAllomorphTrie.cs b/src/SIL.Machine.Morphology.HermitCrab/RootAllomorphTrie.cs
index 7d34e81a5..ac42a1420 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/RootAllomorphTrie.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/RootAllomorphTrie.cs
@@ -77,5 +77,25 @@ public IEnumerable<RootAllomorph> Search(Shape shape)
                     yield return _allomorphs[match.ID];
             }
         }
+
+        /// <summary>
+        /// parse-optimization.md Phase 5 admissibility check: reports whether some root allomorph in this
+        /// trie matches a contiguous window of <paramref name="shape"/>. A transduction is attempted from each
+        /// annotation the trie's filter accepts, stopping at the first hit. Compare <see cref="Search"/>, which
+        /// <see cref="Morpher.SearchRootAllomorphs"/> relies on for the bare-root-after-full-unapplication
+        /// case. Meant to run before descending into an analysis subtree: if no window matches anywhere, no
+        /// root in this stratum can ever be reached from here.
+        /// </summary>
+        /// <returns><c>true</c> as soon as any start position yields at least one match.</returns>
+        public bool ContainsRootAnywhere(Shape shape)
+        {
+            // Intended anchoring: start anchored per attempt, end left open, because a root is typically
+            // shorter than the remaining unstripped candidate.
+            return shape
+                .Annotations.Where(ann => _filter(ann))
+                .Any(candidateStart =>
+                    _fsa.Transduce(shape, candidateStart, null, true, false, false, out var hits) && hits.Any()
+                );
+        }
     }
 }
diff --git a/src/SIL.Machine.Morphology.HermitCrab/RuleStatsHelper.cs b/src/SIL.Machine.Morphology.HermitCrab/RuleStatsHelper.cs
new file mode 100644
index 000000000..a1707d2f4
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab/RuleStatsHelper.cs
@@ -0,0 +1,48 @@
+using System.Linq;
+using SIL.Machine.FeatureModel;
+
+namespace SIL.Machine.Morphology.HermitCrab
+{
+    /// <summary>
+    /// Builds the bucket keys/examples that InstrumentedRule.RecordBucket stores, so a corpus-wide stats run
+    /// can answer questions like "does this rule only ever fire on verbs" or "does this allomorph only ever
+    /// attach to a bare stem" from real parse traffic rather than re-reading the grammar XML by hand.
+    /// Every helper maps a null word (or a missing value) to "(none)" so stats collection cannot throw
+    /// mid-parse. StemName is only meaningfully populated on the synthesis side (analysis doesn't know the
+    /// root until the derivation bottoms out), so it reads "(none)" for most analysis calls -- not a bug.
+    /// </summary>
+    internal static class RuleStatsHelper
+    {
+        public const string CategoryGroup = "category";
+        public const string StemNameGroup = "stemName";
+        public const string AllomorphGroup = "allomorph";
+        public const string RootDirectGroup = "rootDirect";
+        public const string SubruleGroup = "subrule";
+        public const string NonHeadCategoryGroup = "nonHeadCategory";
+        private const string None = "(none)";
+
+        /// <summary>ID of the first part of speech on the word's syntactic features, else "(none)".</summary>
+        public static string Category(Word word)
+        {
+            FeatureSymbol pos = word?.SyntacticFeatureStruct?.PartsOfSpeech().FirstOrDefault();
+            return pos?.ID ?? None;
+        }
+
+        /// <summary>Name of the stem name on the word's root allomorph, else "(none)".</summary>
+        public static string StemName(Word word) => word?.RootAllomorph?.StemName?.Name ?? None;
+
+        // "true" = this application's input had no morphological rules recorded on it yet -- for synthesis
+        // that means the affix/phonological rule fired directly against the bare stem; for analysis it means
+        // this was the innermost/first rule unapplied. Either reading answers "does this only ever touch the
+        // stem, or does it also apply once other affixes are already present." A null word reads "(none)".
+        public static string IsRootDirect(Word word)
+        {
+            if (word == null)
+                return None;
+            return word.MorphologicalRules.Any() ? "false" : "true";
+        }
+
+        /// <summary>Display string for the word (its ToString()), else "(none)" for a null word.</summary>
+        public static string Example(Word word) => word?.ToString() ?? None;
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs
index 21248d002..a69f4cc85 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs
@@ -6,7 +6,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
-    internal class SynthesisAffixTemplateRule : IRule<Word, int>
+    internal class SynthesisAffixTemplateRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly AffixTemplate _template;
@@ -14,6 +14,7 @@ internal class SynthesisAffixTemplateRule : IRule<Word, int>
 
         public SynthesisAffixTemplateRule(Morpher morpher, AffixTemplate template)
         {
+            Name = template.Name;
             _morpher = morpher;
             _template = template;
             _rules = new List<IRule<Word, int>>(
@@ -23,14 +24,16 @@ public SynthesisAffixTemplateRule(Morpher morpher, AffixTemplate template)
                     FreezableEqualityComparer<Word>.Default
                 ))
             );
+            AddSubRules(_rules);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (_morpher.TraceManager.IsTracing)
                 _morpher.TraceManager.BeginApplyTemplate(_template, input);
             var output = new HashSet<Word>(FreezableEqualityComparer<Word>.Default);
             ApplySlots(input, 0, output);
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs
index a5ab1aa2a..d4329d239 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs
@@ -7,7 +7,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
-    internal class SynthesisAffixTemplatesRule : IRule<Word, int>
+    internal class SynthesisAffixTemplatesRule : InstrumentedRule<Word, int>
     {
         private readonly Morpher _morpher;
         private readonly Stratum _stratum;
@@ -16,13 +16,15 @@ internal class SynthesisAffixTemplatesRule : IRule<Word, int>
 
         public SynthesisAffixTemplatesRule(Morpher morpher, Stratum stratum)
         {
+            Name = stratum.Name;
             _morpher = morpher;
             _stratum = stratum;
             _templates = stratum.AffixTemplates.ToList();
             _templateRules = _templates.Select(temp => temp.CompileSynthesisRule(morpher)).ToList();
+            AddSubRules(_templateRules);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!input.RealizationalFeatureStruct.IsUnifiable(input.SyntacticFeatureStruct))
                 return Enumerable.Empty<Word>();
@@ -74,6 +76,7 @@ public IEnumerable<Word> Apply(Word input)
                 }
             }
 
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs b/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs
index 72ff8b24b..20cc206e6 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs
@@ -1,4 +1,5 @@
 ﻿using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 using SIL.Extensions;
 using SIL.Machine.Annotations;
@@ -7,7 +8,7 @@
 
 namespace SIL.Machine.Morphology.HermitCrab
 {
-    internal class SynthesisStratumRule : IRule<Word, int>
+    internal class SynthesisStratumRule : InstrumentedRule<Word, int>
     {
         private readonly IRule<Word, int> _mrulesRule;
         private readonly IRule<Word, int> _prulesRule;
@@ -17,26 +18,25 @@ internal class SynthesisStratumRule : IRule<Word, int>
 
         public SynthesisStratumRule(Morpher morpher, Stratum stratum)
         {
+            Name = stratum.Name;
             _templatesRule = new SynthesisAffixTemplatesRule(morpher, stratum);
             _mrulesRule = null;
-            IEnumerable<IRule<Word, int>> mrules = stratum.MorphologicalRules.Select(mrule =>
-                mrule.CompileSynthesisRule(morpher)
-            );
+            // Paired (not just the compiled rules) so the Unordered cascade can look up the trail-directed
+            // rule instead of probing the whole battery -- see TrailDirectedRuleCascade.
+            var compiledMRules = stratum
+                .MorphologicalRules.Select(mrule => (Rule: mrule, Compiled: mrule.CompileSynthesisRule(morpher)))
+                .ToList();
             switch (stratum.MorphologicalRuleOrder)
             {
                 case MorphologicalRuleOrder.Linear:
                     _mrulesRule = new LinearRuleCascade<Word, int>(
-                        mrules,
+                        compiledMRules.Select(p => p.Compiled),
                         true,
                         FreezableEqualityComparer<Word>.Default
                     );
                     break;
                 case MorphologicalRuleOrder.Unordered:
-                    _mrulesRule = new CombinationRuleCascade<Word, int>(
-                        mrules,
-                        true,
-                        FreezableEqualityComparer<Word>.Default
-                    );
+                    _mrulesRule = new TrailDirectedRuleCascade(compiledMRules, FreezableEqualityComparer<Word>.Default);
                     break;
             }
             _prulesRule = new LinearRuleCascade<Word, int>(
@@ -44,13 +44,17 @@ public SynthesisStratumRule(Morpher morpher, Stratum stratum)
             );
             _stratum = stratum;
             _morpher = morpher;
+            AddSubRule(_mrulesRule);
+            AddSubRule(_prulesRule);
+            AddSubRule(_templatesRule);
         }
 
-        public IEnumerable<Word> Apply(Word input)
+        public override IEnumerable<Word> Apply(Word input)
         {
             if (!_morpher.RuleSelector(_stratum) || input.RootAllomorph.Morpheme.Stratum.Depth > _stratum.Depth)
                 return input.ToEnumerable();
 
+            long startTime = Stopwatch.GetTimestamp();
             if (_morpher.TraceManager.IsTracing)
                 _morpher.TraceManager.BeginApplyStratum(_stratum, input);
 
@@ -88,6 +92,9 @@ public IEnumerable<Word> Apply(Word input)
             }
             if (_morpher.TraceManager.IsTracing && output.Count == 0)
                 _morpher.TraceManager.EndApplyStratum(_stratum, input);
+
+            ElapsedTime += Stopwatch.GetTimestamp() - startTime;
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine.Morphology.HermitCrab/TrailDirectedRuleCascade.cs b/src/SIL.Machine.Morphology.HermitCrab/TrailDirectedRuleCascade.cs
new file mode 100644
index 000000000..bc023a224
--- /dev/null
+++ b/src/SIL.Machine.Morphology.HermitCrab/TrailDirectedRuleCascade.cs
@@ -0,0 +1,76 @@
+using System.Collections.Generic;
+using System.Linq;
+using SIL.Machine.Morphology.HermitCrab.MorphologicalRules;
+using SIL.Machine.Rules;
+
+namespace SIL.Machine.Morphology.HermitCrab
+{
+    /// <summary>
+    /// Synthesis-side replacement for <see cref="CombinationRuleCascade{TData,TOffset}"/> on
+    /// <see cref="MorphologicalRuleOrder.Unordered"/> strata. Unlike analysis -- where any subset/order of
+    /// morphological rules is a live hypothesis and every rule genuinely must be tried -- synthesis already
+    /// knows, from the trail recorded during analysis (<see cref="Word.TryGetNextMorphologicalRuleToApply"/>),
+    /// exactly which single rule (or, for an unresolved compounding rule, which subset) can possibly apply
+    /// next. The unmodified cascade still probes the entire rule battery at every node and lets
+    /// <see cref="Word.IsMorphologicalRuleApplicable"/> reject the misses -- reject calls that are pure
+    /// overhead: <see cref="MorphologicalRules.SynthesisAffixProcessRule.Apply"/> and
+    /// <see cref="SynthesisCompoundingRule.Apply"/> both return empty on that check with no trace call, so
+    /// skipping the attempt entirely changes neither the result set nor trace output. Realizational affix
+    /// rules are excluded from the trail (<see cref="Word.MorphologicalRuleUnapplied"/>) and self-govern via
+    /// feature-structure checks instead, so they are always attempted, exactly as before.
+    /// </summary>
+    internal class TrailDirectedRuleCascade : InstrumentedRule<Word, int>
+    {
+        // Kept in the stratum's declared order: when several rules are attempted at one node (a
+        // realizational rule next to the trail-directed one, or several compounding rules), their trace
+        // calls have to come out in the same relative order as under the unmodified all-rules cascade.
+        private readonly List<(IMorphologicalRule MorphologicalRule, IRule<Word, int> CompiledRule)> _rules;
+        private readonly IEqualityComparer<Word> _comparer;
+
+        public TrailDirectedRuleCascade(
+            IEnumerable<(IMorphologicalRule MorphologicalRule, IRule<Word, int> CompiledRule)> rules,
+            IEqualityComparer<Word> comparer
+        )
+        {
+            Name = "TrailDirectedRuleCascade";
+            _rules = rules.ToList();
+            _comparer = comparer;
+            AddSubRules(_rules.Select(pair => pair.CompiledRule));
+        }
+
+        public override IEnumerable<Word> Apply(Word input)
+        {
+            var results = new HashSet<Word>(_comparer);
+            ApplyRules(input, results);
+            AddRuleStats(results.Count);
+            return results;
+        }
+
+        private void ApplyRules(Word input, HashSet<Word> output)
+        {
+            bool hasNext = input.TryGetNextMorphologicalRuleToApply(out IMorphologicalRule next);
+            foreach ((IMorphologicalRule mrule, IRule<Word, int> compiled) in _rules)
+            {
+                if (!ShouldAttempt(mrule, hasNext, next))
+                    continue;
+                foreach (Word result in compiled.Apply(input))
+                {
+                    if (!_comparer.Equals(input, result))
+                        ApplyRules(result, output);
+                    output.Add(result);
+                }
+            }
+        }
+
+        // Realizational rules are always tried; any other rule only when it is the trail's next rule, or --
+        // when the trail's next entry is null -- when it is a compounding rule.
+        private static bool ShouldAttempt(IMorphologicalRule mrule, bool hasNext, IMorphologicalRule next)
+        {
+            if (mrule is RealizationalAffixProcessRule)
+                return true;
+            if (!hasNext)
+                return false;
+            return next == null ? mrule is CompoundingRule : ReferenceEquals(mrule, next);
+        }
+    }
+}
diff --git a/src/SIL.Machine.Morphology.HermitCrab/Word.cs b/src/SIL.Machine.Morphology.HermitCrab/Word.cs
index 96748875f..11f623d74 100644
--- a/src/SIL.Machine.Morphology.HermitCrab/Word.cs
+++ b/src/SIL.Machine.Morphology.HermitCrab/Word.cs
@@ -70,12 +70,22 @@ public Word(Stratum stratum, Shape shape)
         }
 
         protected Word(Word word)
+            : this(word, shareFrozenShape: false) { }
+
+        // parse-optimization.md Phase 10a: shareFrozenShape lets clone sites that provably never
+        // mutate the clone's shape (see CloneShareFrozenShape) skip the deep Shape copy -- 10-pre
+        // measured that copy (ShapeNodes + their Annotation<ShapeNode> graphs) at ~25% of all
+        // per-word allocated bytes on heavy Sena words. Sharing only ever happens when the source
+        // shape is already frozen, so the shared instance is immutable: any later mutation attempt
+        // throws (Shape's freeze guards) instead of silently corrupting the other holder, and any
+        // legitimate downstream edit goes through another Clone(), which deep-copies as always.
+        private Word(Word word, bool shareFrozenShape)
         {
             _allomorphs = new Dictionary<string, Allomorph>(word._allomorphs);
             Stratum = word.Stratum;
             Source = word;
             // Don't copy Alternatives.
-            _shape = word._shape.Clone();
+            _shape = shareFrozenShape && word._shape.IsFrozen ? word._shape : word._shape.Clone();
             _rootAllomorph = word._rootAllomorph;
             SyntacticFeatureStruct = word.SyntacticFeatureStruct.Clone();
             RealizationalFeatureStruct = word.RealizationalFeatureStruct.Clone();
@@ -98,6 +108,7 @@ protected Word(Word word)
             _isLastAppliedRuleFinal = word._isLastAppliedRuleFinal;
             _isPartial = word._isPartial;
             CurrentTrace = word.CurrentTrace;
+            AnalysisScope = word.AnalysisScope;
             _disjunctiveAllomorphIndices =
                 word._disjunctiveAllomorphIndices == null || word._disjunctiveAllomorphIndices.Count == 0
                     ? null
@@ -226,6 +237,16 @@ public IEnumerable<Morpheme> MorphemesInApplicationOrder
 
         public object CurrentTrace { get; set; }
 
+        /// <summary>
+        /// Carrier for the analysis nogood cache (parse-optimization.md Phase 2). Reference-shared like
+        /// <see cref="CurrentTrace"/>, deliberately excluded from <see cref="FreezeImpl"/> and
+        /// <see cref="ValueEquals"/> so dedup semantics are unchanged. Null for words never routed through
+        /// <see cref="Morpher.ParseWord(string, out object)"/> (e.g. words built directly by rule-level
+        /// unit tests) or while tracing, in which case the cascade that reads this must fall back to
+        /// unmemoized behavior rather than throw.
+        /// </summary>
+        internal AnalysisScope AnalysisScope { get; set; }
+
         public bool IsPartial
         {
             get { return _isPartial; }
@@ -257,6 +278,26 @@ internal bool IsMorphologicalRuleApplicable(IMorphologicalRule rule)
             return curRule == rule || (curRule == null && rule is CompoundingRule);
         }
 
+        /// <summary>
+        /// Exposes the same trail-position state <see cref="IsMorphologicalRuleApplicable"/> checks, so a
+        /// synthesis cascade can look up the one rule (or, when <paramref name="rule"/> comes back null, the
+        /// compounding rules) that could possibly apply next, instead of probing the whole rule battery and
+        /// relying on <see cref="IsMorphologicalRuleApplicable"/> to reject every miss. Returns false when no
+        /// morphological rule can apply at all (<paramref name="rule"/> is meaningless in that case, not "any
+        /// compounding rule" -- that reading only holds when this returns true and <paramref name="rule"/> is
+        /// null).
+        /// </summary>
+        internal bool TryGetNextMorphologicalRuleToApply(out IMorphologicalRule rule)
+        {
+            // Negative index: no morphological rule can apply at all.
+            bool hasPosition = _mruleAppIndex >= 0;
+
+            // A null entry at a valid position still yields true -- the caller reads that as
+            // "compounding rules only"; on false the out value carries no meaning.
+            rule = hasPosition ? _mruleApps[_mruleAppIndex] : null;
+            return hasPosition;
+        }
+
         internal bool HasRemainingRulesFromStratum(Stratum stratum)
         {
             if (_mruleAppIndex < 0)
@@ -356,6 +397,14 @@ internal int GetUnapplicationCount(IMorphologicalRule mrule)
             return numUnapplies;
         }
 
+        /// <summary>
+        /// The full per-rule unapplication-count multiset backing <see cref="GetUnapplicationCount"/>, for
+        /// <see cref="AnalysisStateKey"/> (order-independent analysis-cascade memoization -- see
+        /// parse-optimization.md Phase 2). Null means empty, matching this class's existing lazy-allocation
+        /// convention.
+        /// </summary>
+        internal IReadOnlyDictionary<IMorphologicalRule, int> UnappliedRuleCounts { get { return _mrulesUnapplied; } }
+
         /// <summary>
         /// Notifies this word synthesis that the specified morphological rule has applied.
         /// </summary>
@@ -416,6 +465,15 @@ internal int NonHeadCount
             get { return _nonHeadApps.Count; }
         }
 
+        /// <summary>
+        /// Length of the morphological-rule trail so far -- <c>_mruleApps.Count</c>. Recorded alongside
+        /// <see cref="NonHeadCount"/> at the point a <see cref="AnalysisStateKey"/>'s subtree is memoized
+        /// (parse-optimization.md Phase 3), so a later differently-ordered arrival at the same key knows
+        /// where its own trail ends and the memoized subtree's suffix begins -- see
+        /// <see cref="ReplayOnto"/>.
+        /// </summary>
+        internal int MorphologicalRuleTrailLength { get { return _mruleApps.Count; } }
+
         internal void NonHeadUnapplied(Word nonHead)
         {
             CheckFrozen();
@@ -474,6 +532,54 @@ internal IList<Word> ExpandAlternatives()
             return alternatives;
         }
 
+        /// <summary>
+        /// Re-parents a Word computed while exploring the subtree below some analysis-cascade node N onto
+        /// <paramref name="queryNode"/> -- a different Word that reached the same <see cref="AnalysisStateKey"/>
+        /// as N via a different morphological-rule unapplication order (parse-optimization.md Phase 3's
+        /// positive memo; see <see cref="MemoizedCombinationRuleCascade"/>). Everything computed strictly
+        /// WITHIN the subtree -- deeper shape/feature edits, and any rules or non-heads unapplied below N --
+        /// is a deterministic function of N's content alone (Shape, both FeatureStructs, the rule-unapplication
+        /// multiset, and non-head count all match between N and <paramref name="queryNode"/> by definition of
+        /// an equal key), so it is kept as-is from `this`. Only the two ORDERED structures the key deliberately
+        /// summarizes as counts/multisets -- the morphological-rule trail and the non-head list -- have their
+        /// PREFIX (whatever was accumulated before reaching N) replaced with <paramref name="queryNode"/>'s own
+        /// actual prefix, since arrival order can only ever affect that part.
+        /// </summary>
+        /// <param name="queryNode">The word that hit the memo -- its trail/non-heads become the new prefix.</param>
+        /// <param name="mruleTrailPrefixLength">
+        /// <c>_mruleApps.Count</c> of N at the moment its subtree was memoized -- everything in `this`'s trail
+        /// from this index on is the subtree-local suffix to keep.
+        /// </param>
+        /// <param name="nonHeadPrefixLength">Same, for <c>_nonHeadApps</c>.</param>
+        internal Word ReplayOnto(Word queryNode, int mruleTrailPrefixLength, int nonHeadPrefixLength)
+        {
+            // Shape-sharing clone (parse-optimization.md Phase 10a): only the two trail lists below are
+            // edited before the freeze and the shape is never touched, so the deep Shape copy a plain
+            // Clone() would make here is pure waste.
+            Word replayed = CloneShareFrozenShape();
+
+            // Rule trail = queryNode's entire trail + the entries from mruleTrailPrefixLength onward.
+            var ruleTrail = replayed._mruleApps;
+            var ruleTail = ruleTrail.GetRange(mruleTrailPrefixLength, ruleTrail.Count - mruleTrailPrefixLength);
+            ruleTrail.Clear();
+            ruleTrail.AddRange(queryNode._mruleApps);
+            ruleTrail.AddRange(ruleTail);
+            replayed._mruleAppIndex = ruleTrail.Count - 1;
+
+            // Non-head list, spliced the same way; queryNode's non-heads are cloned rather than shared,
+            // while the kept tail entries are reused as they are.
+            var nonHeads = replayed._nonHeadApps;
+            var nonHeadTail = nonHeads.GetRange(nonHeadPrefixLength, nonHeads.Count - nonHeadPrefixLength);
+            nonHeads.Clear();
+            nonHeads.AddRange(queryNode._nonHeadApps.CloneItems());
+            nonHeads.AddRange(nonHeadTail);
+            replayed._nonHeadAppIndex = nonHeads.Count - 1;
+
+            // Freeze only after both lists and both indices are in their final state.
+            replayed.Freeze();
+            return replayed;
+        }
+
         public Allomorph GetAllomorph(Annotation<ShapeNode> morph)
         {
             var alloID = (string)morph.FeatureStruct.GetValue(HCFeatureSystem.Allomorph);
@@ -584,6 +690,21 @@ public Word Clone()
             return new Word(this);
         }
 
+        /// <summary>
+        /// <see cref="Clone"/>, except the clone shares this word's <see cref="Shape"/> instance instead
+        /// of deep-copying it -- only when that shape is already frozen (otherwise this falls back to a
+        /// normal deep copy, so it is always safe to call). For callers that clone, edit non-shape state,
+        /// and freeze -- never touching the shape -- the deep copy is pure waste: parse-optimization.md
+        /// Phase 10-pre measured the Shape/annotation graph at ~25% of all bytes allocated on heavy
+        /// words, dominated by exactly such clones. The contract is on the caller: the clone's shape must
+        /// never be mutated before the clone is discarded or frozen. Violations fail loudly (the shared
+        /// shape is frozen, so mutation throws) rather than corrupting the source.
+        /// </summary>
+        internal Word CloneShareFrozenShape()
+        {
+            return new Word(word: this, shareFrozenShape: true);
+        }
+
         public override string ToString()
         {
             return Shape.ToRegexString(Stratum.CharacterDefinitionTable, true);
diff --git a/src/SIL.Machine/FeatureModel/FeatureStruct.cs b/src/SIL.Machine/FeatureModel/FeatureStruct.cs
index cc9c083a7..d3f8c4d7c 100644
--- a/src/SIL.Machine/FeatureModel/FeatureStruct.cs
+++ b/src/SIL.Machine/FeatureModel/FeatureStruct.cs
@@ -1368,11 +1368,32 @@ private void EnsureWritable()
             _sharedSource = null;
         }
 
+        // Test hook: incremented every time Freeze() takes the shared-hash shortcut below. A hash- or
+        // value-equality assertion alone can't tell the shortcut apart from the (equally correct) full
+        // walk, since both compute the same result -- this counter is what makes the regression test
+        // non-vacuous (parse-optimization.md Phase 3/3b hit this exact trap with ReplayOnto's memo
+        // tests, both of which passed even when the replay/graft logic was mutated to a no-op).
+        internal static long DiagSharedFreezeHits;
+
         public void Freeze()
         {
             if (IsFrozen)
                 return;
 
+            // A copy-on-write clone that was never mutated still borrows _sharedSource's exact
+            // _definite reference (see EnsureWritable): its hash cannot differ, since a frozen
+            // source's _definite subtree is immutable and any mutation of this clone would have
+            // already inflated a private copy and cleared _shared. Skip the full FreezeImpl walk
+            // and adopt the cached hash directly -- the same shortcut Shape.Freeze() already takes
+            // for its own copy-on-write clones (parse-optimization.md Phase 7b).
+            if (_shared && _sharedSource != null)
+            {
+                IsFrozen = true;
+                _hashCode = _sharedSource.GetFrozenHashCode();
+                System.Threading.Interlocked.Increment(ref DiagSharedFreezeHits); // static counter: atomic
+                return;
+            }
+
             _hashCode = FreezeImpl(new HashSet<FeatureValue>());
         }
 
diff --git a/src/SIL.Machine/Rules/CombinationRuleCascade.cs b/src/SIL.Machine/Rules/CombinationRuleCascade.cs
index 25fdfb8c6..f0818179f 100644
--- a/src/SIL.Machine/Rules/CombinationRuleCascade.cs
+++ b/src/SIL.Machine/Rules/CombinationRuleCascade.cs
@@ -26,6 +26,7 @@ public override IEnumerable<TData> Apply(TData input)
         {
             var output = new HashSet<TData>(Comparer);
             ApplyRules(input, !MultipleApplication ? new HashSet<int>() : null, output);
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine/Rules/InstrumentedRule.cs b/src/SIL.Machine/Rules/InstrumentedRule.cs
new file mode 100644
index 000000000..277deca24
--- /dev/null
+++ b/src/SIL.Machine/Rules/InstrumentedRule.cs
@@ -0,0 +1,118 @@
+using System.Collections.Generic;
+using SIL.Machine.Annotations;
+
+namespace SIL.Machine.Rules
+{
+    /// <summary>
+    /// One observed "context" a rule succeeded under -- e.g. the part of speech of the word it applied to,
+    /// which allomorph/subrule fired, or whether the input was still a bare root. Grammar-constraint mining
+    /// (related to parse-optimization.md: using runtime evidence to suggest rule declarations that could be
+    /// tightened) needs both the count (300 vs 4 is the signal) and a handful of real words (so a linguist
+    /// can eyeball the 4 counterexamples and judge whether they're legitimate or a grammar bug).
+    /// </summary>
+    public class RuleBucket
+    {
+        public const int MaxExamples = 10; // cap on how many example strings are retained
+        public long Count; // every Record call, including those whose example was not kept
+        public readonly List<string> Examples = new List<string>();
+        // Counts the hit; stores the example only while fewer than MaxExamples are held.
+        public void Record(string example)
+        {
+            Count += 1;
+            if (Examples.Count >= MaxExamples)
+                return;
+            Examples.Add(example);
+        }
+    }
+
+    /// <summary>
+    /// Base class that adds per-rule statistics to an IRule: InputCount, OutputCount and SuccessCount
+    /// (reported by Apply implementations through AddRuleStats) plus ElapsedTime (accumulated only by
+    /// rules that time themselves). Name and SubRules are filled in when the rule is created.
+    /// Rules that can distinguish *why* a given application succeeded (which allomorph, which category,
+    /// whether the target was a bare stem, ...) additionally record named buckets via RecordBucket --
+    /// see Morpher.AccumulateRuleStats for how these survive across a whole corpus run instead of being
+    /// cleared per word.
+    /// AddRuleStats and RecordBucket tolerate concurrent callers, so a rule instance that is applied from
+    /// several threads at once (e.g. under one of the Parallel* cascades) does not lose or corrupt counts.
+    /// </summary>
+    /// <typeparam name="TData">The annotated data type the rule is applied to.</typeparam>
+    /// <typeparam name="TOffset">The offset type used by TData's annotations.</typeparam>
+    public abstract class InstrumentedRule<TData, TOffset> : IRule<TData, TOffset>
+        where TData : IAnnotatedData<TOffset>
+    {
+        public string Name { get; set; }
+        public int InputCount;
+        public int OutputCount;
+        public int SuccessCount;
+        public long ElapsedTime;
+        public IList<InstrumentedRule<TData, TOffset>> SubRules = new List<InstrumentedRule<TData, TOffset>>();
+
+        // Keyed by an arbitrary "bucket group" name (e.g. "category", "allomorph") so one rule can report
+        // several independent breakdowns without them being conflated into a single key space.
+        public IDictionary<string, Dictionary<string, RuleBucket>> BucketGroups =
+            new Dictionary<string, Dictionary<string, RuleBucket>>();
+
+        // Guards BucketGroups and the RuleBucket values in it; Dictionary is not safe for concurrent writers.
+        private readonly object _bucketGate = new object();
+
+        // Generic-arity backtick suffix (e.g. "CombinationRuleCascade`2") stripped so reports read as
+        // "CombinationRuleCascade" -- callers that want something more specific (a stratum/template/morpheme
+        // name) still overwrite Name after construction.
+        protected InstrumentedRule()
+        {
+            string typeName = GetType().Name;
+            int tickIndex = typeName.IndexOf('`');
+            Name = tickIndex < 0 ? typeName : typeName.Substring(0, tickIndex);
+        }
+
+        protected void AddSubRules(IEnumerable<IRule<TData, TOffset>> rules)
+        {
+            foreach (IRule<TData, TOffset> rule in rules)
+                AddSubRule(rule);
+        }
+
+        // A rule that is not an InstrumentedRule is stored as a null placeholder (ClearStats skips nulls).
+        protected void AddSubRule(IRule<TData, TOffset> rule)
+        {
+            SubRules.Add(rule as InstrumentedRule<TData, TOffset>);
+        }
+
+        // Counts one Apply call that produced outputCount results; atomic so concurrent calls are not lost.
+        protected void AddRuleStats(int outputCount)
+        {
+            System.Threading.Interlocked.Increment(ref InputCount);
+            System.Threading.Interlocked.Add(ref OutputCount, outputCount);
+            if (outputCount > 0)
+                System.Threading.Interlocked.Increment(ref SuccessCount);
+        }
+
+        // group examples: "category" ("Verb", "Noun", ...), "allomorph" ("0", "1", ...), "stemName",
+        // "rootDirect" ("true"/"false"). Callers pick whichever groups are meaningful for that rule type.
+        protected void RecordBucket(string group, string key, string example)
+        {
+            lock (_bucketGate)
+            {
+                if (!BucketGroups.TryGetValue(group, out Dictionary<string, RuleBucket> buckets))
+                    BucketGroups[group] = buckets = new Dictionary<string, RuleBucket>();
+                if (!buckets.TryGetValue(key, out RuleBucket bucket))
+                    buckets[key] = bucket = new RuleBucket();
+                bucket.Record(example);
+            }
+        }
+
+        public void ClearStats()
+        {
+            InputCount = 0;
+            OutputCount = 0;
+            SuccessCount = 0;
+            ElapsedTime = 0;
+            lock (_bucketGate)
+                BucketGroups.Clear();
+            foreach (var rule in SubRules)
+                rule?.ClearStats();
+        }
+
+        public abstract IEnumerable<TData> Apply(TData input);
+    }
+}
diff --git a/src/SIL.Machine/Rules/LinearRuleCascade.cs b/src/SIL.Machine/Rules/LinearRuleCascade.cs
index b4e985a33..1e413999a 100644
--- a/src/SIL.Machine/Rules/LinearRuleCascade.cs
+++ b/src/SIL.Machine/Rules/LinearRuleCascade.cs
@@ -26,6 +26,7 @@ public override IEnumerable<TData> Apply(TData input)
         {
             var output = new HashSet<TData>(Comparer);
             ApplyRules(input, 0, output);
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs b/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs
index b698989f1..c7ff9ae12 100644
--- a/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs
+++ b/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs
@@ -81,7 +81,9 @@ public override IEnumerable<TData> Apply(TData input)
                 to = temp;
             }
 
-            return output.Distinct(Comparer);
+            TData[] distinctOutput = output.Distinct(Comparer).ToArray();
+            AddRuleStats(distinctOutput.Length);
+            return distinctOutput;
         }
     }
 }
diff --git a/src/SIL.Machine/Rules/ParallelRuleBatch.cs b/src/SIL.Machine/Rules/ParallelRuleBatch.cs
index afe053c79..9663a0bcd 100644
--- a/src/SIL.Machine/Rules/ParallelRuleBatch.cs
+++ b/src/SIL.Machine/Rules/ParallelRuleBatch.cs
@@ -28,7 +28,9 @@ public override IEnumerable<TData> Apply(TData input)
                 }
             );
 
-            return output.Distinct(Comparer);
+            TData[] distinctOutput = output.Distinct(Comparer).ToArray();
+            AddRuleStats(distinctOutput.Length);
+            return distinctOutput;
         }
     }
 }
diff --git a/src/SIL.Machine/Rules/PermutationRuleCascade.cs b/src/SIL.Machine/Rules/PermutationRuleCascade.cs
index b16671f44..3af96a395 100644
--- a/src/SIL.Machine/Rules/PermutationRuleCascade.cs
+++ b/src/SIL.Machine/Rules/PermutationRuleCascade.cs
@@ -26,6 +26,7 @@ public override IEnumerable<TData> Apply(TData input)
         {
             var output = new HashSet<TData>(Comparer);
             ApplyRules(input, 0, output);
+            AddRuleStats(output.Count);
             return output;
         }
 
diff --git a/src/SIL.Machine/Rules/PipelineRuleCascade.cs b/src/SIL.Machine/Rules/PipelineRuleCascade.cs
index 524300883..1db20338f 100644
--- a/src/SIL.Machine/Rules/PipelineRuleCascade.cs
+++ b/src/SIL.Machine/Rules/PipelineRuleCascade.cs
@@ -29,6 +29,7 @@ public override IEnumerable<TData> Apply(TData input)
                 inputSet = outputSet;
             }
 
+            AddRuleStats(outputSet.Count);
             return outputSet;
         }
     }
diff --git a/src/SIL.Machine/Rules/RuleBatch.cs b/src/SIL.Machine/Rules/RuleBatch.cs
index 61249068b..cea1af827 100644
--- a/src/SIL.Machine/Rules/RuleBatch.cs
+++ b/src/SIL.Machine/Rules/RuleBatch.cs
@@ -4,7 +4,7 @@
 
 namespace SIL.Machine.Rules
 {
-    public class RuleBatch<TData, TOffset> : IRule<TData, TOffset>
+    public class RuleBatch<TData, TOffset> : InstrumentedRule<TData, TOffset>
         where TData : IAnnotatedData<TOffset>
     {
         private readonly List<IRule<TData, TOffset>> _rules;
@@ -25,6 +25,7 @@ public RuleBatch(IEnumerable<IRule<TData, TOffset>> rules, bool disjunctive, IEq
             _rules = new List<IRule<TData, TOffset>>(rules);
             _disjunctive = disjunctive;
             _comparer = comparer;
+            AddSubRules(_rules);
         }
 
         public IReadOnlyList<IRule<TData, TOffset>> Rules
@@ -42,16 +43,20 @@ public bool IsDisjunctive
             get { return _disjunctive; }
         }
 
-        public virtual IEnumerable<TData> Apply(TData input)
+        public override IEnumerable<TData> Apply(TData input)
         {
             var output = new HashSet<TData>(_comparer);
             foreach (IRule<TData, TOffset> rule in _rules)
             {
                 output.UnionWith(rule.Apply(input));
                 if (_disjunctive && output.Count > 0)
+                {
+                    AddRuleStats(output.Count);
                     return output;
+                }
             }
 
+            AddRuleStats(output.Count);
             return output;
         }
     }
diff --git a/src/SIL.Machine/Rules/RuleCascade.cs b/src/SIL.Machine/Rules/RuleCascade.cs
index a139e8ced..c1876d9c4 100644
--- a/src/SIL.Machine/Rules/RuleCascade.cs
+++ b/src/SIL.Machine/Rules/RuleCascade.cs
@@ -5,7 +5,7 @@
 
 namespace SIL.Machine.Rules
 {
-    public abstract class RuleCascade<TData, TOffset> : IRule<TData, TOffset>
+    public abstract class RuleCascade<TData, TOffset> : InstrumentedRule<TData, TOffset>
         where TData : IAnnotatedData<TOffset>
     {
         private readonly ReadOnlyList<IRule<TData, TOffset>> _rules;
@@ -30,6 +30,7 @@ IEqualityComparer<TData> comparer
             _rules = new ReadOnlyList<IRule<TData, TOffset>>(rules.ToList());
             _multiApp = multiApp;
             _comparer = comparer;
+            AddSubRules(_rules);
         }
 
         public IEqualityComparer<TData> Comparer
@@ -47,7 +48,7 @@ public IReadOnlyList<IRule<TData, TOffset>> Rules
             get { return _rules; }
         }
 
-        public abstract IEnumerable<TData> Apply(TData input);
+        public abstract override IEnumerable<TData> Apply(TData input);
 
         protected virtual IEnumerable<TData> ApplyRule(IRule<TData, TOffset> rule, int index, TData input)
         {
diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs
index 8245d17a1..1f5b6c243 100644
--- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs
+++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs
@@ -553,4 +553,303 @@ private static string AnalysisSignature(Morpher morpher, string word)
                 .OrderBy(s => s, System.StringComparer.Ordinal)
         );
     }
+
+    [Test]
+    public void ParseWord_SingleThreaded_MatchesParallel_WithCompounding()
+    {
+        // Exercises the Phase-3 positive-memo replay path (parse-optimization.md) specifically for
+        // compounding, not just plain affixes: an affix rule that commutes with a compounding rule --
+        // both peers in the same Unordered MorphologicalRules cascade -- means the analysis cascade can
+        // revisit an equal AnalysisStateKey (same shape/features/rule-counts/non-head count) reached via
+        // different arrival orders, where the accumulated _nonHeadApps prefix need not be identical
+        // across arrivals even though the key treats non-heads as a bare count. Word.ReplayOnto grafts
+        // each arrival's OWN accumulated trail/non-head prefix onto a memoized subtree's suffix rather
+        // than reusing the memoized arrival's prefix verbatim -- if that graft were wrong, the memoized
+        // single-threaded cascade would diverge from the (unmemoized) parallel cascade here.
+        var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;
+        var rule1 = new CompoundingRule { Name = "rule1" };
+        Allophonic.MorphologicalRules.Add(rule1);
+        rule1.Subrules.Add(
+            new CompoundingSubrule
+            {
+                HeadLhs = { Pattern<Word, int>.New("head").Annotation(any).OneOrMore.Value },
+                NonHeadLhs = { Pattern<Word, int>.New("nonHead").Annotation(any).OneOrMore.Value },
+                Rhs = { new CopyFromInput("head"), new InsertSegments(Table3, "+"), new CopyFromInput("nonHead") },
+            }
+        );
+
+        var prefix = new AffixProcessRule
+        {
+            Name = "prefix",
+            Gloss = "PAST",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
+            OutSyntacticFeatureStruct = FeatureStruct
+                .New(Language.SyntacticFeatureSystem)
+                .Feature(Head)
+                .EqualTo(head => head.Feature("tense").EqualTo("past"))
+                .Value,
+        };
+        Allophonic.MorphologicalRules.Insert(0, prefix);
+        prefix.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs = { Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value },
+                Rhs = { new InsertSegments(Table3, "di+"), new CopyFromInput("1") },
+            }
+        );
+
+        var parallel = new Morpher(TraceManager, Language);
+        var singleThreaded = new Morpher(TraceManager, Language, maxDegreeOfParallelism: 1);
+
+        foreach (string word in new[] { "pʰutdidat", "pʰutdat" })
+        {
+            List<Word> singleResult = singleThreaded.ParseWord(word).ToList();
+            List<Word> parallelResult = parallel.ParseWord(word).ToList();
+            Assert.That(
+                singleResult.Select(WordResultSignature).OrderBy(s => s, System.StringComparer.Ordinal),
+                Is.EqualTo(parallelResult.Select(WordResultSignature).OrderBy(s => s, System.StringComparer.Ordinal)),
+                $"single-threaded parse of '{word}' must match the parallel parse"
+            );
+        }
+    }
+
+    private static string WordResultSignature(Word word)
+    {
+        // AllomorphsInMorphOrder alone would not catch a broken trail/non-head graft (it walks Shape
+        // annotations, which ReplayOnto never touches) -- MorphemesInApplicationOrder walks _mruleApps/
+        // _nonHeadApps directly, which is exactly what Word.ReplayOnto rewrites.
+        return string.Join("+", word.AllomorphsInMorphOrder.Select(a => a.Morpheme.Id))
+            + "|"
+            + string.Join("+", word.MorphemesInApplicationOrder.Select(m => m.Id));
+    }
+
+    [Test]
+    public void ParseWord_SingleThreaded_MatchesParallel_WithAffixTemplate()
+    {
+        // Exercises the template-battery memo (AnalysisStratumRule.ApplyTemplateBattery) specifically:
+        // TWO free prefix rules that commute with each other and with a template slot suffix. Unapplying
+        // di-then-ku vs ku-then-di reaches the same AnalysisStateKey (same shape, same rule MULTISET)
+        // with a different trail ORDER, so the second arrival replays the first arrival's stored
+        // template outputs with its own trail prefix grafted on (Word.ReplayOnto). One commuting prefix
+        // is NOT enough -- a single rule can only unapply once, so no key would ever be re-arrived at
+        // and the memo would never fire (verified: with one prefix the replay counter stays 0).
+        var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;
+
+        var edSuffix = new AffixProcessRule
+        {
+            Id = "TPAST",
+            Name = "template_ed_suffix",
+            Gloss = "PAST",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
+        };
+        edSuffix.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs = { Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value },
+                Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") },
+            }
+        );
+        var verbTemplate = new AffixTemplate
+        {
+            Name = "verb_template",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
+        };
+        verbTemplate.Slots.Add(new AffixTemplateSlot(edSuffix) { Optional = true });
+        Morphophonemic.AffixTemplates.Add(verbTemplate);
+
+        var diPrefix = new AffixProcessRule
+        {
+            Id = "TDI",
+            Name = "template_di_prefix",
+            Gloss = "DI",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
+        };
+        diPrefix.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs = { Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value },
+                Rhs = { new InsertSegments(Table3, "di+"), new CopyFromInput("1") },
+            }
+        );
+        Morphophonemic.MorphologicalRules.Add(diPrefix);
+
+        var kuPrefix = new AffixProcessRule
+        {
+            Id = "TKU",
+            Name = "template_ku_prefix",
+            Gloss = "KU",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
+        };
+        kuPrefix.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs = { Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value },
+                Rhs = { new InsertSegments(Table3, "gu+"), new CopyFromInput("1") },
+            }
+        );
+        Morphophonemic.MorphologicalRules.Add(kuPrefix);
+
+        try
+        {
+            var parallel = new Morpher(TraceManager, Language);
+            var singleThreaded = new Morpher(TraceManager, Language, maxDegreeOfParallelism: 1);
+
+            AnalysisStratumRule.DiagTemplateMemoHits = 0;
+            foreach (string word in new[] { "digusagd", "disagd", "gusagd", "sagd", "sag" })
+            {
+                List<Word> singleResult = singleThreaded.ParseWord(word).ToList();
+                List<Word> parallelResult = parallel.ParseWord(word).ToList();
+                Assert.That(
+                    singleResult.Select(WordResultSignature).OrderBy(s => s, System.StringComparer.Ordinal),
+                    Is.EqualTo(
+                        parallelResult.Select(WordResultSignature).OrderBy(s => s, System.StringComparer.Ordinal)
+                    ),
+                    $"single-threaded parse of '{word}' must match the parallel parse"
+                );
+                Assert.That(
+                    singleResult,
+                    Has.Count.EqualTo(parallelResult.Count),
+                    $"'{word}' sanity: both engines agree on parse count"
+                );
+            }
+            // Guards against this test going vacuous: the replay path must actually fire for this
+            // grammar. (Mutation-tested like Phase 3's mrule-memo test, with the same result: breaking
+            // the ReplayOnto graft -- returning stored words verbatim -- does NOT fail the equivalence
+            // assertions above, because merge-by-shape plus ExpandAlternatives make trail-order
+            // differences unobservable in final signatures for grammars like this one. The graft's
+            // necessity rests on the construction argument documented in MemoizedCombinationRuleCascade;
+            // this assertion at least pins that the memoized path is exercised at all.)
+            Assert.That(
+                AnalysisStratumRule.DiagTemplateMemoHits,
+                Is.GreaterThan(0),
+                "the template memo's replay path must actually fire for this grammar -- if this "
+                    + "trips, the test grammar no longer forces a re-arrival at an equal state key "
+                    + "and the equivalence assertions above are vacuously passing"
+            );
+        }
+        finally
+        {
+            Morphophonemic.AffixTemplates.Remove(verbTemplate);
+            Morphophonemic.MorphologicalRules.Remove(diPrefix);
+            Morphophonemic.MorphologicalRules.Remove(kuPrefix);
+        }
+    }
+
+    [Test]
+    public void EnableLexicalGating_MatchesDisabled_SimpleAffixGrammar()
+    {
+        // parse-optimization.md Phase 5: on a grammar with no reduplication/compounding/metathesis, the
+        // lexical gate should qualify and activate, but must never change which analyses come out --
+        // it only prunes subtrees that could never reach any root, and Entries["32"] ("sag") is directly
+        // reachable the whole way through this simple suffix's unapplication.
+        var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;
+        var suffix = new AffixProcessRule
+        {
+            Id = "LEX_GATE_TEST_SUFFIX",
+            Name = "lex_gate_test_suffix",
+            Gloss = "PAST",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value,
+        };
+        suffix.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs = { Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value },
+                Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") },
+            }
+        );
+        Morphophonemic.MorphologicalRules.Add(suffix);
+        try
+        {
+            Assert.That(
+                GrammarAnalyzer.IsEdgeStripperQualified(Language),
+                Is.True,
+                "precondition: this grammar has no reduplication/compounding/metathesis"
+            );
+
+            var gateOff = new Morpher(TraceManager, Language, maxDegreeOfParallelism: 1);
+            var gateOn = new Morpher(TraceManager, Language, maxDegreeOfParallelism: 1) { EnableLexicalGating = true };
+
+            foreach (string word in new[] { "sagd", "sag" })
+            {
+                List<Word> offResult = gateOff.ParseWord(word).ToList();
+                List<Word> onResult = gateOn.ParseWord(word).ToList();
+                Assert.That(
+                    onResult.Select(WordResultSignature).OrderBy(s => s, System.StringComparer.Ordinal),
+                    Is.EqualTo(
+                        offResult.Select(WordResultSignature).OrderBy(s => s, System.StringComparer.Ordinal)
+                    ),
+                    $"lexical-gate-on parse of '{word}' must match gate-off parse"
+                );
+            }
+        }
+        finally
+        {
+            Morphophonemic.MorphologicalRules.Remove(suffix);
+        }
+    }
+
+    [Test]
+    public void IsEdgeStripperQualified_ReturnsFalse_ForReduplication()
+    {
+        // The same Lhs part copied twice in Rhs -- GrammarAnalyzer's own definition of reduplication
+        // (mirrors AnalysisMorphologicalTransform's capturedParts[name] > 1 case).
+        var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;
+        var redup = new AffixProcessRule
+        {
+            Id = "LEX_GATE_TEST_REDUP",
+            Name = "lex_gate_test_redup",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("N").Value,
+        };
+        redup.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs = { Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value },
+                Rhs = { new CopyFromInput("1"), new CopyFromInput("1") },
+            }
+        );
+        Allophonic.MorphologicalRules.Add(redup);
+        try
+        {
+            Assert.That(GrammarAnalyzer.IsEdgeStripperQualified(Language), Is.False);
+        }
+        finally
+        {
+            Allophonic.MorphologicalRules.Remove(redup);
+        }
+    }
+
+    [Test]
+    public void IsEdgeStripperQualified_ReturnsFalse_ForInfixation()
+    {
+        // Material inserted BETWEEN two copied (and here, distinct) parts splits the input's own
+        // contiguous material apart -- a real root's contiguous window in the lexicon would no longer
+        // appear as a contiguous window in this rule's output.
+        var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;
+        var infix = new AffixProcessRule
+        {
+            Id = "LEX_GATE_TEST_INFIX",
+            Name = "lex_gate_test_infix",
+            RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("N").Value,
+        };
+        infix.Allomorphs.Add(
+            new AffixProcessAllomorph
+            {
+                Lhs =
+                {
+                    Pattern<Word, int>.New("1").Annotation(any).OneOrMore.Value,
+                    Pattern<Word, int>.New("2").Annotation(any).OneOrMore.Value,
+                },
+                Rhs = { new CopyFromInput("1"), new InsertSegments(Table1, "um"), new CopyFromInput("2") },
+            }
+        );
+        Allophonic.MorphologicalRules.Add(infix);
+        try
+        {
+            Assert.That(GrammarAnalyzer.IsEdgeStripperQualified(Language), Is.False);
+        }
+        finally
+        {
+            Allophonic.MorphologicalRules.Remove(infix);
+        }
+    }
 }
diff --git a/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs b/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs
index 2f5a6430d..6ab6eb250 100644
--- a/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs
+++ b/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs
@@ -1223,6 +1223,27 @@ public void Clone_OfFrozen_NeverMutated_EqualsSourceBothDirections()
         Assert.That(FreezableEqualityComparer<FeatureStruct>.Default.Equals(source, clone), Is.True);
     }
 
+    [Test]
+    public void Clone_OfFrozen_NeverMutated_Freeze_MatchesSourceFrozenHashCode()
+    {
+        // parse-optimization.md Phase 7b: Freeze() on a copy-on-write clone that borrows a frozen
+        // source's exact backing (never mutated, so _shared stays true) must adopt the source's
+        // already-computed hash rather than recomputing it -- the source's _definite subtree is
+        // immutable, so the two are guaranteed to hash identically. A hash/value-equality assertion
+        // alone can't distinguish the shortcut from the (equally correct) full walk -- both compute
+        // the same answer -- so this also asserts the counter that proves the shortcut actually fired.
+        FeatureSystem featSys = CowFeatSys();
+        FeatureStruct source = BuildNestedFrozen(featSys);
+
+        FeatureStruct clone = source.Clone();
+        long hitsBefore = FeatureStruct.DiagSharedFreezeHits;
+        clone.Freeze();
+
+        Assert.That(FeatureStruct.DiagSharedFreezeHits, Is.EqualTo(hitsBefore + 1));
+        Assert.That(clone.GetFrozenHashCode(), Is.EqualTo(source.GetFrozenHashCode()));
+        Assert.That(source.ValueEquals(clone), Is.True);
+    }
+
     [Test]
     public void Clone_FrozenReentrant_MutateClone_PreservesSharingAndLeavesSourceUnchanged()
     {