diff --git a/FieldWorks.sln b/FieldWorks.sln
index aae0cc092f..1b2e68a769 100644
--- a/FieldWorks.sln
+++ b/FieldWorks.sln
@@ -144,6 +144,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserCore", "Src\LexText\P
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserCoreTests", "Src\LexText\ParserCore\ParserCoreTests\ParserCoreTests.csproj", "{E5F82767-7DC7-599F-BC29-AAFE4AC98060}"
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HCWorker", "Src\LexText\HCWorker\HCWorker.csproj", "{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HCWorkerTests", "Src\LexText\HCWorker\HCWorkerTests\HCWorkerTests.csproj", "{9CF72C1E-F5E8-463C-B53D-0F39979742F9}"
+EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserUI", "Src\LexText\ParserUI\ParserUI.csproj", "{09D7C8FE-DD9B-5C1C-9A4D-9D61B26E878E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserUITests", "Src\LexText\ParserUI\ParserUITests\ParserUITests.csproj", "{2310A14E-5FFA-5939-885C-DA681EAFC168}"
@@ -709,6 +713,18 @@ Global
{1DD0C70B-EA9D-593E-BF23-72FEAB6849DF}.Debug|x64.Build.0 = Debug|x64
{1DD0C70B-EA9D-593E-BF23-72FEAB6849DF}.Release|x64.ActiveCfg = Release|x64
{1DD0C70B-EA9D-593E-BF23-72FEAB6849DF}.Release|x64.Build.0 = Release|x64
+ {27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Bounds|x64.ActiveCfg = Release|x64
+ {27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Bounds|x64.Build.0 = Release|x64
+ {27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Debug|x64.ActiveCfg = Debug|x64
+ {27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Debug|x64.Build.0 = Debug|x64
+ {27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Release|x64.ActiveCfg = Release|x64
+ {27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Release|x64.Build.0 = Release|x64
+ {9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Bounds|x64.ActiveCfg = Release|x64
+ {9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Bounds|x64.Build.0 = Release|x64
+ {9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Debug|x64.ActiveCfg = Debug|x64
+ {9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Debug|x64.Build.0 = Debug|x64
+ {9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Release|x64.ActiveCfg = Release|x64
+ {9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Release|x64.Build.0 = Release|x64
{E5F82767-7DC7-599F-BC29-AAFE4AC98060}.Bounds|x64.ActiveCfg = Release|x64
{E5F82767-7DC7-599F-BC29-AAFE4AC98060}.Bounds|x64.Build.0 = Release|x64
{E5F82767-7DC7-599F-BC29-AAFE4AC98060}.Debug|x64.ActiveCfg = Debug|x64
diff --git a/Src/LexText/HCWorker/App.config b/Src/LexText/HCWorker/App.config
new file mode 100644
index 0000000000..c25b035574
--- /dev/null
+++ b/Src/LexText/HCWorker/App.config
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Src/LexText/HCWorker/HCWorker.csproj b/Src/LexText/HCWorker/HCWorker.csproj
new file mode 100644
index 0000000000..dd26f5827c
--- /dev/null
+++ b/Src/LexText/HCWorker/HCWorker.csproj
@@ -0,0 +1,54 @@
+
+
+
+ HCWorker
+ SIL.FieldWorks.WordWorks.Parser.HCWorker
+ net48
+ Exe
+ win-x64168,169,219,414,649,1635,1702,1701
+ false
+ false
+
+ App.config
+
+
+ true
+ portable
+ false
+ DEBUG;TRACE
+
+
+ portable
+ true
+ TRACE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Properties\CommonAssemblyInfo.cs
+
+
+
diff --git a/Src/LexText/HCWorker/HCWorkerService.cs b/Src/LexText/HCWorker/HCWorkerService.cs
new file mode 100644
index 0000000000..a404149961
--- /dev/null
+++ b/Src/LexText/HCWorker/HCWorkerService.cs
@@ -0,0 +1,165 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.ServiceModel;
+using System.Threading.Tasks;
+using SIL.FieldWorks.WordWorks.Parser;
+using SIL.Machine.Annotations;
+using SIL.Machine.Morphology.HermitCrab;
+using SIL.Machine.Morphology.HermitCrab.MorphologicalRules;
+
+namespace SIL.FieldWorks.WordWorks.Parser.HCWorker
+{
+ /// <summary>
+ /// Hosts one Morpher for the lifetime of the worker process. One instance is shared across all
+ /// WCF calls (InstanceContextMode.Single) and calls run concurrently (ConcurrencyMode.Multiple)
+ /// - Morpher.ParseWord was already called this way in-process (ParserWorker's parallel batch,
+ /// each iteration calling HCParser.ParseWord -> Morpher.ParseWord with no external locking), so
+ /// moving it out-of-process introduces no new thread-safety requirement.
+ ///
+ /// The DTO extraction below (ToWordAnalysisDto) is the id-collection half of HCParser.GetMorphs,
+ /// running here where the Word/Allomorph/Morpheme graph lives; the LCM-object-resolution half
+ /// stays in HCParser.GetMorphs, which consumes the returned MorphDto[]. The Form/Msa/InflType
+ /// keys come from HCParser's own constants, so worker and client can never disagree on them.
+ /// </summary>
+ [ServiceBehavior(InstanceContextMode = InstanceContextMode.Single, ConcurrencyMode = ConcurrencyMode.Multiple)]
+ public class HCWorkerService : IHCWorkerService
+ {
+ private volatile Morpher _morpher;
+
+ /// <summary>
+ /// Builds a new Morpher from the supplied grammar and publishes it for subsequent parse calls.
+ /// </summary>
+ /// <param name="grammar">The grammar XML plus the morpher settings to apply to the new Morpher.</param>
+ /// <exception cref="ArgumentNullException"><paramref name="grammar"/> is null.</exception>
+ /// <exception cref="ArgumentException">The grammar carries no XML to load.</exception>
+ public void UpdateGrammar(HCGrammarDto grammar)
+ {
+     if (grammar == null)
+         throw new ArgumentNullException(nameof(grammar));
+     // Fail with a clear message here instead of an opaque loader error on an empty temp file.
+     if (string.IsNullOrEmpty(grammar.CompiledGrammarXml))
+         throw new ArgumentException("The grammar does not contain any compiled grammar XML.", nameof(grammar));
+
+     // XmlLanguageLoader.Load only takes a file path, so round-trip the grammar XML through a
+     // temp file rather than adding a string/stream overload to the HC library.
+     string tempPath = Path.Combine(Path.GetTempPath(), $"hcworker-grammar-{Guid.NewGuid():N}.xml");
+     try
+     {
+         File.WriteAllText(tempPath, grammar.CompiledGrammarXml);
+         Language language = XmlLanguageLoader.Load(tempPath);
+         // The field is assigned only once the new Morpher is fully constructed, so a failed
+         // load leaves the previously published Morpher (if any) in place.
+         _morpher = new Morpher(new TraceManager(), language)
+         {
+             DeletionReapplications = grammar.DeletionReapplications,
+             MaxStemCount = grammar.MaxStemCount,
+             MergeEquivalentAnalyses = grammar.MergeEquivalentAnalyses
+         };
+     }
+     finally
+     {
+         try
+         {
+             File.Delete(tempPath);
+         }
+         catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
+         {
+             // Best-effort cleanup; a stray temp file is not worth failing the grammar update
+             // over. File.Delete reports a locked file as IOException and a read-only or
+             // access-denied file as UnauthorizedAccessException, so both are tolerated.
+         }
+     }
+ }
+
+ /// <summary>
+ /// Parses a single word with the current Morpher and projects each analysis to a DTO.
+ /// </summary>
+ /// <param name="word">The word form to parse.</param>
+ /// <param name="guessRoots">Passed through to Morpher.ParseWord.</param>
+ /// <returns>One WordAnalysisDto per analysis the Morpher produced.</returns>
+ public WordAnalysisDto[] ParseWord(string word, bool guessRoots)
+ {
+     var analyses = RequireMorpher().ParseWord(word, out _, guessRoots);
+     return analyses.Select(ToWordAnalysisDto).ToArray();
+ }
+
+ /// <summary>
+ /// Parses every word of the batch in parallel against the current Morpher.
+ /// </summary>
+ /// <param name="words">Word forms to parse. Null entries are skipped because they cannot be dictionary keys.</param>
+ /// <param name="guessRoots">Passed through to Morpher.ParseWord.</param>
+ /// <returns>
+ /// One entry per distinct non-null word; a word whose parse threw maps to an empty array.
+ /// </returns>
+ /// <exception cref="InvalidOperationException">UpdateGrammar has not been called yet.</exception>
+ /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
+ public IDictionary<string, WordAnalysisDto[]> ParseWordsBatch(string[] words, bool guessRoots)
+ {
+     Morpher morpher = RequireMorpher();
+     if (words == null)
+         throw new ArgumentNullException(nameof(words));
+
+     var results = new ConcurrentDictionary<string, WordAnalysisDto[]>();
+     // Parses the whole batch server-side, bounded only by the processor count: the lower cap
+     // in ParserWorker existed only to keep FieldWorks' UI thread responsive under Workstation
+     // GC, which no longer applies once parsing lives in this Server-GC process.
+     Parallel.ForEach(
+         words,
+         new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount },
+         word =>
+         {
+             // A null word cannot be stored as a key; writing it in the catch block below
+             // would throw outside the guard and abort the whole batch.
+             if (word == null)
+                 return;
+
+             try
+             {
+                 results[word] = morpher.ParseWord(word, out _, guessRoots).Select(ToWordAnalysisDto).ToArray();
+             }
+             catch (Exception)
+             {
+                 // Guard each word so one unexpected exception (e.g. an out-of-vocabulary
+                 // character, which throws InvalidShapeException) cannot abort the whole
+                 // batch, mirroring ParserWorker.ParseAndUpdateWordformGuarded and
+                 // HCParser.ParseWord's own try/catch around the equivalent call.
+                 results[word] = Array.Empty<WordAnalysisDto>();
+             }
+         }
+     );
+     // Return a plain Dictionary: DataContractSerializer's IDictionary support is defined in
+     // terms of the concrete Dictionary shape, so don't rely on ConcurrentDictionary matching it.
+     return new Dictionary<string, WordAnalysisDto[]>(results);
+ }
+
+ /// <summary>
+ /// Returns the currently published Morpher, or fails if no grammar has been loaded yet.
+ /// </summary>
+ /// <exception cref="InvalidOperationException">UpdateGrammar has not been called yet.</exception>
+ private Morpher RequireMorpher()
+ {
+     // Read the volatile field once so the null check and the returned reference agree even
+     // if UpdateGrammar swaps the field concurrently.
+     Morpher current = _morpher;
+     return current ?? throw new InvalidOperationException("UpdateGrammar must be called before parsing.");
+ }
+
+ /// <summary>
+ /// Projects one parsed Word down to the flat, serializable ids the FieldWorks side needs.
+ /// </summary>
+ /// <param name="ws">A word analysis produced by Morpher.ParseWord.</param>
+ /// <returns>
+ /// A DTO with one MorphDto per morph that carries a non-zero form id, in morph order.
+ /// Morphs of the same Morpheme instance share a MorphemeIndex.
+ /// </returns>
+ /// <exception cref="InvalidOperationException">A kept morph's morpheme has no MsaID property.</exception>
+ internal static WordAnalysisDto ToWordAnalysisDto(Word ws)
+ {
+     // Morpheme -> index of its first appearance among the kept morphs; this is what lets the
+     // receiving side group morphs by morpheme without access to the Morpheme objects.
+     var morphemeIndices = new Dictionary<Morpheme, int>();
+     var morphs = new List<MorphDto>();
+     foreach (Annotation<ShapeNode> morph in ws.Morphs)
+     {
+         Allomorph allomorph = ws.GetAllomorph(morph);
+         int formId = ParseNullableIntProperty(allomorph.Properties, HCParser.FormID) ?? 0;
+         // Morphs without a form id are left out of the DTO entirely.
+         if (formId == 0)
+             continue;
+
+         if (!morphemeIndices.TryGetValue(allomorph.Morpheme, out int morphemeIndex))
+         {
+             morphemeIndex = morphemeIndices.Count;
+             morphemeIndices[allomorph.Morpheme] = morphemeIndex;
+         }
+
+         string formStr = ws.Shape.GetNodes(morph.Range).ToString(ws.Stratum.CharacterDefinitionTable, false);
+         morphs.Add(
+             new MorphDto
+             {
+                 FormId = formId,
+                 FormId2 = ParseNullableIntProperty(allomorph.Properties, HCParser.FormID2) ?? 0,
+                 IsAffixProcessAllomorph = allomorph is AffixProcessAllomorph,
+                 FormStr = formStr,
+                 Guessed = allomorph.Guessed,
+                 MsaId = ParseIntProperty(allomorph.Morpheme.Properties, HCParser.MsaID),
+                 InflTypeId = ParseNullableIntProperty(allomorph.Morpheme.Properties, HCParser.InflTypeID) ?? 0,
+                 MorphemeIndex = morphemeIndex
+             }
+         );
+     }
+     return new WordAnalysisDto { Morphs = morphs.ToArray() };
+ }
+
+ /// <summary>
+ /// Reads a required integer id from an HC property bag.
+ /// </summary>
+ /// <param name="properties">The Properties bag of an allomorph or morpheme.</param>
+ /// <param name="key">The property key to read.</param>
+ /// <returns>The integer value stored under <paramref name="key"/>.</returns>
+ /// <exception cref="InvalidOperationException">The key is absent or its value is null.</exception>
+ /// <exception cref="FormatException">The stored value is not an integer.</exception>
+ private static int ParseIntProperty(IDictionary<string, object> properties, string key)
+ {
+     if (!properties.TryGetValue(key, out object value) || value == null)
+         throw new InvalidOperationException($"Morpheme is missing required property '{key}'.");
+
+     // Properties round-trip through XmlLanguageWriter/XmlLanguageLoader as strings even
+     // though HCLoader stored them as ints (hcEntry.Properties[HCParser.MsaID] = msa.Hvo),
+     // so accept a boxed int directly and parse anything else.
+     if (value is int id)
+         return id;
+
+     // These are machine-written ids, not user text: format and parse with the invariant
+     // culture so the worker's current culture cannot change the result.
+     return int.Parse(
+         Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture),
+         System.Globalization.NumberStyles.Integer,
+         System.Globalization.CultureInfo.InvariantCulture
+     );
+ }
+
+ /// <summary>
+ /// Reads an optional integer id from an HC property bag.
+ /// </summary>
+ /// <param name="properties">The Properties bag of an allomorph or morpheme.</param>
+ /// <param name="key">The property key to read.</param>
+ /// <returns>The integer value, or null when the key is absent or its value is null.</returns>
+ /// <exception cref="FormatException">The stored value is not an integer.</exception>
+ private static int? ParseNullableIntProperty(IDictionary<string, object> properties, string key)
+ {
+     if (!properties.TryGetValue(key, out object value) || value == null)
+         return null;
+
+     // A value set in-process is a boxed int; after the grammar XML round trip it is text.
+     if (value is int id)
+         return id;
+
+     // Machine-written id: format and parse with the invariant culture.
+     return int.Parse(
+         Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture),
+         System.Globalization.NumberStyles.Integer,
+         System.Globalization.CultureInfo.InvariantCulture
+     );
+ }
+ }
+}
diff --git a/Src/LexText/HCWorker/HCWorkerTests/HCWorkerServiceTests.cs b/Src/LexText/HCWorker/HCWorkerTests/HCWorkerServiceTests.cs
new file mode 100644
index 0000000000..aec381547a
--- /dev/null
+++ b/Src/LexText/HCWorker/HCWorkerTests/HCWorkerServiceTests.cs
@@ -0,0 +1,280 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System.IO;
+using System.Linq;
+using System.ServiceModel;
+using NUnit.Framework;
+using SIL.Machine.Annotations;
+using SIL.Machine.FeatureModel;
+using SIL.Machine.Matching;
+using SIL.Machine.Morphology.HermitCrab;
+using SIL.Machine.Morphology.HermitCrab.MorphologicalRules;
+
+namespace SIL.FieldWorks.WordWorks.Parser.HCWorker
+{
+ /// <summary>
+ /// Validates the out-of-process worker along its two risk axes:
+ /// 1. Grammar-transfer fidelity: XmlLanguageWriter.Save -> XmlLanguageLoader.Load preserves the
+ /// ad hoc Properties[HCParser.FormID]/[MsaID]/[InflTypeID] tags HCLoader writes and
+ /// HCParser.GetMorphs needs, and the DTO carries them back across the process boundary -
+ /// plus a real net.pipe ServiceHost/ChannelFactory round trip (catches DataContract mistakes
+ /// an in-proc call would miss). Uses a single-morph grammar built from scratch with
+ /// NCName-safe symbol ids (XmlLanguageWriter emits ids as XML IDs).
+ /// 2. DTO-extraction correctness (HCWorkerService.ToWordAnalysisDto) on a real multi-morph Word,
+ /// including the MorphemeIndex grouping that stands in for GetMorphs' Dictionary&lt;Morpheme,
+ /// MorphInfo&gt; reference-identity lookup.
+ /// </summary>
+ [TestFixture]
+ public class HCWorkerServiceTests
+ {
+ private const int RootMsaId = 42;
+ private const int RootFormId = 555;
+
+ private Language _language;
+ private string _grammarXml;
+
+ /// <summary>
+ /// Builds a minimal one-entry language ("sag") tagged with the FieldWorks ids, and captures
+ /// its XmlLanguageWriter output in _grammarXml for the round-trip tests.
+ /// </summary>
+ [SetUp]
+ public void SetUp()
+ {
+     // Symbol/feature ids double as XML IDs (NCName) once written via XmlLanguageWriter, so
+     // they must be NCName-safe - matching how HCLoader generates ids in production (e.g.
+     // "pos" + msa.Hvo, never a bare symbol character like "+").
+     var phoneticFeatSys = new FeatureSystem
+     {
+         new SymbolicFeature("cons", new FeatureSymbol("consPos", "+"), new FeatureSymbol("consNeg", "-")),
+         new SymbolicFeature("voc", new FeatureSymbol("vocPos", "+"), new FeatureSymbol("vocNeg", "-")),
+         new SymbolicFeature("place", new FeatureSymbol("alveolar"), new FeatureSymbol("velar"))
+     };
+     phoneticFeatSys.Freeze();
+
+     // "s" and "g" are both consonants; a distinguishing "place" feature keeps
+     // GetMatchingStrReps from returning whichever was added first for both.
+     var table = new CharacterDefinitionTable { Name = "table" };
+     AddSeg(table, phoneticFeatSys, "s", "consPos", "vocNeg", "alveolar");
+     AddSeg(table, phoneticFeatSys, "a", "consNeg", "vocPos");
+     AddSeg(table, phoneticFeatSys, "g", "consPos", "vocNeg", "velar");
+
+     var syntacticFeatSys = new SyntacticFeatureSystem();
+     syntacticFeatSys.AddPartsOfSpeech(new FeatureSymbol("V", "Verb"));
+     syntacticFeatSys.Freeze();
+
+     var stratum = new Stratum(table) { Name = "Test", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
+
+     var root = new LexEntry
+     {
+         Id = "root",
+         Gloss = "sag",
+         SyntacticFeatureStruct = FeatureStruct.New(syntacticFeatSys).Symbol("V").Value
+     };
+     root.Allomorphs.Add(new RootAllomorph(new Segments(table, "sag", true)));
+     // Simulate HCLoader tagging the entry's morpheme/allomorph with the ids HCParser.GetMorphs
+     // resolves back to live LCM objects.
+     root.Properties[HCParser.MsaID] = RootMsaId;
+     root.PrimaryAllomorph.Properties[HCParser.FormID] = RootFormId;
+     stratum.Entries.Add(root);
+
+     _language = new Language
+     {
+         Name = "WorkerTest",
+         PhonologicalFeatureSystem = phoneticFeatSys,
+         SyntacticFeatureSystem = syntacticFeatSys,
+         Strata = { stratum }
+     };
+     _language.CharacterDefinitionTables.Add(table);
+
+     string tempPath = Path.Combine(Path.GetTempPath(), $"hcworker-test-{TestContext.CurrentContext.Test.ID}.xml");
+     try
+     {
+         XmlLanguageWriter.Save(_language, tempPath);
+         _grammarXml = File.ReadAllText(tempPath);
+     }
+     finally
+     {
+         // Remove the temp file even when Save/ReadAllText throws, so a failing setup does
+         // not leave files behind. File.Delete does not throw when the file is absent.
+         File.Delete(tempPath);
+     }
+ }
+
+ /// <summary>
+ /// Adds a segment to <paramref name="table"/> whose feature structure holds the given symbols.
+ /// </summary>
+ /// <param name="table">The character definition table to extend.</param>
+ /// <param name="phoneticFeatSys">The feature system the symbol ids are looked up in.</param>
+ /// <param name="strRep">The segment's string representation.</param>
+ /// <param name="symbols">Ids of the feature symbols to set on the segment.</param>
+ private static void AddSeg(
+     CharacterDefinitionTable table,
+     FeatureSystem phoneticFeatSys,
+     string strRep,
+     params string[] symbols
+ )
+ {
+     var segmentFeatures = new FeatureStruct();
+     // Resolve each id to its symbol, then set that symbol as the value of its own feature.
+     foreach (FeatureSymbol featureSymbol in symbols.Select(id => phoneticFeatSys.GetSymbol(id)))
+         segmentFeatures.AddValue(featureSymbol.Feature, new SymbolicFeatureValue(featureSymbol));
+     table.AddSegment(strRep, segmentFeatures);
+ }
+
+ /// <summary>
+ /// Wraps the grammar XML captured in SetUp in a DTO with fixed morpher settings.
+ /// </summary>
+ private HCGrammarDto MakeGrammarDto()
+ {
+     var dto = new HCGrammarDto();
+     dto.CompiledGrammarXml = _grammarXml;
+     dto.DeletionReapplications = 0;
+     dto.MaxStemCount = 2;
+     dto.MergeEquivalentAnalyses = false;
+     return dto;
+ }
+
+ /// <summary>
+ /// After the grammar goes through the XML round trip, parsing "sag" yields one analysis
+ /// whose single morph still carries the form and MSA ids set in SetUp.
+ /// </summary>
+ [Test]
+ public void ParseWord_AfterGrammarRoundTrip_CarriesFieldWorksIds()
+ {
+     var worker = new HCWorkerService();
+     worker.UpdateGrammar(MakeGrammarDto());
+
+     WordAnalysisDto[] analyses = worker.ParseWord("sag", false);
+
+     Assert.That(analyses, Has.Length.EqualTo(1));
+     var morphs = analyses[0].Morphs;
+     Assert.That(morphs, Has.Length.EqualTo(1));
+
+     MorphDto onlyMorph = morphs[0];
+     Assert.That(onlyMorph.FormId, Is.EqualTo(RootFormId));
+     Assert.That(onlyMorph.MsaId, Is.EqualTo(RootMsaId));
+     Assert.That(onlyMorph.FormStr, Is.EqualTo("sag"));
+     Assert.That(onlyMorph.IsAffixProcessAllomorph, Is.False);
+ }
+
+ /// <summary>
+ /// A batch returns an entry for every requested word: one analysis for the known word and
+ /// an empty result for the word the grammar cannot parse.
+ /// </summary>
+ [Test]
+ public void ParseWordsBatch_ReturnsOneEntryPerWord()
+ {
+     string[] requested = { "sag", "nonword" };
+     var worker = new HCWorkerService();
+     worker.UpdateGrammar(MakeGrammarDto());
+
+     var parsed = worker.ParseWordsBatch(requested, false);
+
+     Assert.That(parsed.Keys, Is.EquivalentTo(new[] { "sag", "nonword" }));
+     Assert.That(parsed["sag"], Has.Length.EqualTo(1));
+     Assert.That(parsed["nonword"], Is.Empty);
+ }
+
+ /// <summary>
+ /// Parsing before any grammar has been loaded fails with InvalidOperationException.
+ /// </summary>
+ [Test]
+ public void ParseWord_BeforeUpdateGrammar_Throws()
+ {
+     var service = new HCWorkerService();
+     // The expected exception type is spelled with its namespace because the using directives
+     // visible at the top of this file do not include System.
+     Assert.Throws<System.InvalidOperationException>(() => service.ParseWord("sag", false));
+ }
+
+ /// <summary>
+ /// Hosts the service on a real net.pipe endpoint and drives it through a ChannelFactory
+ /// client, so the DTOs go through actual WCF serialization in both directions.
+ /// </summary>
+ [Test]
+ public void OverWcfNamedPipe_RoundTripsCorrectly()
+ {
+     string pipeName = "hcworker-test-" + TestContext.CurrentContext.Test.ID;
+     string address = "net.pipe://localhost/" + pipeName;
+     NetNamedPipeBinding pipeBinding = PipeBindingFactory.Create();
+
+     using (var host = new ServiceHost(new HCWorkerService()))
+     {
+         host.AddServiceEndpoint(typeof(IHCWorkerService), pipeBinding, address);
+         host.Open();
+
+         var factory = new ChannelFactory<IHCWorkerService>(pipeBinding, new EndpointAddress(address));
+         try
+         {
+             IHCWorkerService client = factory.CreateChannel();
+
+             client.UpdateGrammar(MakeGrammarDto());
+             WordAnalysisDto[] result = client.ParseWord("sag", false);
+
+             Assert.That(result, Has.Length.EqualTo(1));
+             Assert.That(result[0].Morphs, Has.Length.EqualTo(1));
+             Assert.That(result[0].Morphs[0].FormId, Is.EqualTo(RootFormId));
+
+             var batch = client.ParseWordsBatch(new[] { "sag" }, false);
+             Assert.That(batch["sag"], Has.Length.EqualTo(1));
+
+             ((IClientChannel)client).Close();
+             factory.Close();
+             host.Close();
+         }
+         catch
+         {
+             // A failed assertion or service fault leaves the client channel open (or faulted).
+             // A graceful Close at that point can wait on the open session or throw, which
+             // would hide the original failure - so tear both sides down with Abort and
+             // rethrow the original exception.
+             factory.Abort();
+             host.Abort();
+             throw;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Exercises ToWordAnalysisDto directly on a real multi-morph Word (root + suffix rule) - no
+ /// XML round trip, so the Pattern-serialization limitation of the round-trip path doesn't
+ /// apply. This is the scenario GetMorphs' second-occurrence dictionary lookup exists for;
+ /// MorphemeIndex is the DTO's wire-safe replacement for it.
+ /// </summary>
+ [Test]
+ public void ToWordAnalysisDto_MultiMorphWord_GroupsByMorphemeAndFlagsAffixProcessAllomorph()
+ {
+     // Symbol ids need not be NCName-safe here because this language is never written to XML.
+     var phonologicalFeatSys = new FeatureSystem
+     {
+         new SymbolicFeature("cons", new FeatureSymbol("cons+", "+"), new FeatureSymbol("cons-", "-")),
+         new SymbolicFeature("voc", new FeatureSymbol("voc+", "+"), new FeatureSymbol("voc-", "-"))
+     };
+     phonologicalFeatSys.Freeze();
+     var syntacticFeatSys = new SyntacticFeatureSystem();
+     syntacticFeatSys.AddPartsOfSpeech(new FeatureSymbol("V", "Verb"));
+     syntacticFeatSys.Freeze();
+
+     var table = new CharacterDefinitionTable { Name = "table" };
+     AddSeg(table, phonologicalFeatSys, "s", "cons+", "voc-");
+     AddSeg(table, phonologicalFeatSys, "a", "cons-", "voc+");
+     AddSeg(table, phonologicalFeatSys, "g", "cons+", "voc-");
+     AddSeg(table, phonologicalFeatSys, "d", "cons+", "voc-");
+     table.AddBoundary("+");
+
+     var stratum = new Stratum(table) { Name = "Test", MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered };
+
+     var root = new LexEntry
+     {
+         Id = "root",
+         Gloss = "sag",
+         SyntacticFeatureStruct = FeatureStruct.New(syntacticFeatSys).Symbol("V").Value
+     };
+     root.Allomorphs.Add(new RootAllomorph(new Segments(table, "sag", true)));
+     root.Properties[HCParser.MsaID] = RootMsaId;
+     root.PrimaryAllomorph.Properties[HCParser.FormID] = RootFormId;
+     stratum.Entries.Add(root);
+
+     // Suffix rule: copy the whole input (one or more segments) and append "+d".
+     var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;
+     var pastSuffix = new AffixProcessRule
+     {
+         Id = "PAST",
+         Name = "ed_suffix",
+         Gloss = "PAST",
+         RequiredSyntacticFeatureStruct = FeatureStruct.New(syntacticFeatSys).Symbol("V").Value
+     };
+     var suffixAllomorph = new AffixProcessAllomorph
+     {
+         Lhs = { Pattern<Word, ShapeNode>.New("1").Annotation(any).OneOrMore.Value },
+         Rhs = { new CopyFromInput("1"), new InsertSegments(table, "+d") }
+     };
+     const int suffixFormId = 777;
+     const int suffixMsaId = 99;
+     suffixAllomorph.Properties[HCParser.FormID] = suffixFormId;
+     pastSuffix.Properties[HCParser.MsaID] = suffixMsaId;
+     pastSuffix.Allomorphs.Add(suffixAllomorph);
+     stratum.MorphologicalRules.Add(pastSuffix);
+
+     var language = new Language
+     {
+         Name = "WorkerTest",
+         PhonologicalFeatureSystem = phonologicalFeatSys,
+         SyntacticFeatureSystem = syntacticFeatSys,
+         Strata = { stratum }
+     };
+
+     var morpher = new Morpher(new TraceManager(), language);
+     Word word = morpher.ParseWord("sagd", out _, false).Single();
+
+     WordAnalysisDto dto = HCWorkerService.ToWordAnalysisDto(word);
+
+     Assert.That(dto.Morphs, Has.Length.EqualTo(2));
+     MorphDto rootMorph = dto.Morphs[0];
+     MorphDto suffixMorph = dto.Morphs[1];
+
+     Assert.That(rootMorph.FormId, Is.EqualTo(RootFormId));
+     Assert.That(rootMorph.MsaId, Is.EqualTo(RootMsaId));
+     Assert.That(rootMorph.IsAffixProcessAllomorph, Is.False);
+
+     Assert.That(suffixMorph.FormId, Is.EqualTo(suffixFormId));
+     Assert.That(suffixMorph.MsaId, Is.EqualTo(suffixMsaId));
+     Assert.That(suffixMorph.IsAffixProcessAllomorph, Is.True);
+
+     // Root and suffix are different morphemes, so they must land in different groups.
+     Assert.That(suffixMorph.MorphemeIndex, Is.Not.EqualTo(rootMorph.MorphemeIndex));
+ }
+ }
+}
diff --git a/Src/LexText/HCWorker/HCWorkerTests/HCWorkerTests.csproj b/Src/LexText/HCWorker/HCWorkerTests/HCWorkerTests.csproj
new file mode 100644
index 0000000000..edbd52ba56
--- /dev/null
+++ b/Src/LexText/HCWorker/HCWorkerTests/HCWorkerTests.csproj
@@ -0,0 +1,56 @@
+
+
+
+ HCWorkerTests
+ SIL.FieldWorks.WordWorks.Parser.HCWorker
+ net48
+ Library
+ true
+
+ true168,169,219,414,649,1635,1702,1701
+ false
+ false
+
+
+ DEBUG;TRACE
+ true
+ false
+ portable
+
+
+ TRACE
+ true
+ true
+ portable
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Properties\CommonAssemblyInfo.cs
+
+
+
diff --git a/Src/LexText/HCWorker/Program.cs b/Src/LexText/HCWorker/Program.cs
new file mode 100644
index 0000000000..cb2d2bd3c2
--- /dev/null
+++ b/Src/LexText/HCWorker/Program.cs
@@ -0,0 +1,91 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System;
+using System.Diagnostics;
+using System.ServiceModel;
+using System.Threading;
+using SIL.FieldWorks.WordWorks.Parser;
+
+namespace SIL.FieldWorks.WordWorks.Parser.HCWorker
+{
+ /// <summary>
+ /// Entry point for the out-of-process Server-GC HermitCrab worker. Spawned lazily by
+ /// FieldWorks' HCWorkerProcessManager (Src\LexText\ParserCore) as:
+ /// HCWorker.exe &lt;pipeName&gt; &lt;parentProcessId&gt;
+ /// The WCF contract (IHCWorkerService + DTOs), the pipe binding, and the Form/Msa/InflType key
+ /// constants all live in ParserCore, so this host and the FieldWorks-side client share one
+ /// definition. See RUSTIFY-fieldworks-worker-design.md.
+ /// </summary>
+ public static class Program
+ {
+ /// <summary>
+ /// Validates the command line, starts the parent-process watchdog, opens the named-pipe
+ /// service endpoint, announces readiness on stdout, and then blocks.
+ /// </summary>
+ /// <param name="args">args[0] is the pipe name; args[1] is the parent process id.</param>
+ /// <returns>1 when the arguments are unusable; otherwise the method does not return normally.</returns>
+ public static int Main(string[] args)
+ {
+     // A blank pipe name would produce the bare "net.pipe://localhost/" address, so reject it
+     // together with a missing or non-numeric parent process id.
+     if (args.Length < 2 || string.IsNullOrWhiteSpace(args[0]) || !int.TryParse(args[1], out int parentProcessId))
+     {
+         Console.Error.WriteLine("Usage: HCWorker.exe <pipeName> <parentProcessId>");
+         return 1;
+     }
+     string pipeName = args[0];
+
+     // Safety net mirroring FLExBridgeHelper.cs's process watchdog: if FieldWorks dies
+     // (crash, kill, normal exit without an explicit shutdown of us) this ensures the
+     // worker - and its Server-GC memory footprint - does not outlive it.
+     StartParentWatchdog(parentProcessId);
+
+     NetNamedPipeBinding pipeBinding = PipeBindingFactory.Create();
+
+     using (var host = new ServiceHost(new HCWorkerService()))
+     {
+         host.AddServiceEndpoint(typeof(IHCWorkerService), pipeBinding, "net.pipe://localhost/" + pipeName);
+         host.Open();
+
+         // Readiness = the pipe is open; FieldWorks calls UpdateGrammar immediately after
+         // spawn and every parse call fails with a clear error until that lands
+         // (HCWorkerService.RequireMorpher), so there is no separate ready handshake.
+         Console.Out.WriteLine("READY");
+         Console.Out.Flush();
+
+         // Block forever; the process exits via the parent watchdog above or being killed
+         // directly by FieldWorks (design §4 "Shutdown").
+         Thread.Sleep(Timeout.Infinite);
+     }
+     // Not reached in practice (the sleep above never ends); present so every path returns.
+     return 0;
+ }
+
+ /// <summary>
+ /// Starts a background thread that terminates this process as soon as the parent process
+ /// exits. If the parent cannot be found at all, this process exits immediately.
+ /// </summary>
+ /// <param name="parentProcessId">Process id of the FieldWorks process that spawned the worker.</param>
+ private static void StartParentWatchdog(int parentProcessId)
+ {
+     Process fieldWorks;
+     try
+     {
+         fieldWorks = Process.GetProcessById(parentProcessId);
+     }
+     catch (ArgumentException)
+     {
+         // Parent already gone before we even started - exit immediately rather than
+         // leaking a Server-GC process with nothing to serve.
+         Environment.Exit(0);
+         return;
+     }
+
+     void ExitWhenParentExits()
+     {
+         try
+         {
+             fieldWorks.WaitForExit();
+         }
+         catch (Exception)
+         {
+             // Handle may already be invalid; either way, treat it as "parent is gone."
+         }
+         Environment.Exit(0);
+     }
+
+     // Background thread: it must never keep the worker process alive by itself.
+     var watchdogThread = new Thread(ExitWhenParentExits);
+     watchdogThread.IsBackground = true;
+     watchdogThread.Name = "HCWorker parent-process watchdog";
+     watchdogThread.Start();
+ }
+ }
+}
diff --git a/Src/LexText/HCWorker/Properties/AssemblyInfo.cs b/Src/LexText/HCWorker/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000000..37b81037c1
--- /dev/null
+++ b/Src/LexText/HCWorker/Properties/AssemblyInfo.cs
@@ -0,0 +1,7 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System.Runtime.CompilerServices;
+
+[assembly: InternalsVisibleTo("HCWorkerTests")]
diff --git a/Src/LexText/ParserCore/HCParser.cs b/Src/LexText/ParserCore/HCParser.cs
index 37015bb53b..774f3ab958 100644
--- a/Src/LexText/ParserCore/HCParser.cs
+++ b/Src/LexText/ParserCore/HCParser.cs
@@ -4,10 +4,12 @@
using System;
using System.Collections.Generic;
+using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
+using System.Threading;
using System.Xml;
using System.Xml.Linq;
using SIL.LCModel;
@@ -22,7 +24,20 @@ namespace SIL.FieldWorks.WordWorks.Parser
public class HCParser : DisposableBase, IParser
{
private readonly LcmCache m_cache;
- private Morpher m_morpher;
+ // Out-of-process Server-GC worker proxy (RUSTIFY-fieldworks-worker-design.md) that now
+ // does the bulk/interactive parsing an in-process m_morpher used to do directly. Tracing/
+ // Try-a-Word (GetTraceMorpher/ParseToXml below) deliberately still runs in-process: its
+ // FwXmlTraceManager touches LCM inline while tracing, which the worker has no access to -
+ // see the design's §8 incremental-rollout note (bulk path first, interactive path already
+ // ported here since it needed no such LCM-touching trace manager; Try-a-Word is left for a
+ // follow-up).
+ private readonly HCWorkerClient m_workerClient;
+ // A dedicated in-process morpher used only by the trace/Try-A-Word path. Tracing mutates the
+ // morpher's LexEntrySelector, RuleSelector, and TraceManager.IsTracing; keeping that state on
+ // its own morpher (with its own trace manager) ensures those mutations never leak into bulk
+ // parsing, which now runs in the worker process behind m_workerClient.
+ private Morpher m_traceMorpher;
+ private FwXmlTraceManager m_traceMorpherTraceManager;
private Language m_language;
private readonly FwXmlTraceManager m_traceManager;
private readonly string m_outputDirectory;
@@ -30,11 +45,23 @@ public class HCParser : DisposableBase, IParser
private bool m_forceUpdate;
private bool m_guessRoots;
private bool m_mergeAnalyses;
+ private int m_delReapps;
+ private int m_maxStemCount;
+
+ // Diagnostic perf counters (accumulated across all threads) splitting bulk-parse time
+ // into the lock-free morpher parse vs. the LCM-read mapping (GetMorphs under the read
+ // lock). Near-zero overhead; used by the parser concurrency benchmark.
+ public static long DiagMorpherParseTicks;
+ public static long DiagGetMorphsTicks;
// the public const strings are for GenerateHCConfigForFLExTrans and HCSynthByGlossLib
internal const string CRuleID = "ID";
- internal const string FormID = "ID";
- internal const string FormID2 = "ID2";
+ // FormID/FormID2 are public so the out-of-process HCWorker (Src\LexText\HCWorker) can key
+ // the same Allomorph.Properties bag when it projects a parsed Word down to MorphDto[] -
+ // keeping the worker's id extraction and this class's GetMorphs consumption on one set of
+ // key strings.
+ public const string FormID = "ID";
+ public const string FormID2 = "ID2";
public const string InflTypeID = "InflTypeID";
public const string MsaID = "ID";
internal const string PRuleID = "ID";
@@ -50,6 +77,7 @@ public class HCParser : DisposableBase, IParser
public HCParser(LcmCache cache)
{
m_cache = cache;
+ m_workerClient = new HCWorkerClient();
m_traceManager = new FwXmlTraceManager(m_cache);
m_outputDirectory = Path.GetTempPath();
m_changeListener = new ParserModelChangeListener(m_cache);
@@ -85,13 +113,21 @@ public ParseResult ParseWord(string word)
{
CheckDisposed();
- if (m_morpher == null)
+ if (m_language == null)
return null;
- IEnumerable wordAnalyses;
+ WordAnalysisDto[] wordAnalyses;
try
{
- wordAnalyses = m_morpher.ParseWord(word, out _, m_guessRoots);
+ var morpherSw = Stopwatch.StartNew();
+ // Round-trips to the worker process, OUTSIDE the LCM read lock below. The
+ // worker's parse is the expensive, CPU-bound part and touches only its own copy
+ // of the frozen HC grammar (not LCM), so keeping it off the read lock lets it run
+ // without holding the read lock for its whole duration - same reasoning as the
+ // in-process call this replaces, just across a process boundary now.
+ wordAnalyses = m_workerClient.ParseWord(word, m_guessRoots);
+ morpherSw.Stop();
+ Interlocked.Add(ref DiagMorpherParseTicks, morpherSw.ElapsedTicks);
}
catch (Exception e)
{
@@ -99,10 +135,11 @@ public ParseResult ParseWord(string word)
}
ParseResult result;
+ var getMorphsSw = Stopwatch.StartNew();
using (new WorkerThreadReadHelper(m_cache.ServiceLocator.GetInstance()))
{
var analyses = new List();
- foreach (Word wordAnalysis in wordAnalyses)
+ foreach (WordAnalysisDto wordAnalysis in wordAnalyses)
{
List morphs;
if (GetMorphs(wordAnalysis, out morphs))
@@ -113,10 +150,57 @@ public ParseResult ParseWord(string word)
}
result = new ParseResult(analyses);
}
+ getMorphsSw.Stop();
+ Interlocked.Add(ref DiagGetMorphsTicks, getMorphsSw.ElapsedTicks);
return result;
}
+ /// <summary>
+ /// Bulk path (design §5): one WCF round trip for the whole batch of already-normalized
+ /// word forms, instead of ParserWorker's old per-wordform Parallel.ForEach each calling
+ /// the single-word ParseWord above. Returns null (rather than throwing) if the batch call
+ /// itself fails even after HCWorkerClient's own retry-once, so ParserWorker can fall back
+ /// to its per-wordform path for this run instead of losing it entirely (design §6).
+ /// </summary>
+ /// <param name="words">The word forms to parse; null or empty yields null.</param>
+ /// <returns>
+ /// The parse result for each word the worker reported, or null when no grammar is loaded,
+ /// the batch is null or empty, or the worker call failed.
+ /// </returns>
+ public IDictionary<string, ParseResult> ParseWordsBatch(string[] words)
+ {
+     CheckDisposed();
+
+     // A null batch is treated like an empty one, keeping the "null, never throw" contract.
+     if (m_language == null || words == null || words.Length == 0)
+         return null;
+
+     IDictionary<string, WordAnalysisDto[]> wordAnalysesByWord;
+     try
+     {
+         wordAnalysesByWord = m_workerClient.ParseWordsBatch(words, m_guessRoots);
+     }
+     catch (Exception)
+     {
+         return null;
+     }
+     // A missing reply is handled like a failed call so the caller can use its fallback.
+     if (wordAnalysesByWord == null)
+         return null;
+
+     var results = new Dictionary<string, ParseResult>();
+     // The worker call above runs outside the read helper; only the mapping of ids back to
+     // LCM objects (GetMorphs) happens inside it.
+     using (new WorkerThreadReadHelper(m_cache.ServiceLocator.GetInstance<IWorkerThreadReadHandler>()))
+     {
+         foreach (KeyValuePair<string, WordAnalysisDto[]> kvp in wordAnalysesByWord)
+         {
+             var analyses = new List<ParseAnalysis>();
+             foreach (WordAnalysisDto wordAnalysis in kvp.Value)
+             {
+                 // Analyses for which GetMorphs returns false are dropped.
+                 if (GetMorphs(wordAnalysis, out var morphs))
+                 {
+                     analyses.Add(new ParseAnalysis(morphs.Select(mi =>
+                         new ParseMorph(mi.Form, mi.Msa, mi.InflType, mi.GuessedString))));
+                 }
+             }
+             results[kvp.Key] = new ParseResult(analyses);
+         }
+     }
+     return results;
+ }
+
public XDocument TraceWordXml(string form, IEnumerable selectTraceMorphs)
{
CheckDisposed();
@@ -139,11 +223,15 @@ protected override void DisposeManagedResources()
m_changeListener.Dispose();
m_changeListener = null;
}
+ m_workerClient?.Dispose();
}
private void LoadParser()
{
- m_morpher = null;
+ m_language = null;
+ // Force the trace morpher to be rebuilt over the freshly loaded language.
+ m_traceMorpher = null;
+ m_traceMorpherTraceManager = null;
int delReapps = 0;
// For Hermit Crab, the maximum number of roots/stems allowed is between one and ten.
@@ -170,24 +258,58 @@ private void LoadParser()
if (maxRootsElem != null)
maxStemCount = int.Parse(maxRootsElem.Value);
}
- m_morpher = new Morpher(m_traceManager, m_language) { DeletionReapplications = delReapps };
- m_morpher.MaxStemCount = maxStemCount;
- m_morpher.MergeEquivalentAnalyses = m_mergeAnalyses;
+ m_delReapps = delReapps;
+ m_maxStemCount = maxStemCount;
+
+ // Ship the freshly loaded grammar to the worker (design §4/§5 "Grammar change"): the
+ // same HC.NET XML input format XmlLanguageLoader already reads, produced via
+ // XmlLanguageWriter.Save on the Language HCLoader.Load just built - no new
+ // serialization format, no changes to SIL.Machine.Morphology.HermitCrab itself.
+ string grammarFile = Path.Combine(m_outputDirectory, m_cache.ProjectId.Name + "HCGrammar.xml");
+ XmlLanguageWriter.Save(m_language, grammarFile);
+ string grammarXml = File.ReadAllText(grammarFile);
+ File.Delete(grammarFile);
+ m_workerClient.UpdateGrammar(grammarXml, delReapps, maxStemCount, m_mergeAnalyses);
+ }
+
+ /// <summary>
+ /// Lazily builds (and returns) the in-process morpher used for tracing. It is created over
+ /// m_language with its own FwXmlTraceManager and the settings captured by LoadParser, so
+ /// enabling tracing or setting morpheme selectors on it stays local to the trace path;
+ /// bulk and interactive parsing go through m_workerClient instead.
+ /// </summary>
+ /// <returns>The cached trace morpher, created on first use.</returns>
+ private Morpher GetTraceMorpher()
+ {
+     if (m_traceMorpher != null)
+         return m_traceMorpher;
+
+     var traceManager = new FwXmlTraceManager(m_cache);
+     m_traceMorpherTraceManager = traceManager;
+     m_traceMorpher = new Morpher(traceManager, m_language)
+     {
+         DeletionReapplications = m_delReapps,
+         MaxStemCount = m_maxStemCount,
+         MergeEquivalentAnalyses = m_mergeAnalyses
+     };
+     return m_traceMorpher;
+ }
private XDocument ParseToXml(string form, bool tracing, IEnumerable selectTraceMorphs)
{
- if (m_morpher == null)
+ if (m_language == null)
return null;
+ // Use the dedicated trace morpher so that setting selectors / IsTracing here cannot
+ // corrupt a bulk parse that may be running concurrently on m_morpher.
+ Morpher traceMorpher = GetTraceMorpher();
+
var doc = new XDocument();
using (new WorkerThreadReadHelper(m_cache.ServiceLocator.GetInstance()))
{
if (selectTraceMorphs != null)
{
var selectTraceMorphsSet = new HashSet(selectTraceMorphs);
- m_morpher.LexEntrySelector = entry => selectTraceMorphsSet.Contains((int) entry.Properties[MsaID]);
- m_morpher.RuleSelector = rule =>
+ traceMorpher.LexEntrySelector = entry => selectTraceMorphsSet.Contains((int) entry.Properties[MsaID]);
+ traceMorpher.RuleSelector = rule =>
{
// Need to check if the rule is a morpheme and if it has a non-null msa id.
// If the rule comes from an irregularly inflected form, msa id will be null.
@@ -200,15 +322,15 @@ private XDocument ParseToXml(string form, bool tracing, IEnumerable selectT
}
else
{
- m_morpher.LexEntrySelector = entry => true;
- m_morpher.RuleSelector = rule => true;
+ traceMorpher.LexEntrySelector = entry => true;
+ traceMorpher.RuleSelector = rule => true;
}
- m_morpher.TraceManager.IsTracing = tracing;
+ traceMorpher.TraceManager.IsTracing = tracing;
var wordformElem = new XElement("Wordform", new XAttribute("form", form));
try
{
object trace;
- foreach (Word wordAnalysis in m_morpher.ParseWord(form, out trace, m_guessRoots))
+ foreach (Word wordAnalysis in traceMorpher.ParseWord(form, out trace, m_guessRoots))
{
List morphs;
if (GetMorphs(wordAnalysis, out morphs))
@@ -438,6 +560,127 @@ private bool GetMorphs(Word ws, out List result)
return true;
}
+ /// <summary>
+ /// The LCM-object-resolution half of GetMorphs(Word,...) above, ported to run over the
+ /// worker's flat MorphDto[] instead of walking a live Word/Annotation<ShapeNode>/
+ /// Allomorph/Morpheme graph (the worker has no LcmCache, so it can't do these repository
+ /// lookups itself - see HCWorkerService.ToWordAnalysisDto and MorphDto's doc comment in
+ /// IHCWorkerService.cs). Every circumfix/infix-placement decision below is identical to
+ /// the Word-based version; only the source of FormId/FormId2/MsaId/InflTypeId/Guessed/
+ /// FormStr and the "have we seen this morpheme already" key (MorphemeIndex instead of a
+ /// Morpheme reference) changed. Morphs are processed in array order; result is null whenever false is returned.
+ /// </summary><returns>False as soon as a form id, MSA id or positive inflection-type id fails to resolve; otherwise true.</returns>
+ private bool GetMorphs(WordAnalysisDto wordAnalysis, out List result)
+ {
+ var morphs = new Dictionary(); // latest MorphInfo recorded per MorphemeIndex
+
+ var aprCircumfixes = new List(); // FormIds of circumfix APR allomorphs that have been seen once
+ bool isSuffixPortionOfAprCircumfix = false;
+
+ result = new List();
+ foreach (MorphDto morphDto in wordAnalysis.Morphs) // NOTE(review): throws if Morphs is null - confirm the worker always sends an array
+ {
+ // The worker already skips morphs with no FormId (HCWorkerService.
+ // ToWordAnalysisDto mirrors this method's Word-based twin's `if (formID == 0)
+ // continue;`), so every entry reaching here has one.
+ int formID = morphDto.FormId;
+
+ isSuffixPortionOfAprCircumfix = false; // reset per morph; only set again by the APR check below
+ int formID2 = morphDto.FormId2;
+ if (formID2 == 0 && morphDto.IsAffixProcessAllomorph)
+ {
+ // Per the Leipzig glossing rules (https://www.eva.mpg.de/lingua/resources/glossing-rules.php),
+ // circumfixes should appear both before and after the material they attach to.
+ // HC does not have an overt marker for a circumfix when it is an affix processing rule (aka APR).
+ // The following code determines when an APR is marked as a circumfix in FLEx and ensures the
+ // two instances of it as a morph are included in the result at the correct places.
+ // This is a fix for https://jira.sil.org/browse/LT-21447
+ IMoForm circumForm;
+ if (!m_cache.ServiceLocator.GetInstance().TryGetObject(formID, out circumForm))
+ {
+ result = null;
+ return false;
+ }
+ if (circumForm.MorphTypeRA.Guid == MoMorphTypeTags.kguidMorphCircumfix) // NOTE(review): MorphTypeRA is dereferenced unguarded here but null-checked in the switch below - confirm it cannot be null
+ {
+ if (aprCircumfixes.Contains(formID))
+ {
+ isSuffixPortionOfAprCircumfix = true; // second sighting of the same form: this is the closing half
+ }
+ else
+ {
+ // Remember this allomorph as an APR that is a circumfix
+ aprCircumfixes.Add(formID);
+ }
+ }
+ }
+
+ int curFormID;
+ MorphInfo morphInfo;
+ if (!morphs.TryGetValue(morphDto.MorphemeIndex, out morphInfo) || isSuffixPortionOfAprCircumfix) // first sighting of this morpheme, or the closing half of an APR circumfix
+ {
+ curFormID = formID;
+ }
+ else if (formID2 > 0)
+ {
+ // circumfix: the morpheme was already recorded, so this occurrence uses its second form
+ curFormID = formID2;
+ }
+ else
+ {
+ continue; // morpheme already recorded and it has no second form: nothing more to add
+ }
+
+ IMoForm form;
+ if (!m_cache.ServiceLocator.GetInstance().TryGetObject(curFormID, out form))
+ {
+ result = null;
+ return false;
+ }
+
+ IMoMorphSynAnalysis msa;
+ if (!m_cache.ServiceLocator.GetInstance().TryGetObject(morphDto.MsaId, out msa))
+ {
+ result = null;
+ return false;
+ }
+
+ ILexEntryInflType inflType = null; // stays null when InflTypeId is not positive
+ if (morphDto.InflTypeId > 0 && !m_cache.ServiceLocator.GetInstance().TryGetObject(morphDto.InflTypeId, out inflType))
+ {
+ result = null;
+ return false;
+ }
+
+ morphInfo = new MorphInfo
+ {
+ Form = form,
+ GuessedString = morphDto.Guessed ? morphDto.FormStr : null, // the surface string is kept only for guessed morphs
+ Msa = msa,
+ InflType = inflType,
+ IsCircumfix = formID2 > 0
+ };
+
+ morphs[morphDto.MorphemeIndex] = morphInfo; // later occurrences of this morpheme take the "already seen" branches above
+
+ switch ((form.MorphTypeRA == null ? Guid.Empty : form.MorphTypeRA.Guid).ToString()) // a missing morph type becomes Guid.Empty and takes the default branch
+ {
+ case MoMorphTypeTags.kMorphInfix:
+ case MoMorphTypeTags.kMorphInfixingInterfix:
+ if (result.Count == 0)
+ result.Add(morphInfo);
+ else
+ result.Insert(result.Count - 1, morphInfo); // place the infix just before the last morph added so far
+ break;
+
+ default:
+ result.Add(morphInfo);
+ break;
+ }
+ }
+ return true;
+ }
+
private static string GetMorphTypeString(Guid typeGuid)
{
switch (typeGuid.ToString())
diff --git a/Src/LexText/ParserCore/HCWorkerClient.cs b/Src/LexText/ParserCore/HCWorkerClient.cs
new file mode 100644
index 0000000000..cb05fb2cbd
--- /dev/null
+++ b/Src/LexText/ParserCore/HCWorkerClient.cs
@@ -0,0 +1,142 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System;
+using System.Collections.Generic;
+using System.ServiceModel;
+
+namespace SIL.FieldWorks.WordWorks.Parser
+{
+	/// <summary>
+	/// Thin WCF proxy to the out-of-process HermitCrab worker, replacing HCParser's direct
+	/// in-process Morpher calls (RUSTIFY-fieldworks-worker-design.md §2/§3). The net.pipe binding
+	/// comes from the shared PipeBindingFactory so this client and the worker's ServiceHost use one
+	/// definition.
+	///
+	/// Owns an HCWorkerProcessManager for spawn/respawn and remembers the last grammar that was
+	/// sent successfully, so a failed call can be recovered from without the caller re-supplying
+	/// it: on CommunicationException/TimeoutException the failed channel is discarded and a new
+	/// one is built (the process manager starts a new worker if the old one has exited), the
+	/// remembered grammar is replayed, and the failed call is retried once; a second failure is
+	/// surfaced to the caller (design §6).
+	/// </summary>
+	public class HCWorkerClient : IDisposable
+	{
+		// Guards m_factory/m_channel. Always taken before the process manager's own lock.
+		private readonly object m_channelLock = new object();
+		private readonly HCWorkerProcessManager m_processManager = new HCWorkerProcessManager();
+		private ChannelFactory m_factory;
+		private IHCWorkerService m_channel;
+		// Last grammar the worker accepted; replayed after the channel has been rebuilt.
+		private HCGrammarDto m_lastGrammar;
+
+		/// <summary>
+		/// Sends a grammar and its morpher settings to the worker and, once the worker has
+		/// accepted it, remembers it for replay after a later failure.
+		/// </summary>
+		/// <param name="compiledGrammarXml">The grammar XML to load in the worker.</param>
+		/// <param name="deletionReapplications">Value for the worker morpher's DeletionReapplications.</param>
+		/// <param name="maxStemCount">Value for the worker morpher's MaxStemCount.</param>
+		/// <param name="mergeEquivalentAnalyses">Value for the worker morpher's MergeEquivalentAnalyses.</param>
+		public void UpdateGrammar(string compiledGrammarXml, int deletionReapplications, int maxStemCount, bool mergeEquivalentAnalyses)
+		{
+			var grammar = new HCGrammarDto
+			{
+				CompiledGrammarXml = compiledGrammarXml,
+				DeletionReapplications = deletionReapplications,
+				MaxStemCount = maxStemCount,
+				MergeEquivalentAnalyses = mergeEquivalentAnalyses
+			};
+			CallWithRetry(channel => channel.UpdateGrammar(grammar), grammar);
+		}
+
+		/// <summary>
+		/// Parses one word in the worker and returns whatever the worker's ParseWord returned.
+		/// </summary>
+		public WordAnalysisDto[] ParseWord(string word, bool guessRoots)
+		{
+			WordAnalysisDto[] result = null;
+			CallWithRetry(channel => result = channel.ParseWord(word, guessRoots), m_lastGrammar);
+			return result;
+		}
+
+		/// <summary>
+		/// Parses several words in one round trip and returns whatever the worker's
+		/// ParseWordsBatch returned.
+		/// </summary>
+		public IDictionary ParseWordsBatch(string[] words, bool guessRoots)
+		{
+			IDictionary result = null;
+			CallWithRetry(channel => result = channel.ParseWordsBatch(words, guessRoots), m_lastGrammar);
+			return result;
+		}
+
+		/// <summary>
+		/// Kills the worker process (FieldWorks exit, or an idle timeout - design §4). The next
+		/// call after this lazily respawns; note that the remembered grammar is only replayed on
+		/// the failure path of <c>CallWithRetry</c>, not by the respawn itself.
+		/// </summary>
+		public void Shutdown()
+		{
+			lock (m_channelLock)
+			{
+				CloseChannel();
+				m_processManager.Shutdown();
+			}
+		}
+
+		/// <summary>
+		/// Runs <paramref name="call"/> against the worker. If it fails with a
+		/// CommunicationException or TimeoutException, the failed channel is discarded, a new
+		/// one is obtained, the last accepted grammar is replayed (idempotent - design §6) and
+		/// the call is retried exactly once; any exception from the retry propagates.
+		/// </summary>
+		/// <param name="call">The service operation to invoke.</param>
+		/// <param name="grammarToReplay">
+		/// Grammar to remember once the call has succeeded: the new grammar for UpdateGrammar,
+		/// or the current m_lastGrammar (a harmless self-assignment) for the parse calls.
+		/// </param>
+		private void CallWithRetry(Action call, HCGrammarDto grammarToReplay)
+		{
+			IHCWorkerService channel = GetOrCreateChannel();
+			try
+			{
+				call(channel);
+			}
+			catch (Exception e) when (e is CommunicationException || e is TimeoutException)
+			{
+				// Discard only the channel that failed. If another caller has already rebuilt
+				// the channel after the same failure, tearing down "whatever is current" would
+				// destroy its fresh channel and make its retry fail too.
+				CloseChannelIfCurrent(channel);
+
+				IHCWorkerService retryChannel = GetOrCreateChannel();
+				HCGrammarDto lastGrammar = m_lastGrammar;
+				if (lastGrammar != null)
+					retryChannel.UpdateGrammar(lastGrammar);
+				call(retryChannel);
+			}
+
+			// Reached only when the call succeeded (first try or retry).
+			if (grammarToReplay != null)
+				m_lastGrammar = grammarToReplay;
+		}
+
+		/// <summary>
+		/// Returns the current channel, first making sure a worker is running and creating the
+		/// channel factory and channel if there is none yet.
+		/// </summary>
+		private IHCWorkerService GetOrCreateChannel()
+		{
+			lock (m_channelLock)
+			{
+				if (m_channel != null)
+					return m_channel;
+
+				string pipeName = m_processManager.EnsureStarted();
+				// One binding definition shared with the worker's ServiceHost (both sides must agree
+				// on quotas/timeouts).
+				NetNamedPipeBinding pipeBinding = PipeBindingFactory.Create();
+
+				m_factory = new ChannelFactory(
+					pipeBinding,
+					new EndpointAddress("net.pipe://localhost/" + pipeName));
+				try
+				{
+					m_channel = m_factory.CreateChannel();
+				}
+				catch
+				{
+					// Do not leave a factory behind that the next attempt would overwrite
+					// without aborting.
+					CloseChannel();
+					throw;
+				}
+				return m_channel;
+			}
+		}
+
+		/// <summary>
+		/// Discards the current channel, but only if it still is <paramref name="failedChannel"/>.
+		/// </summary>
+		private void CloseChannelIfCurrent(IHCWorkerService failedChannel)
+		{
+			lock (m_channelLock)
+			{
+				if (ReferenceEquals(m_channel, failedChannel))
+					CloseChannel();
+			}
+		}
+
+		/// <summary>
+		/// Aborts and forgets the current channel and factory. Abort (not Close) is used because
+		/// the channel is normally already known to be broken when this runs.
+		/// </summary>
+		private void CloseChannel()
+		{
+			lock (m_channelLock)
+			{
+				try
+				{
+					(m_channel as ICommunicationObject)?.Abort();
+				}
+				catch (Exception)
+				{
+					// Best-effort teardown of a channel we already know is broken.
+				}
+				m_factory?.Abort();
+				m_channel = null;
+				m_factory = null;
+			}
+		}
+
+		/// <summary>Same as <see cref="Shutdown"/>: closes the channel and kills the worker.</summary>
+		public void Dispose()
+		{
+			Shutdown();
+		}
+	}
+}
diff --git a/Src/LexText/ParserCore/HCWorkerProcessManager.cs b/Src/LexText/ParserCore/HCWorkerProcessManager.cs
new file mode 100644
index 0000000000..953214a203
--- /dev/null
+++ b/Src/LexText/ParserCore/HCWorkerProcessManager.cs
@@ -0,0 +1,132 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System;
+using System.Diagnostics;
+using System.IO;
+using System.Threading;
+using SIL.FieldWorks.Common.FwUtils;
+
+namespace SIL.FieldWorks.WordWorks.Parser
+{
+	/// <summary>
+	/// Spawns and watches the out-of-process HermitCrab worker (RUSTIFY-fieldworks-worker-design.md
+	/// §2/§4). Modeled directly on FLExBridgeHelper.cs's Process.Start + WaitForExit watchdog
+	/// pattern (Src\Common\FwUtils\FLExBridgeHelper.cs) - that is this codebase's existing
+	/// precedent for "spawn a helper process and notice if it dies," so this doesn't invent a new
+	/// one. One instance is expected to live for the lifetime of a FieldWorks process (owned by
+	/// HCWorkerClient); it does not itself talk WCF - that's HCWorkerClient's job, kept separate so
+	/// process lifecycle and channel lifecycle can fail/retry independently, matching the design's
+	/// architecture diagram (HCWorkerProcessManager box distinct from the client proxy box).
+	/// </summary>
+	public class HCWorkerProcessManager : IDisposable
+	{
+		private const string WorkerExeName = "HCWorker.exe";
+
+		// Guards m_process and m_pipeName, which are always updated together.
+		private readonly object m_lock = new object();
+		private Process m_process;
+		private string m_pipeName;
+
+		/// <summary>
+		/// Pipe name given to the most recently started worker, or null once that worker has
+		/// been shut down or its exit has been observed. Every spawn gets a new GUID-based name,
+		/// so a worker respawned after a crash never reuses its predecessor's pipe.
+		/// </summary>
+		public string PipeName => m_pipeName;
+
+		/// <summary>True while a worker has been started and has not exited.</summary>
+		public bool IsRunning
+		{
+			get
+			{
+				lock (m_lock)
+				{
+					return m_process != null && !m_process.HasExited;
+				}
+			}
+		}
+
+		/// <summary>
+		/// Starts the worker if it is not already running (design §4: lazy start on first HC
+		/// parse request per session, not eagerly at FieldWorks startup). Returns the pipe name to
+		/// connect to. Safe to call repeatedly/concurrently. If the executable cannot be started
+		/// the exception from Process.Start propagates and no state is changed.
+		/// </summary>
+		public string EnsureStarted()
+		{
+			lock (m_lock)
+			{
+				if (m_process != null)
+				{
+					if (!m_process.HasExited)
+						return m_pipeName;
+
+					// The previous worker died: release its handle before replacing it.
+					m_process.Dispose();
+					m_process = null;
+					m_pipeName = null;
+				}
+
+				string pipeName = "HCWorker_" + Guid.NewGuid().ToString("N");
+				string exePath = Path.Combine(FwDirectoryFinder.ExeOrDllDirectory, WorkerExeName);
+
+				int ownProcessId;
+				using (Process self = Process.GetCurrentProcess())
+					ownProcessId = self.Id;
+
+				var process = new Process
+				{
+					StartInfo = new ProcessStartInfo
+					{
+						UseShellExecute = false,
+						FileName = exePath,
+						// Arguments: the pipe to listen on, then this process's id.
+						Arguments = $"{pipeName} {ownProcessId}",
+						CreateNoWindow = true
+						// Standard output is deliberately NOT redirected: nothing in this class
+						// reads it, and a redirected stream that is never drained blocks the
+						// child as soon as the pipe buffer fills.
+					}
+				};
+				try
+				{
+					process.Start();
+				}
+				catch
+				{
+					process.Dispose();
+					throw;
+				}
+
+				// Publish only after a successful start so PipeName never names a worker that
+				// does not exist.
+				m_process = process;
+				m_pipeName = pipeName;
+				StartWatchdog(process);
+				return pipeName;
+			}
+		}
+
+		/// <summary>
+		/// Safety net mirroring FLExBridgeHelper.cs's process watchdog: a background thread that
+		/// waits for the worker to exit (crash, killed, or exiting on its own) and then clears
+		/// this manager's state, so PipeName/IsRunning stop describing a dead worker and its
+		/// handle is released without waiting for the next EnsureStarted().
+		/// </summary>
+		private void StartWatchdog(Process process)
+		{
+			var watchdog = new Thread(() =>
+			{
+				try
+				{
+					process.WaitForExit();
+				}
+				catch (Exception)
+				{
+					// The handle was disposed or is otherwise unusable (e.g. Shutdown ran).
+					// Leave the state alone; EnsureStarted still checks HasExited itself.
+					return;
+				}
+				ForgetIfCurrent(process);
+			})
+			{ IsBackground = true, Name = "HCWorker process watchdog" };
+			watchdog.Start();
+		}
+
+		/// <summary>
+		/// Clears the tracked process and pipe name, but only if <paramref name="exited"/> is
+		/// still the tracked process (it is not after Shutdown or a respawn).
+		/// </summary>
+		private void ForgetIfCurrent(Process exited)
+		{
+			lock (m_lock)
+			{
+				if (!ReferenceEquals(m_process, exited))
+					return;
+				m_process.Dispose();
+				m_process = null;
+				m_pipeName = null;
+			}
+		}
+
+		/// <summary>
+		/// Kills the worker (FieldWorks exit, or an idle timeout releasing its Server-GC memory
+		/// footprint - design §4 "Shutdown"). Safe to call when nothing is running.
+		/// </summary>
+		public void Shutdown()
+		{
+			lock (m_lock)
+			{
+				if (m_process == null)
+					return;
+				try
+				{
+					if (!m_process.HasExited)
+						m_process.Kill();
+				}
+				catch (Exception)
+				{
+					// Already exited or exiting; nothing more to do.
+				}
+				finally
+				{
+					m_process.Dispose();
+					m_process = null;
+					m_pipeName = null;
+				}
+			}
+		}
+
+		/// <summary>Same as <see cref="Shutdown"/>.</summary>
+		public void Dispose()
+		{
+			Shutdown();
+		}
+	}
+}
diff --git a/Src/LexText/ParserCore/IHCWorkerService.cs b/Src/LexText/ParserCore/IHCWorkerService.cs
new file mode 100644
index 0000000000..cbf9cebcb1
--- /dev/null
+++ b/Src/LexText/ParserCore/IHCWorkerService.cs
@@ -0,0 +1,89 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System.Collections.Generic;
+using System.Runtime.Serialization;
+using System.ServiceModel;
+
+namespace SIL.FieldWorks.WordWorks.Parser
+{
+ /// <summary>
+ /// The WCF contract for the out-of-process HermitCrab worker (Src\LexText\HCWorker), defined
+ /// once here in ParserCore and referenced by BOTH the worker (which implements it) and
+ /// HCWorkerClient (which consumes it) - no hand-synced duplicate. The explicit Namespace/Name/
+ /// DataMember attributes pin the wire shape. See RUSTIFY-fieldworks-worker-design.md.
+ /// </summary>
+ [ServiceContract(Namespace = "http://sil.org/machine/hermitcrab/worker", Name = "IHCWorkerService")]
+ public interface IHCWorkerService
+ {
+ [OperationContract]
+ void UpdateGrammar(HCGrammarDto grammar); // sends a grammar plus morpher settings; HCWorkerClient re-sends the last accepted one before retrying a failed call
+
+ [OperationContract]
+ WordAnalysisDto[] ParseWord(string word, bool guessRoots); // analyses for a single word
+
+ [OperationContract]
+ IDictionary ParseWordsBatch(string[] words, bool guessRoots); // batch form of ParseWord - NOTE(review): presumably keyed by input word; confirm against the worker
+ }
+
+ [DataContract(Namespace = "http://sil.org/machine/hermitcrab/worker")]
+ public class HCGrammarDto // payload of IHCWorkerService.UpdateGrammar, built by HCWorkerClient.UpdateGrammar
+ {
+ [DataMember]
+ public string CompiledGrammarXml { get; set; } // grammar XML text; HCParser.LoadParser fills it from the file XmlLanguageWriter.Save wrote
+
+ [DataMember]
+ public int DeletionReapplications { get; set; } // same-named Morpher setting (HCParser passes its delReapps value)
+
+ [DataMember]
+ public int MaxStemCount { get; set; } // same-named Morpher setting (HCParser passes its maxStemCount value)
+
+ [DataMember]
+ public bool MergeEquivalentAnalyses { get; set; } // same-named Morpher setting (HCParser passes m_mergeAnalyses)
+ }
+
+ [DataContract(Namespace = "http://sil.org/machine/hermitcrab/worker")]
+ public class WordAnalysisDto // one analysis as returned by ParseWord/ParseWordsBatch
+ {
+ [DataMember]
+ public MorphDto[] Morphs { get; set; } // morphs of the analysis; HCParser.GetMorphs walks them in array order
+ }
+
+ /// <summary>
+ /// One morph's raw, LCM-lookup-ready fields, in the order HermitCrab.Worker encountered them
+ /// walking the parsed Word - i.e. exactly what HCParser.GetMorphs used to read directly off
+ /// the live Word/Allomorph/Morpheme object graph before parsing moved out-of-process. FormId/
+ /// MsaId/InflTypeId are the ids GetMorphs already resolves via IMoFormRepository/
+ /// IMoMorphSynAnalysisRepository/ILexEntryInflTypeRepository; the circumfix/infix placement
+ /// logic that used to run inline (checking a resolved IMoForm's MorphTypeRA) still runs here,
+ /// now over this flat list instead of over Annotation<ShapeNode>/Allomorph/Morpheme.
+ /// </summary>
+ [DataContract(Namespace = "http://sil.org/machine/hermitcrab/worker")]
+ public class MorphDto
+ {
+ [DataMember]
+ public int FormId { get; set; } // id GetMorphs resolves to an IMoForm
+
+ [DataMember]
+ public int FormId2 { get; set; } // second form id; a value > 0 makes GetMorphs treat the morph as a circumfix
+
+ [DataMember]
+ public bool IsAffixProcessAllomorph { get; set; } // GetMorphs runs its affix-process-rule circumfix check only when this is set and FormId2 is 0
+
+ [DataMember]
+ public string FormStr { get; set; } // becomes MorphInfo.GuessedString when Guessed is true; otherwise unused by GetMorphs
+
+ [DataMember]
+ public bool Guessed { get; set; } // selects whether FormStr is carried into the result
+
+ [DataMember]
+ public int MsaId { get; set; } // id GetMorphs resolves to an IMoMorphSynAnalysis
+
+ [DataMember]
+ public int InflTypeId { get; set; } // resolved to an ILexEntryInflType only when > 0
+
+ [DataMember]
+ public int MorphemeIndex { get; set; } // key GetMorphs uses to recognise a morpheme it has already processed
+ }
+}
diff --git a/Src/LexText/ParserCore/ParseFiler.cs b/Src/LexText/ParserCore/ParseFiler.cs
index 87014466b4..45e9043302 100644
--- a/Src/LexText/ParserCore/ParseFiler.cs
+++ b/Src/LexText/ParserCore/ParseFiler.cs
@@ -13,6 +13,7 @@
using SIL.LCModel.Core.Text;
using SIL.LCModel.DomainServices;
using SIL.LCModel.Infrastructure;
+using SIL.Reporting;
using XCore;
namespace SIL.FieldWorks.WordWorks.Parser
@@ -167,13 +168,17 @@ private bool UpdateWordforms(object parameter)
// update all of the wordforms in a batch, this might slow down the UI thread a little, if it causes too much unresponsiveness
// we can bail out early if there is a message in the Win32 message queue
- IEnumerable results;
+ WordformUpdateWork[] results;
lock (m_syncRoot)
{
results = m_workQueue.ToArray();
m_workQueue.Clear();
}
+ // Instrumentation: filing is the serial part of bulk parsing; time it so the
+ // parse:file split can be measured against the parse timings logged by ParserWorker.
+ var filingTimer = Stopwatch.StartNew();
+
// Update work.Wordform with its own NonUndoableUnitOfWorkHelper
// so that PropChanged will be triggered when it is updated below.
NonUndoableUnitOfWorkHelper.Do(m_cache.ActionHandlerAccessor, () =>
@@ -243,6 +248,10 @@ from ann in m_baseAnnotationRepository.AllInstances()
FireWordformUpdated(work.Wordform, work.Priority, work.ParseResult, work.CheckParser);
}
});
+
+ filingTimer.Stop();
+ if (results.Length > 0)
+ Logger.WriteMinorEvent("Parser filing: {0} results filed in {1} ms", results.Length, filingTimer.ElapsedMilliseconds);
return true;
}
diff --git a/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs b/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
index a62cfb9e1a..3b4249c99e 100644
--- a/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
+++ b/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
@@ -161,6 +161,69 @@ public void UpdateWordform()
CheckAnalysisSize("Cats", 0, false);
CheckAnalysisSize("cats", 1, false);
}
+
+ /// <summary>
+ /// Parsing a batch of wordforms in parallel must produce the same analyses as parsing them
+ /// one at a time (the key acceptance criterion for the concurrent bulk parse). Two disjoint
+ /// sets of wordforms are parsed against the same stub result, one with a degree of parallelism
+ /// of 1 and one with 4, and every wordform in both sets is asserted to have exactly one analysis.
+ /// </summary>
+ [Test]
+ public void ParseAndUpdateWordforms_ParallelMatchesSerial()
+ {
+ IMoStemAllomorph catNForm = null;
+ IMoStemMsa catNMsa = null;
+ UndoableUnitOfWorkHelper.Do("Undo stuff", "Redo stuff", m_actionHandler, () =>
+ {
+ // Noun
+ ILexEntry catN = Cache.ServiceLocator.GetInstance().Create();
+ catNForm = Cache.ServiceLocator.GetInstance().Create();
+ catN.AlternateFormsOS.Add(catNForm);
+ catNForm.Form.VernacularDefaultWritingSystem = TsStringUtils.MakeString("catn", m_vernacularWS.Handle);
+ catNMsa = Cache.ServiceLocator.GetInstance().Create();
+ catN.MorphoSyntaxAnalysesOC.Add(catNMsa);
+ });
+
+ // The same single-analysis result is returned for every (lowercase) wordform.
+ var sharedResult = new ParseResult(new[]
+ {
+ new ParseAnalysis(new[] { new ParseMorph(catNForm, catNMsa) })
+ });
+
+ // Disjoint sets of distinct lowercase wordforms so the two runs do not interfere.
+ // Lowercase forms also skip the lowercase-variant re-parse, keeping the test focused.
+ var serialForms = new[] { "able", "baker", "charlie", "dog", "easy", "foxtrot", "golf" };
+ var parallelForms = new[] { "hotel", "india", "juliet", "kilo", "lima", "mike", "november" };
+
+ Func> parseAll = (forms, maxDop) =>
+ {
+ var worker = new ParserWorker(Cache, null, HandleTaskUpdate, m_idleQueue, null); // NOTE(review): this worker is never disposed - confirm whether the test should dispose it
+ worker.Parser = new TestParserClass(sharedResult, null); // stub parser handing back sharedResult
+ var wordforms = new List();
+ foreach (string form in forms)
+ wordforms.Add(FindOrCreateWordform(form));
+
+ // SUT - NOTE(review): TestParserClass is not an HCParser, so SupportsParallelParsing is false for this worker; confirm maxDop 4 still exercises the parallel path
+ worker.ParseAndUpdateWordforms(wordforms, ParserPriority.Low, false, maxDop);
+ ExecuteIdleQueue(); // filing happens on the idle queue, so drain it before counting analyses
+
+ var counts = new Dictionary(); // wordform text -> number of analyses after filing
+ foreach (IWfiWordform wf in wordforms)
+ counts[wf.Form.VernacularDefaultWritingSystem.Text] = wf.AnalysesOC.Count;
+ return counts;
+ };
+
+ Dictionary serialCounts = parseAll(serialForms, 1);
+ Dictionary parallelCounts = parseAll(parallelForms, 4);
+
+ // Every serially-parsed wordform got exactly one analysis.
+ foreach (string form in serialForms)
+ Assert.That(serialCounts[form], Is.EqualTo(1), "serial analysis count for " + form);
+ // The parallel batch must show the same count for each of its wordforms:
+ // no dropped, duplicated, or missing analyses.
+ foreach (string form in parallelForms)
+ Assert.That(parallelCounts[form], Is.EqualTo(1), "parallel analysis count for " + form);
+ }
#endregion // Tests
}
diff --git a/Src/LexText/ParserCore/ParserScheduler.cs b/Src/LexText/ParserCore/ParserScheduler.cs
index f96a38f754..a482675e5b 100644
--- a/Src/LexText/ParserCore/ParserScheduler.cs
+++ b/Src/LexText/ParserCore/ParserScheduler.cs
@@ -8,6 +8,7 @@
using System;
using System.Collections.Generic;
+using System.Linq;
using SIL.FieldWorks.Common.FwUtils;
using SIL.LCModel.Utils;
using SIL.LCModel;
@@ -54,12 +55,17 @@ abstract class ParserWork
{
protected readonly ParserScheduler m_scheduler;
protected readonly ParserPriority m_priority;
+ // The number of wordforms this work item represents. The queue counts track
+ // wordforms (not work items) so the "Queue: low/med/high" display and the idle
+ // detection stay meaningful when many wordforms are batched into one work item.
+ private readonly int m_queueCount;
- protected ParserWork(ParserScheduler scheduler, ParserPriority priority)
+ protected ParserWork(ParserScheduler scheduler, ParserPriority priority, int queueCount = 1)
{
m_scheduler = scheduler;
m_priority = priority;
- m_scheduler.IncrementQueueCount(m_priority);
+ m_queueCount = queueCount;
+ m_scheduler.IncrementQueueCount(m_priority, m_queueCount);
}
public virtual void DoWork()
@@ -67,7 +73,7 @@ public virtual void DoWork()
// This undoes the IncrementQueueCount above.
// Subclasses should always call base.DoWork().
// Nobody else should call IncrementQueueCount or DecrementQueueCount.
- m_scheduler.DecrementQueueCount(m_priority);
+ m_scheduler.DecrementQueueCount(m_priority, m_queueCount);
}
}
@@ -112,6 +118,31 @@ public override void DoWork()
}
}
+	/// <summary>
+	/// A batch of wordforms parsed together as a single work item. The parses may run concurrently
+	/// (for thread-safe parsers); filing of results stays on the existing serial idle-queue path.
+	/// The item counts as <c>wordforms.Count</c> entries in the scheduler's queue totals.
+	/// </summary>
+	class UpdateWordformsWork : ParserWork
+	{
+		private readonly IList m_wordforms;
+		private readonly bool m_checkParser;
+		private readonly int m_maxDegreeOfParallelism;
+
+		/// <summary>
+		/// Creates the work item and (through the base constructor) adds the batch size to the
+		/// queue count for <paramref name="priority"/>.
+		/// </summary>
+		/// <param name="scheduler">Owning scheduler.</param>
+		/// <param name="priority">Queue priority of the batch.</param>
+		/// <param name="wordforms">The wordforms to parse; must not be null.</param>
+		/// <param name="checkParser">Passed through to ParserWorker.ParseAndUpdateWordforms.</param>
+		/// <param name="maxDegreeOfParallelism">Passed through to ParserWorker.ParseAndUpdateWordforms.</param>
+		public UpdateWordformsWork(ParserScheduler scheduler, ParserPriority priority, IList wordforms, bool checkParser, int maxDegreeOfParallelism)
+			: base(scheduler, priority, wordforms.Count)
+		{
+			m_wordforms = wordforms;
+			m_checkParser = checkParser;
+			m_maxDegreeOfParallelism = maxDegreeOfParallelism;
+		}
+
+		/// <summary>
+		/// Parses and files the batch, then releases its share of the queue count.
+		/// </summary>
+		public override void DoWork()
+		{
+			try
+			{
+				m_scheduler.m_parserWorker.ParseAndUpdateWordforms(m_wordforms, m_priority, m_checkParser, m_maxDegreeOfParallelism);
+			}
+			finally
+			{
+				// Always undo the constructor's increment. A batch holds many counts, so
+				// skipping this when the parse throws would leave the queue totals permanently
+				// above zero and the scheduler would never report itself idle again.
+				base.DoWork();
+			}
+		}
+	}
+
class ReloadGrammarAndLexiconWork : ParserWork
{
public ReloadGrammarAndLexiconWork(ParserScheduler scheduler)
@@ -254,18 +285,18 @@ public int GetQueueSize(ParserPriority priority)
return m_queueCounts[(int) priority];
}
- private void IncrementQueueCount(ParserPriority priority)
+ private void IncrementQueueCount(ParserPriority priority, int count = 1) // count: how much to add for this priority (ParserWork passes the number of wordforms its item represents)
{
lock (SyncRoot)
- m_queueCounts[(int) priority]++;
+ m_queueCounts[(int) priority] += count; // updated under SyncRoot, like the matching decrement
}
- private void DecrementQueueCount(ParserPriority priority)
+ private void DecrementQueueCount(ParserPriority priority, int count = 1)
{
bool isIdle;
lock (SyncRoot)
{
- m_queueCounts[(int) priority]--;
+ m_queueCounts[(int) priority] -= count;
isIdle = m_queueCounts[(int)ParserPriority.TryAWord] == 0
&& m_queueCounts[(int)ParserPriority.Low] == 0
&& m_queueCounts[(int)ParserPriority.Medium] == 0
@@ -302,12 +333,38 @@ public void ScheduleOneWordformForUpdate(IWfiWordform wordform, ParserPriority p
m_thread.EnqueueWork(priority, new UpdateWordformWork(this, priority, wordform, checkParser));
}
+ /// <summary>
+ /// Factor applied to the parser's MaxDegreeOfParallelism to size one batch work item: when that
+ /// value is above 1, each item holds MaxDegreeOfParallelism * ParseChunkMultiplier wordforms. Larger amortizes the cost of starting a parallel loop; smaller lets an
+ /// interactive Try-A-Word (higher priority) or grammar reload preempt sooner.
+ /// </summary>
+ private const int ParseChunkMultiplier = 8;
+
public void ScheduleWordformsForUpdate(IEnumerable wordforms, ParserPriority priority, bool checkParser)
{
CheckDisposed();
- foreach (var wordform in wordforms)
- ScheduleOneWordformForUpdate(wordform, priority, checkParser);
+ // Materialize once: callers commonly pass lazy queries (AllInstances(), Union(), ...). A list argument is used as-is.
+ IList wordformList = wordforms as IList ?? wordforms.ToList();
+ if (wordformList.Count == 0)
+ return; // nothing to schedule, so no work item (and no queue count) is created
+
+ int maxDegreeOfParallelism = m_parserWorker.MaxDegreeOfParallelism; // 1 unless the active parser supports parallel parsing
+ // Split into bounded chunks and enqueue one batch work item per chunk. The work
+ // items go through the same priority queue, so a Try-A-Word (priority TryAWord) or
+ // a reload (priority ReloadGrammarAndLexicon) still preempts between chunks, and
+ // Stop()/Dispose only has to wait for the current chunk. A non-parallel parser still
+ // gets one wordform per work item (now wrapped in an UpdateWordformsWork).
+ int chunkSize = maxDegreeOfParallelism <= 1 ? 1 : maxDegreeOfParallelism * ParseChunkMultiplier;
+
+ for (int start = 0; start < wordformList.Count; start += chunkSize)
+ {
+ int count = Math.Min(chunkSize, wordformList.Count - start); // the last chunk may be shorter
+ var chunk = new List(count); // each work item gets its own copy of its slice
+ for (int i = 0; i < count; i++)
+ chunk.Add(wordformList[start + i]);
+ m_thread.EnqueueWork(priority, new UpdateWordformsWork(this, priority, chunk, checkParser, maxDegreeOfParallelism));
+ }
}
private void HandleTaskUpdate(TaskReport task)
diff --git a/Src/LexText/ParserCore/ParserWorker.cs b/Src/LexText/ParserCore/ParserWorker.cs
index 58f494f7bd..a4ca43357f 100644
--- a/Src/LexText/ParserCore/ParserWorker.cs
+++ b/Src/LexText/ParserCore/ParserWorker.cs
@@ -26,12 +26,18 @@
*/
using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
using SIL.FieldWorks.Common.FwUtils;
using SIL.LCModel.Core.KernelInterfaces;
using SIL.LCModel.Core.Text;
using SIL.LCModel;
using SIL.LCModel.Infrastructure;
using SIL.ObjectModel;
+using SIL.Reporting;
using XCore;
using SIL.LCModel.DomainServices;
using System.Xml.Linq;
@@ -122,7 +128,278 @@ public void TryAWord(string sForm, bool fDoTrace, int[] sSelectTraceMorphs)
}
}
- public bool ParseAndUpdateWordform(IWfiWordform wordform, ParserPriority priority, bool checkParser = false)
+		/// <summary>
+		/// True when the active parser may be invoked for several wordforms at the same time.
+		/// HermitCrab is the only parser treated as thread-safe here (it shares one morpher);
+		/// XAmple wraps a single-instance native parser, so it has to be driven serially.
+		/// </summary>
+		internal bool SupportsParallelParsing { get { return m_parser is HCParser; } }
+
+		/// <summary>
+		/// Name of the property-table setting that overrides the bulk-parse concurrency.
+		/// </summary>
+		internal const string MaxConcurrencyPropertyName = "ParserMaxConcurrency";
+
+		/// <summary>
+		/// Upper bound on how many wordforms a bulk parse may process at the same time.
+		/// Always 1 (serial) when the active parser is not thread-safe, i.e. not HermitCrab.
+		/// </summary>
+		/// <remarks>
+		/// Bulk HermitCrab parsing was measured at only ~2.4x with 4 concurrent parses, levelling
+		/// off near ~2.8x beyond that on a representative project; the limit sits inside the
+		/// HermitCrab morpher (shared compiled grammar / allocation-bound) rather than here.
+		/// The default is therefore a small cap (at most 4, and one less than the core count)
+		/// that captures most of the speed-up while keeping cores free for the UI during a
+		/// "Parse All Words". The "ParserMaxConcurrency" setting overrides it, up or down.
+		/// </remarks>
+		public int MaxDegreeOfParallelism
+		{
+			get
+			{
+				CheckDisposed();
+				if (SupportsParallelParsing == false)
+					return 1;
+				int fallback = Math.Min(4, Math.Max(1, Environment.ProcessorCount - 1));
+				int configured = fallback;
+				if (m_propertyTable != null)
+					configured = m_propertyTable.GetIntProperty(MaxConcurrencyPropertyName, fallback);
+				return configured < 1 ? 1 : configured;
+			}
+		}
+
+		/// <summary>
+		/// Parse a batch of wordforms, optionally in parallel, and file the results.
+		/// The grammar/lexicon update check is done once up front (it is not thread-safe and
+		/// must not run inside the parallel body). Each wordform's parse is CPU-bound and
+		/// lock-free; filing is enqueued to the thread-safe idle queue and still happens
+		/// serially on the UI thread.
+		/// </summary>
+		/// <remarks>
+		/// HermitCrab batches go to the out-of-process worker in a single call; if that call
+		/// fails, the same batch is re-run through the in-process per-wordform parallel loop.
+		/// </remarks>
+		/// <param name="wordforms">The wordforms to parse.</param>
+		/// <param name="priority">The priority the parse is run at.</param>
+		/// <param name="checkParser">Whether this is a parser check (no model update).</param>
+		/// <param name="maxDegreeOfParallelism">Maximum wordforms to parse concurrently;
+		/// 1 means serial. Callers must pass 1 for parsers that are not thread-safe.</param>
+		public void ParseAndUpdateWordforms(IList<IWfiWordform> wordforms, ParserPriority priority, bool checkParser, int maxDegreeOfParallelism)
+		{
+			CheckDisposed();
+
+			if (wordforms.Count == 0)
+				return;
+
+			// Bring the grammar/lexicon up to date exactly once, on this (single) dispatcher
+			// thread, before any concurrent parsing begins (Update() is not thread-safe).
+			CheckNeedsUpdate();
+
+			// Instrumentation: batch wall-clock vs. summed per-wordform parse time lets us
+			// report the achieved parallelism and (together with ParseFiler's filing time) the
+			// parse:file split.
+			var batchTimer = Stopwatch.StartNew();
+			long summedWordMs = 0;
+
+			// Concurrency needs more than one wordform and a cap above 1; otherwise parse serially.
+			bool runConcurrently = maxDegreeOfParallelism > 1 && wordforms.Count > 1;
+
+			// HermitCrab: route the whole batch through the out-of-process worker in one call
+			// instead of an in-process loop over individual ParseWord calls - the worker
+			// parallelizes internally under Server GC with no artificial cap (RUSTIFY-
+			// fieldworks-worker-design.md §5). A null result means the batch call failed outright
+			// (design §6: after HCWorkerClient's own retry-once); the in-process parallel loop
+			// below then takes over rather than losing the run.
+			long? batchWordMs = null;
+			if (runConcurrently && m_parser is HCParser hcParser)
+				batchWordMs = ParseAndUpdateWordformsBatch(hcParser, wordforms, priority, checkParser);
+
+			if (batchWordMs.HasValue)
+			{
+				summedWordMs = batchWordMs.Value;
+			}
+			else if (runConcurrently)
+			{
+				// In-process parallel path: used by non-HC thread-safe parsers and as the HC
+				// fallback when the batch call failed. Guard each wordform so one unexpected
+				// exception cannot abort the whole batch (Gotcha #6).
+				var options = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
+				Parallel.ForEach(wordforms, options, wordform =>
+				{
+					var wordTimer = Stopwatch.StartNew();
+					ParseAndUpdateWordformGuarded(wordform, priority, checkParser);
+					Interlocked.Add(ref summedWordMs, wordTimer.ElapsedMilliseconds);
+				});
+			}
+			else
+			{
+				// Serial path: behaves exactly as the original one-wordform-at-a-time code,
+				// including letting unexpected exceptions propagate (so XAmple keeps its
+				// existing error handling).
+				foreach (IWfiWordform wordform in wordforms)
+				{
+					var wordTimer = Stopwatch.StartNew();
+					ParseAndUpdateWordform(wordform, priority, checkParser, ensureUpToDate: false);
+					summedWordMs += wordTimer.ElapsedMilliseconds;
+				}
+			}
+
+			batchTimer.Stop();
+			long wallMs = batchTimer.ElapsedMilliseconds;
+			// On the batch path summedWordMs is the single batch call's wall-clock time, not a
+			// sum of per-wordform times, so the ratio logged below cannot exceed 1.0x there and
+			// says nothing about how parallel the worker itself was.
+			double effectiveParallelism = wallMs > 0 ? summedWordMs / (double) wallMs : 0.0;
+			Logger.WriteMinorEvent(
+				"Parser batch: {0} wordforms in {1} ms wall (summed parse {2} ms, effective parallelism {3:0.0}x, maxDOP {4})",
+				wordforms.Count, wallMs, summedWordMs, effectiveParallelism, maxDegreeOfParallelism);
+		}
+
+		/// <summary>
+		/// Runs <see cref="ParseAndUpdateWordform"/> for one wordform, catching any unexpected
+		/// exception so that a single bad wordform cannot abort an entire parallel batch; the
+		/// exception is logged and an error result is filed instead. (HCParser already converts
+		/// parse failures into ParseResult error results; this guards against anything else.)
+		/// </summary>
+		private void ParseAndUpdateWordformGuarded(IWfiWordform wordform, ParserPriority priority, bool checkParser)
+		{
+			try
+			{
+				ParseAndUpdateWordform(wordform, priority, checkParser, ensureUpToDate: false);
+			}
+			catch (Exception parseError)
+			{
+				// Log the cause, then file an error result so clients still learn the parser finished with this wordform.
+				Logger.WriteError(parseError);
+				try
+				{
+					FileInvalidWordform(wordform, priority, checkParser);
+				}
+				catch (Exception filingError)
+				{
+					Logger.WriteError(filingError); // nothing more we can safely do for this wordform
+				}
+			}
+		}
+
+		/// <summary>
+		/// Per-wordform input for the batch parse: the wordform, its normalized surface form
+		/// (null when the wordform is invalid or empty) and, when lowercasing changes the form,
+		/// the normalized lowercase variant plus its ITsString. Everything is collected in one
+		/// read-lock pass over the batch instead of one read-lock acquisition per wordform.
+		/// </summary>
+		private class BatchItem
+		{
+			public IWfiWordform Wordform { get; set; }
+			public string Word { get; set; }
+			public string LowerWord { get; set; }
+			public ITsString LowerText { get; set; }
+		}
+
+		/// <summary>
+		/// Design §5's bulk path: gathers every wordform's normalized form (and, same as
+		/// ParseAndUpdateWordform, its lowercase variant when different) into one de-duplicated word
+		/// list, makes a single HCParser.ParseWordsBatch WCF round trip for the whole list, then
+		/// files a result for every wordform (an "invalid wordform" error for invalid/empty ones).
+		/// </summary>
+		/// <returns>Null if the batch call itself failed (the caller falls back to the per-wordform
+		/// path); otherwise the batch call's wall-clock ms, or 0 when there was nothing to send.
+		/// There is no meaningful per-wordform parse-time split once one RPC covers the whole
+		/// batch, so each result's ParseTime is the batch time divided evenly over the items.</returns>
+		private long? ParseAndUpdateWordformsBatch(HCParser hcParser, IList<IWfiWordform> wordforms, ParserPriority priority, bool checkParser)
+		{
+			var items = new List<BatchItem>(wordforms.Count);
+			using (new WorkerThreadReadHelper(m_cache.ServiceLocator.GetInstance<IWorkerThreadReadHandler>()))
+			{
+				var normalizer = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD);
+				foreach (IWfiWordform wordform in wordforms)
+				{
+					ITsString form = wordform.IsValidObject ? wordform.Form.VernacularDefaultWritingSystem : null;
+					if (form == null || string.IsNullOrEmpty(form.Text))
+					{
+						items.Add(new BatchItem { Wordform = wordform });
+						continue;
+					}
+
+					var item = new BatchItem
+					{
+						Wordform = wordform,
+						Word = normalizer.Normalize(form.Text.Replace(' ', '.'))
+					};
+
+					var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
+					string sLower = cf.ToLower(form.Text);
+					if (sLower != form.Text)
+					{
+						item.LowerWord = normalizer.Normalize(sLower.Replace(' ', '.'));
+						item.LowerText = TsStringUtils.MakeString(sLower, form.get_WritingSystem(0));
+					}
+					items.Add(item);
+				}
+			}
+
+			string[] batchWords = items
+				.SelectMany(i => new[] { i.Word, i.LowerWord })
+				.Where(w => w != null)
+				.Distinct()
+				.ToArray();
+			if (batchWords.Length == 0)
+			{
+				// Every wordform was invalid/empty; nothing to send the worker, but still file
+				// per-wordform so clients learn the parser finished with each of them.
+				foreach (BatchItem item in items)
+					FileInvalidWordform(item.Wordform, priority, checkParser);
+				return 0;
+			}
+
+			var wordTimer = Stopwatch.StartNew();
+			IDictionary<string, ParseResult> resultsByWord;
+			using (new TaskReport(string.Format(ParserCoreStrings.ksParsingX, batchWords[0]), m_taskUpdateHandler))
+			{
+				resultsByWord = hcParser.ParseWordsBatch(batchWords);
+			}
+			wordTimer.Stop();
+
+			if (resultsByWord == null)
+				return null;
+
+			long perWordMs = items.Count > 0 ? wordTimer.ElapsedMilliseconds / items.Count : 0;
+			foreach (BatchItem item in items)
+			{
+				if (item.Word == null)
+				{
+					FileInvalidWordform(item.Wordform, priority, checkParser);
+					continue;
+				}
+
+				ParseResult result;
+				if (!resultsByWord.TryGetValue(item.Word, out result))
+					result = new ParseResult(string.Format(ParserCoreStrings.ksHCInvalidWordform, "", 0, "", ""));
+				result.ParseTime = perWordMs;
+
+				if (item.LowerWord != null && resultsByWord.TryGetValue(item.LowerWord, out ParseResult lcResult))
+				{
+					lcResult.ParseTime = perWordMs;
+					if (lcResult.Analyses.Count > 0 && lcResult.ErrorMessage == null)
+					{
+						// Don't turn lcText into a wordform here.
+						// This avoids a problem with broadcasting PropChanged (cf. LT-22079).
+						m_parseFiler.ProcessParse(item.LowerText, 0, lcResult, checkParser);
+					}
+				}
+
+				m_parseFiler.ProcessParse(item.Wordform, priority, result, checkParser);
+			}
+
+			return wordTimer.ElapsedMilliseconds;
+		}
+
+		/// <summary>Files the generic "invalid wordform" error result for the given wordform.</summary>
+		private void FileInvalidWordform(IWfiWordform wordform, ParserPriority priority, bool checkParser) =>
+			m_parseFiler.ProcessParse(
+				wordform, priority,
+				new ParseResult(string.Format(ParserCoreStrings.ksHCInvalidWordform, "", 0, "", "")), checkParser);
+
+ public bool ParseAndUpdateWordform(IWfiWordform wordform, ParserPriority priority, bool checkParser = false, bool ensureUpToDate = true)
{
CheckDisposed();
@@ -144,7 +421,10 @@ public bool ParseAndUpdateWordform(IWfiWordform wordform, ParserPriority priorit
return false;
}
- CheckNeedsUpdate();
+ // During a parallel batch the caller has already ensured the parser is up to date;
+ // Update() is not thread-safe so it must not run inside the parallel body.
+ if (ensureUpToDate)
+ CheckNeedsUpdate();
var normalizer = CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD);
var word = normalizer.Normalize(form.Text.Replace(' ', '.'));
ParseResult result = null;
diff --git a/Src/LexText/ParserCore/PipeBindingFactory.cs b/Src/LexText/ParserCore/PipeBindingFactory.cs
new file mode 100644
index 0000000000..1066354e97
--- /dev/null
+++ b/Src/LexText/ParserCore/PipeBindingFactory.cs
@@ -0,0 +1,42 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+
+using System;
+using System.ServiceModel;
+
+namespace SIL.FieldWorks.WordWorks.Parser
+{
+	/// <summary>
+	/// Single source of truth for the net.pipe binding of the HermitCrab worker channel. The
+	/// worker's ServiceHost (HCWorker.exe) and the in-FieldWorks client (HCWorkerClient) both
+	/// build their binding here, because the two ends must agree on quotas and timeouts.
+	/// </summary>
+	public static class PipeBindingFactory
+	{
+		// Real compiled grammars can be several MB once serialized to the HC.NET XML input format
+		// (a real Sena grammar is ~1.4 MB). NetNamedPipeBinding's 64 KB default is nowhere near
+		// enough and fails with a low-level "pipe is being closed" error rather than a clear
+		// quota-exceeded one, so size generously - grammars only grow as projects grow.
+		private const long MaxMessageSize = 256L * 1024 * 1024;
+
+		public static NetNamedPipeBinding Create()
+		{
+			int quotaLimit = ClampToInt(MaxMessageSize);
+			var binding = new NetNamedPipeBinding { SendTimeout = TimeSpan.FromMinutes(10), ReceiveTimeout = TimeSpan.FromMinutes(10) };
+			binding.Security.Mode = NetNamedPipeSecurityMode.None;
+			binding.MaxBufferSize = quotaLimit;
+			binding.MaxReceivedMessageSize = MaxMessageSize;
+			binding.MaxBufferPoolSize = MaxMessageSize;
+			var quotas = binding.ReaderQuotas;
+			quotas.MaxArrayLength = quotaLimit;
+			quotas.MaxStringContentLength = quotaLimit;
+			quotas.MaxBytesPerRead = 65536;
+			quotas.MaxDepth = 64;
+			quotas.MaxNameTableCharCount = 65536;
+			return binding;
+		}
+
+		private static int ClampToInt(long value) { return value > int.MaxValue ? int.MaxValue : (int)value; }
+	}
+}
diff --git a/Src/Utilities/pcpatrflex/DisambiguateInFLExDB/DisambiguateInFLExDBTests/ParserConcurrencyBenchmark.cs b/Src/Utilities/pcpatrflex/DisambiguateInFLExDB/DisambiguateInFLExDBTests/ParserConcurrencyBenchmark.cs
new file mode 100644
index 0000000000..607baf7064
--- /dev/null
+++ b/Src/Utilities/pcpatrflex/DisambiguateInFLExDB/DisambiguateInFLExDBTests/ParserConcurrencyBenchmark.cs
@@ -0,0 +1,183 @@
+// Copyright (c) 2026 SIL International
+// This software is licensed under the LGPL, version 2.1 or later
+// (http://www.gnu.org/licenses/lgpl-2.1.html)
+//
+// Headless benchmark for concurrent bulk parsing (Parse All Words).
+// NOT part of the normal test suite ([Explicit]); run by FullyQualifiedName filter against a
+// real project whose path is given in the FW_BENCH_FWDATA environment variable.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using GenerateHCConfig;
+using NUnit.Framework;
+using SIL.FieldWorks;
+using SIL.FieldWorks.Common.FwUtils;
+using SIL.FieldWorks.WordWorks.Parser;
+using SIL.LCModel;
+using SIL.LCModel.DomainServices;
+using SIL.LCModel.Infrastructure;
+using SIL.LCModel.Utils;
+
+namespace SIL.DisambiguateInFLExDBTests
+{
+	[TestFixture]
+	[Explicit("Performance benchmark; run manually with FW_BENCH_FWDATA set")]
+	internal class ParserConcurrencyBenchmark
+	{
+		private LcmCache m_cache;
+
+		[OneTimeSetUp]
+		public void Setup()
+		{
+			string fwdata = Environment.GetEnvironmentVariable("FW_BENCH_FWDATA");
+			Assert.That(fwdata, Is.Not.Null.And.Not.Empty, "Set FW_BENCH_FWDATA to a .fwdata path");
+			Assert.That(File.Exists(fwdata), Is.True, "fwdata not found: " + fwdata);
+
+			FwRegistryHelper.Initialize();
+			FwUtils.InitializeIcu();
+			var sync = new SingleThreadedSynchronizeInvoke();
+			var logger = new ConsoleLogger(sync);
+			var dirs = new NullFdoDirectories();
+			var settings = new LcmSettings();
+			var progress = new NullThreadedProgress(sync);
+			var projId = new ProjectId(fwdata);
+			m_cache = LcmCache.CreateCacheFromExistingData(projId, "en", logger, dirs, settings, progress);
+		}
+
+		[OneTimeTearDown]
+		public void Teardown()
+		{
+			if (m_cache != null)
+			{
+				ProjectLockingService.UnlockCurrentProject(m_cache);
+				m_cache.Dispose();
+				m_cache = null;
+			}
+		}
+
+		[Test]
+		[Timeout(2400000)]
+		public void Benchmark()
+		{
+			// Make sure we exercise the HermitCrab (parallelizable) path.
+			NonUndoableUnitOfWorkHelper.Do(m_cache.ActionHandlerAccessor, () =>
+				m_cache.LanguageProject.MorphologicalDataOA.ActiveParser = "HC");
+
+			var allWordforms = m_cache.ServiceLocator.GetInstance<IWfiWordformRepository>().AllInstances().ToList();
+			int cores = Environment.ProcessorCount;
+
+			// Keep the A/B subset small enough that the SERIAL pass finishes quickly (a negative limit counts as 0).
+			int limit = allWordforms.Count;
+			string limitEnv = Environment.GetEnvironmentVariable("FW_BENCH_LIMIT");
+			if (!string.IsNullOrEmpty(limitEnv) && int.TryParse(limitEnv, out int parsedLimit))
+				limit = Math.Max(0, Math.Min(parsedLimit, allWordforms.Count));
+			var wordforms = allWordforms.Take(limit).ToList();
+
+			Log($"Project wordforms: {allWordforms.Count} (benchmarking {wordforms.Count}) logical cores: {cores}");
+
+			using (var idleQueue = new IdleQueue { IsPaused = true })
+			using (var worker = new ParserWorker(m_cache, null, t => { }, idleQueue, Path.GetTempPath()))
+			{
+				// One-time grammar load + JIT warm-up (parse a single word).
+				var loadSw = Stopwatch.StartNew();
+				worker.ParseAndUpdateWordforms(wordforms.Take(1).ToList(), ParserPriority.Low, false, 1);
+				DrainIdle(idleQueue);
+				loadSw.Stop();
+				Log($"Grammar load + warm-up (1 word): {loadSw.ElapsedMilliseconds} ms");
+
+				// Serial-only mode: just the full-project baseline (the true "pre" number).
+				if (Environment.GetEnvironmentVariable("FW_BENCH_SERIAL_ONLY") == "1")
+				{
+					long s = MeasureParse(worker, wordforms, 1, "maxDop= 1 (serial)");
+					MeasureFile(idleQueue, " ");
+					Log($"==> Full project serial ({wordforms.Count} wordforms): {s / 1000.0:0.0}s");
+					return;
+				}
+
+				// Parallel-only mode: skip the (very slow) full-project serial baseline; just time
+				// the default cap and full-core runs to report real post-change wall-clock.
+				if (Environment.GetEnvironmentVariable("FW_BENCH_PARALLEL_ONLY") == "1")
+				{
+					int cap = Math.Max(1, Math.Min(cores - 1, 4));
+					long capParse = MeasureParse(worker, wordforms, cap, $"maxDop={cap,2} (default cap)");
+					MeasureFile(idleQueue, " ");
+					long fullParse = MeasureParse(worker, wordforms, cores, $"maxDop={cores,2} (all cores)");
+					MeasureFile(idleQueue, " ");
+					Log($"==> Full project ({wordforms.Count} wordforms): {capParse / 1000.0:0.0}s at cap {cap}, {fullParse / 1000.0:0.0}s at {cores} cores");
+					return;
+				}
+
+				long serialParse = MeasureParse(worker, wordforms, 1, "maxDop= 1");
+				long serialFile = MeasureFile(idleQueue, " ");
+
+				// Sweep the degree of parallelism to find where scaling plateaus.
+				var dops = new[] { 2, 4, 8, 12, 16, cores }.Where(d => d <= cores).Distinct().OrderBy(d => d).ToArray();
+				Log("");
+				Log($"{"DOP",4} | {"parse ms",9} | {"speedup",7} | cores-equiv");
+				Log($"{1,4} | {serialParse,9} | {1.0,6:0.00}x | {1.0,4:0.0}");
+				foreach (int dop in dops)
+				{
+					long p = MeasureParseQuiet(worker, wordforms, dop);
+					MeasureFile(idleQueue, null);
+					double sp = serialParse / (double) Math.Max(1, p);
+					Log($"{dop,4} | {p,9} | {sp,6:0.00}x | {sp,4:0.0}");
+				}
+
+				long bulk = serialParse + serialFile;
+				Log("");
+				Log($"==> Serial split over {wordforms.Count} wordforms: parse {serialParse} ms ({100.0 * serialParse / Math.Max(1, bulk):0}%) " +
+					$"vs file {serialFile} ms ({100.0 * serialFile / Math.Max(1, bulk):0}%)");
+				Log($"==> Per-wordform serial parse {serialParse / (double) Math.Max(1, wordforms.Count):0.0} ms, file {serialFile / (double) Math.Max(1, wordforms.Count):0.0} ms");
+			}
+		}
+
+		/// <summary>Times one bulk parse and logs it together with HCParser's summed diagnostic tick counters.</summary>
+		private long MeasureParse(ParserWorker worker, List<IWfiWordform> wfs, int dop, string label)
+		{
+			HCParser.DiagMorpherParseTicks = 0;
+			HCParser.DiagGetMorphsTicks = 0;
+			long elapsedMs = MeasureParseQuiet(worker, wfs, dop);
+			long morpherMs = HCParser.DiagMorpherParseTicks / TimeSpan.TicksPerMillisecond;
+			long getMorphsMs = HCParser.DiagGetMorphsTicks / TimeSpan.TicksPerMillisecond;
+			Log($"{label}: parse {elapsedMs,7} ms (summed across threads: morpher {morpherMs} ms, GetMorphs/readlock {getMorphsMs} ms)");
+			return elapsedMs;
+		}
+
+		/// <summary>Times one bulk parse at the given degree of parallelism without logging; returns elapsed ms.</summary>
+		private long MeasureParseQuiet(ParserWorker worker, List<IWfiWordform> wfs, int dop)
+		{
+			var sw = Stopwatch.StartNew();
+			worker.ParseAndUpdateWordforms(wfs, ParserPriority.Low, false, dop);
+			sw.Stop();
+			return sw.ElapsedMilliseconds;
+		}
+
+		private long MeasureFile(IdleQueue q, string label)
+		{
+			var sw = Stopwatch.StartNew();
+			DrainIdle(q);
+			sw.Stop();
+			if (label != null)
+				Log($"{label} : file {sw.ElapsedMilliseconds,7} ms");
+			return sw.ElapsedMilliseconds;
+		}
+
+		private void DrainIdle(IdleQueue q)
+		{
+			// Run each queued filing task once, then clear the queue (UpdateWordforms re-queues itself
+			// if it can't complete; in this single-threaded benchmark it completes on the first pass).
+			foreach (IdleQueueTask task in q.ToArray())
+				task.Delegate(task.Parameter);
+			q.Clear();
+		}
+
+		private static void Log(string msg)
+		{
+			TestContext.Progress.WriteLine(msg);
+			Console.WriteLine(msg);
+		}
+	}
+}