Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions FieldWorks.sln
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserCore", "Src\LexText\P
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserCoreTests", "Src\LexText\ParserCore\ParserCoreTests\ParserCoreTests.csproj", "{E5F82767-7DC7-599F-BC29-AAFE4AC98060}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HCWorker", "Src\LexText\HCWorker\HCWorker.csproj", "{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HCWorkerTests", "Src\LexText\HCWorker\HCWorkerTests\HCWorkerTests.csproj", "{9CF72C1E-F5E8-463C-B53D-0F39979742F9}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserUI", "Src\LexText\ParserUI\ParserUI.csproj", "{09D7C8FE-DD9B-5C1C-9A4D-9D61B26E878E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParserUITests", "Src\LexText\ParserUI\ParserUITests\ParserUITests.csproj", "{2310A14E-5FFA-5939-885C-DA681EAFC168}"
Expand Down Expand Up @@ -709,6 +713,18 @@ Global
{1DD0C70B-EA9D-593E-BF23-72FEAB6849DF}.Debug|x64.Build.0 = Debug|x64
{1DD0C70B-EA9D-593E-BF23-72FEAB6849DF}.Release|x64.ActiveCfg = Release|x64
{1DD0C70B-EA9D-593E-BF23-72FEAB6849DF}.Release|x64.Build.0 = Release|x64
{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Bounds|x64.ActiveCfg = Release|x64
{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Bounds|x64.Build.0 = Release|x64
{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Debug|x64.ActiveCfg = Debug|x64
{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Debug|x64.Build.0 = Debug|x64
{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Release|x64.ActiveCfg = Release|x64
{27CA033B-B6E6-41E0-A5C9-33DA5BB7F61D}.Release|x64.Build.0 = Release|x64
{9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Bounds|x64.ActiveCfg = Release|x64
{9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Bounds|x64.Build.0 = Release|x64
{9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Debug|x64.ActiveCfg = Debug|x64
{9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Debug|x64.Build.0 = Debug|x64
{9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Release|x64.ActiveCfg = Release|x64
{9CF72C1E-F5E8-463C-B53D-0F39979742F9}.Release|x64.Build.0 = Release|x64
{E5F82767-7DC7-599F-BC29-AAFE4AC98060}.Bounds|x64.ActiveCfg = Release|x64
{E5F82767-7DC7-599F-BC29-AAFE4AC98060}.Bounds|x64.Build.0 = Release|x64
{E5F82767-7DC7-599F-BC29-AAFE4AC98060}.Debug|x64.ActiveCfg = Debug|x64
Expand Down
15 changes: 15 additions & 0 deletions Src/LexText/HCWorker/App.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<!-- This process exists because Server GC is a process-wide CLR setting fixed at startup, and
FieldWorks.exe (which runs Workstation GC for UI responsiveness) cannot enable it. See
RUSTIFY-fieldworks-worker-design.md. gcConcurrent lets background collections overlap with
application threads; background Gen2 collections still contend with them, but far less than
Workstation GC's single-collector stop-the-world collections do. -->
<runtime>
<gcServer enabled="true" />
<gcConcurrent enabled="true" />
</runtime>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8" />
</startup>
</configuration>
54 changes: 54 additions & 0 deletions Src/LexText/HCWorker/HCWorker.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?xml version='1.0' encoding='utf-8'?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AssemblyName>HCWorker</AssemblyName>
<RootNamespace>SIL.FieldWorks.WordWorks.Parser.HCWorker</RootNamespace>
<TargetFramework>net48</TargetFramework>
<OutputType>Exe</OutputType>
<RuntimeIdentifier>win-x64</RuntimeIdentifier> <NoWarn>168,169,219,414,649,1635,1702,1701</NoWarn>
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<Prefer32Bit>false</Prefer32Bit>
<!-- Server GC is the whole point of this out-of-process worker: it lets bulk HermitCrab
parsing scale to ~10-12x, which FieldWorks.exe itself can't offer because it runs
Workstation GC for UI responsiveness (a process-wide setting fixed at startup). See
RUSTIFY-fieldworks-worker-design.md. App.config carries <gcServer enabled="true"/>. -->
<AppConfig>App.config</AppConfig>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>portable</DebugType>
<Optimize>false</Optimize>
<DefineConstants>DEBUG;TRACE</DefineConstants>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<DebugType>portable</DebugType>
<Optimize>true</Optimize>
<DefineConstants>TRACE</DefineConstants>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="SIL.Machine" />
<PackageReference Include="SIL.Machine.Morphology.HermitCrab" />
</ItemGroup>
<ItemGroup>
<Reference Include="System.ServiceModel" />
<Reference Include="netstandard" />
</ItemGroup>
<ItemGroup>
<!-- ParserCore supplies the shared WCF contract (IHCWorkerService + DTOs), PipeBindingFactory,
and the HCParser Form/Msa/InflType key-string constants, so the worker and the FieldWorks
client can never drift. Mirrors the established pattern of the sibling HC exe
GenerateHCConfig, which also references ParserCore. The worker never touches LCM/XAmple. -->
<ProjectReference Include="../ParserCore/ParserCore.csproj" />
</ItemGroup>
<ItemGroup>
<!-- The test project lives in a subdirectory; keep its sources out of this SDK-style project's
recursive glob (mirrors ParserCore excluding ParserCoreTests). -->
<Compile Remove="HCWorkerTests/**" />
<None Remove="HCWorkerTests/**" />
</ItemGroup>
<ItemGroup>
<Compile Include="..\..\CommonAssemblyInfo.cs">
<Link>Properties\CommonAssemblyInfo.cs</Link>
</Compile>
</ItemGroup>
</Project>
165 changes: 165 additions & 0 deletions Src/LexText/HCWorker/HCWorkerService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright (c) 2026 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.ServiceModel;
using System.Threading.Tasks;
using SIL.FieldWorks.WordWorks.Parser;
using SIL.Machine.Annotations;
using SIL.Machine.Morphology.HermitCrab;
using SIL.Machine.Morphology.HermitCrab.MorphologicalRules;

namespace SIL.FieldWorks.WordWorks.Parser.HCWorker
{
/// <summary>
/// Hosts one Morpher for the lifetime of the worker process. One instance is shared across all
/// WCF calls (InstanceContextMode.Single) and calls run concurrently (ConcurrencyMode.Multiple)
/// - Morpher.ParseWord was already called this way in-process (ParserWorker's parallel batch,
/// each iteration calling HCParser.ParseWord -> Morpher.ParseWord with no external locking), so
/// moving it out-of-process introduces no new thread-safety requirement.
///
/// The DTO extraction below (ToWordAnalysisDto) is the id-collection half of HCParser.GetMorphs,
/// running here where the Word/Allomorph/Morpheme graph lives; the LCM-object-resolution half
/// stays in HCParser.GetMorphs, which consumes the returned MorphDto[]. The Form/Msa/InflType
/// keys come from HCParser's own constants, so worker and client can never disagree on them.
/// </summary>
[ServiceBehavior(InstanceContextMode = InstanceContextMode.Single, ConcurrencyMode = ConcurrencyMode.Multiple)]
public class HCWorkerService : IHCWorkerService
{
private volatile Morpher _morpher;

/// <summary>
/// Replaces the worker's Morpher with one built from the supplied compiled grammar. The new
/// Morpher is published to <c>_morpher</c> only after it has been fully configured, so calls
/// already running keep using the instance they captured.
/// </summary>
/// <param name="grammar">Compiled grammar XML plus the Morpher settings to apply.</param>
/// <exception cref="ArgumentNullException"><paramref name="grammar"/> is null.</exception>
/// <exception cref="ArgumentException">The grammar carries no compiled grammar XML.</exception>
public void UpdateGrammar(HCGrammarDto grammar)
{
    if (grammar == null)
        throw new ArgumentNullException(nameof(grammar));
    // Fail fast with a meaningful message instead of letting the loader choke on an empty file.
    if (string.IsNullOrWhiteSpace(grammar.CompiledGrammarXml))
        throw new ArgumentException("The grammar contains no compiled grammar XML.", nameof(grammar));

    // XmlLanguageLoader.Load only takes a file path, so round-trip the grammar XML through a
    // temp file rather than adding a string/stream overload to the HC library.
    string tempPath = Path.Combine(Path.GetTempPath(), $"hcworker-grammar-{Guid.NewGuid():N}.xml");
    try
    {
        // NOTE(review): WriteAllText emits UTF-8; confirm the XML declaration inside
        // CompiledGrammarXml never claims a different encoding (e.g. utf-16).
        File.WriteAllText(tempPath, grammar.CompiledGrammarXml);
        Language language = XmlLanguageLoader.Load(tempPath);
        var morpher = new Morpher(new TraceManager(), language)
        {
            DeletionReapplications = grammar.DeletionReapplications,
            MaxStemCount = grammar.MaxStemCount,
            MergeEquivalentAnalyses = grammar.MergeEquivalentAnalyses
        };
        _morpher = morpher;
    }
    finally
    {
        try
        {
            File.Delete(tempPath);
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            // Best-effort cleanup: a stray temp file is not worth failing the grammar update
            // over, and an exception thrown from this finally block would otherwise replace
            // whatever error the load itself raised. File.Delete reports a locked or
            // access-denied file as either of these two exception types.
        }
    }
}

/// <summary>
/// Parses one word with the currently loaded Morpher and converts every analysis to its DTO.
/// </summary>
/// <param name="word">The surface form to parse.</param>
/// <param name="guessRoots">Passed straight through to Morpher.ParseWord.</param>
/// <returns>One <see cref="WordAnalysisDto"/> per analysis the Morpher produced.</returns>
/// <exception cref="InvalidOperationException">No grammar has been loaded yet.</exception>
public WordAnalysisDto[] ParseWord(string word, bool guessRoots)
{
    var analyses = new List<WordAnalysisDto>();
    foreach (Word analysis in RequireMorpher().ParseWord(word, out _, guessRoots))
        analyses.Add(ToWordAnalysisDto(analysis));
    return analyses.ToArray();
}

/// <summary>
/// Parses a batch of words in parallel and returns the analyses keyed by word.
/// </summary>
/// <param name="words">Words to parse. Null entries are ignored and duplicates are parsed once.</param>
/// <param name="guessRoots">Passed straight through to Morpher.ParseWord.</param>
/// <returns>
/// A dictionary with one entry per distinct non-null word. A word whose parse threw maps to an
/// empty array.
/// </returns>
/// <exception cref="InvalidOperationException">No grammar has been loaded yet.</exception>
/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
public IDictionary<string, WordAnalysisDto[]> ParseWordsBatch(string[] words, bool guessRoots)
{
    Morpher morpher = RequireMorpher();
    if (words == null)
        throw new ArgumentNullException(nameof(words));

    // A null entry cannot be a dictionary key: storing it would throw inside the per-word guard
    // and then again inside its catch handler, aborting the whole batch. Duplicates would only
    // repeat the same parse and overwrite the same key, so parse each distinct word once.
    string[] distinctWords = words.Where(w => w != null).Distinct().ToArray();

    var results = new ConcurrentDictionary<string, WordAnalysisDto[]>();
    // Parses the whole batch server-side, limited only by the processor count: the tighter cap in
    // ParserWorker existed only to keep FieldWorks' UI thread responsive under Workstation
    // GC, which no longer applies once parsing lives in this Server-GC process.
    Parallel.ForEach(
        distinctWords,
        new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount },
        word =>
        {
            try
            {
                results[word] = morpher.ParseWord(word, out _, guessRoots).Select(ToWordAnalysisDto).ToArray();
            }
            catch (Exception)
            {
                // Guard each word so one unexpected exception (e.g. an out-of-vocabulary
                // character, which throws InvalidShapeException) cannot abort the whole
                // batch, mirroring ParserWorker.ParseAndUpdateWordformGuarded and
                // HCParser.ParseWord's own try/catch around the equivalent call.
                results[word] = Array.Empty<WordAnalysisDto>();
            }
        }
    );
    // Return a plain Dictionary: DataContractSerializer's IDictionary support is defined in
    // terms of the concrete Dictionary shape, so don't rely on ConcurrentDictionary matching it.
    return new Dictionary<string, WordAnalysisDto[]>(results);
}

/// <summary>
/// Returns the currently loaded Morpher, reading the volatile field exactly once so the caller
/// keeps working with a single instance even if UpdateGrammar swaps it meanwhile.
/// </summary>
/// <exception cref="InvalidOperationException">UpdateGrammar has not stored a Morpher yet.</exception>
private Morpher RequireMorpher()
{
    return _morpher ?? throw new InvalidOperationException("UpdateGrammar must be called before parsing.");
}

/// <summary>
/// Flattens one parsed Word into a <see cref="WordAnalysisDto"/>: one <see cref="MorphDto"/> per
/// morph whose allomorph carries a non-zero FormID property. Morphs without that property (or
/// with FormID 0) are skipped. MorphemeIndex numbers the distinct morphemes of the retained
/// morphs in first-seen order, starting at 0.
/// </summary>
/// <param name="ws">The parsed word to convert.</param>
/// <returns>A DTO holding the retained morphs in the order the word lists them.</returns>
internal static WordAnalysisDto ToWordAnalysisDto(Word ws)
{
    var indexByMorpheme = new Dictionary<Morpheme, int>();
    var collected = new List<MorphDto>();

    foreach (Annotation<ShapeNode> morph in ws.Morphs)
    {
        Allomorph allo = ws.GetAllomorph(morph);

        // An absent FormID and an explicit 0 both mean "nothing to report for this morph".
        int? primaryFormId = ParseNullableIntProperty(allo.Properties, HCParser.FormID);
        if (!primaryFormId.HasValue || primaryFormId.Value == 0)
            continue;

        // Assign the next free index the first time a morpheme is encountered.
        int index;
        if (!indexByMorpheme.TryGetValue(allo.Morpheme, out index))
        {
            index = indexByMorpheme.Count;
            indexByMorpheme[allo.Morpheme] = index;
        }

        var nodes = ws.Shape.GetNodes(morph.Range);
        string surface = nodes.ToString(ws.Stratum.CharacterDefinitionTable, false);

        // Fields are filled in the same order as before so that, if several properties are
        // malformed, the same exception surfaces first.
        var dto = new MorphDto();
        dto.FormId = primaryFormId.Value;
        dto.FormId2 = ParseNullableIntProperty(allo.Properties, HCParser.FormID2) ?? 0;
        dto.IsAffixProcessAllomorph = allo is AffixProcessAllomorph;
        dto.FormStr = surface;
        dto.Guessed = allo.Guessed;
        dto.MsaId = ParseIntProperty(allo.Morpheme.Properties, HCParser.MsaID);
        dto.InflTypeId = ParseNullableIntProperty(allo.Morpheme.Properties, HCParser.InflTypeID) ?? 0;
        dto.MorphemeIndex = index;
        collected.Add(dto);
    }

    var analysis = new WordAnalysisDto();
    analysis.Morphs = collected.ToArray();
    return analysis;
}

/// <summary>
/// Reads a required integer property from a property bag.
/// </summary>
/// <param name="properties">The property bag to read from.</param>
/// <param name="key">The property name.</param>
/// <returns>The property value as an int.</returns>
/// <exception cref="InvalidOperationException">The key is absent or its value is null.</exception>
/// <exception cref="FormatException">The value is not a valid integer.</exception>
private static int ParseIntProperty(IDictionary<string, object> properties, string key)
{
    // Properties round-trip through XmlLanguageWriter/XmlLanguageLoader as strings even
    // though HCLoader stored them as ints (hcEntry.Properties[HCParser.MsaID] = msa.Hvo),
    // so parse rather than unbox.
    if (!properties.TryGetValue(key, out object value) || value == null)
        throw new InvalidOperationException($"Morpheme is missing required property '{key}'.");
    // A value that is still a boxed int needs no string round-trip.
    if (value is int boxed)
        return boxed;
    // These are machine-written ids, not user-facing text: format and parse with the invariant
    // culture so the result never depends on the worker process's regional settings.
    return int.Parse(
        Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture),
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture);
}

/// <summary>
/// Reads an optional integer property from a property bag.
/// </summary>
/// <param name="properties">The property bag to read from.</param>
/// <param name="key">The property name.</param>
/// <returns>The property value as an int, or null when the key is absent or its value is null.</returns>
/// <exception cref="FormatException">The value is present but is not a valid integer.</exception>
private static int? ParseNullableIntProperty(IDictionary<string, object> properties, string key)
{
    if (!properties.TryGetValue(key, out object value) || value == null)
        return null;
    // A value that is still a boxed int needs no string round-trip.
    if (value is int boxed)
        return boxed;
    // These are machine-written ids, not user-facing text: format and parse with the invariant
    // culture so the result never depends on the worker process's regional settings.
    return int.Parse(
        Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture),
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture);
}
}
}
Loading
Loading