From d16f1bc213c54795a7129b485313e99c7c2eda98 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Fri, 3 Jul 2026 14:47:44 -0400 Subject: [PATCH] Fix XmlLanguageWriter crash on dangling morpheme/allomorph co-occurrence rules A Language built programmatically (e.g. FieldWorks' HCLoader) can populate MorphemeCoOccurrenceRules/AllomorphCoOccurrenceRules with a morpheme or allomorph that never ends up written under any Stratum -- e.g. an ad-hoc prohibition still targets an affix in a slot whose owning inflectional-affix template was disabled. XmlLanguageWriter.WriteLanguage assumed every referenced morpheme/allomorph would already have an id, so it threw KeyNotFoundException instead of writing the rest of the language. Found via GenerateHCConfig against a real Amharic FieldWorks project. Skip rules that reference an unresolvable morpheme/allomorph instead of throwing, since dropping one unwritable constraint is preferable to failing the whole export. Co-Authored-By: Claude Sonnet 5 --- .../XmlLanguageWriter.cs | 23 ++- .../XmlLanguageWriterCoOccurrenceRuleTests.cs | 134 ++++++++++++++++++ 2 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 tests/SIL.Machine.Morphology.HermitCrab.Tests/XmlLanguageWriterCoOccurrenceRuleTests.cs diff --git a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs index 4dec714cb..7e6116fb7 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs @@ -193,22 +193,37 @@ private XElement WriteLanguage() langElem.Add(new XElement("Strata", _language.Strata.Select(WriteStratum))); - if (_language.MorphemeCoOccurrenceRules.Count > 0) + // A caller (e.g. FieldWorks' HCLoader) can populate these rules from morphemes/allomorphs that never + // end up written under Strata above — e.g. an ad-hoc prohibition still targets the affix in a slot + // whose owning inflectional-affix template was disabled. Skip rules we can't resolve an id for + // instead of throwing, since dropping an unwritable constraint is preferable to failing the whole + // export. + (Morpheme Key, MorphemeCoOccurrenceRule Rule)[] writableMorphemeCoOccurRules = _language + .MorphemeCoOccurrenceRules.Where(t => + _morphemes.ContainsKey(t.Key) && t.Rule.Others.All(m => _morphemes.ContainsKey(m)) + ) + .ToArray(); + if (writableMorphemeCoOccurRules.Length > 0) { langElem.Add( new XElement( "MorphemeCoOccurrenceRules", - _language.MorphemeCoOccurrenceRules.Select(t => WriteMorphemeCoOccurrenceRule(t.Key, t.Rule)) + writableMorphemeCoOccurRules.Select(t => WriteMorphemeCoOccurrenceRule(t.Key, t.Rule)) ) ); } - if (_language.AllomorphCoOccurrenceRules.Count > 0) + (Allomorph Key, AllomorphCoOccurrenceRule Rule)[] writableAllomorphCoOccurRules = _language + .AllomorphCoOccurrenceRules.Where(t => + _allomorphs.ContainsKey(t.Key) && t.Rule.Others.All(a => _allomorphs.ContainsKey(a)) + ) + .ToArray(); + if (writableAllomorphCoOccurRules.Length > 0) { langElem.Add( new XElement( "AllomorphCoOccurrenceRules", - _language.AllomorphCoOccurrenceRules.Select(t => WriteAllomorphCoOccurrenceRule(t.Key, t.Rule)) + writableAllomorphCoOccurRules.Select(t => WriteAllomorphCoOccurrenceRule(t.Key, t.Rule)) ) ); } diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/XmlLanguageWriterCoOccurrenceRuleTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/XmlLanguageWriterCoOccurrenceRuleTests.cs new file mode 100644 index 000000000..440dc50a0 --- /dev/null +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/XmlLanguageWriterCoOccurrenceRuleTests.cs @@ -0,0 +1,134 @@ +using NUnit.Framework; +using SIL.Machine.FeatureModel; + +namespace SIL.Machine.Morphology.HermitCrab; + +// A Language built programmatically (e.g. by FieldWorks' HCLoader) can have +// MorphemeCoOccurrenceRules/AllomorphCoOccurrenceRules that reference a morpheme/allomorph never written +// under any Stratum -- e.g. an ad-hoc prohibition still targets an affix in a slot whose owning +// inflectional-affix template was disabled. XmlLanguageWriter must skip such rules instead of throwing. +[TestFixture] +public class XmlLanguageWriterCoOccurrenceRuleTests +{ + [Test] + public void Save_MorphemeCoOccurrenceRuleReferencesUnwrittenMorpheme_DoesNotThrowAndOmitsRule() + { + Language language = BuildLanguage(out LexEntry writtenEntry, out LexEntry danglingEntry); + language.MorphemeCoOccurrenceRules.Add( + ( + danglingEntry, + new MorphemeCoOccurrenceRule( + ConstraintType.Exclude, + new Morpheme[] { writtenEntry }, + MorphCoOccurrenceAdjacency.Anywhere + ) + ) + ); + + string xml = SaveToTempFile(language); + Assert.That(xml, Does.Not.Contain("MorphemeCoOccurrenceRules")); + } + + [Test] + public void Save_MorphemeCoOccurrenceRuleReferencesUnwrittenOtherMorpheme_DoesNotThrowAndOmitsRule() + { + Language language = BuildLanguage(out LexEntry writtenEntry, out LexEntry danglingEntry); + language.MorphemeCoOccurrenceRules.Add( + ( + writtenEntry, + new MorphemeCoOccurrenceRule( + ConstraintType.Exclude, + new Morpheme[] { danglingEntry }, + MorphCoOccurrenceAdjacency.Anywhere + ) + ) + ); + + string xml = SaveToTempFile(language); + Assert.That(xml, Does.Not.Contain("MorphemeCoOccurrenceRules")); + } + + [Test] + public void Save_AllomorphCoOccurrenceRuleReferencesUnwrittenAllomorph_DoesNotThrowAndOmitsRule() + { + Language language = BuildLanguage(out LexEntry writtenEntry, out LexEntry danglingEntry); + language.AllomorphCoOccurrenceRules.Add( + ( + danglingEntry.Allomorphs[0], + new AllomorphCoOccurrenceRule( + ConstraintType.Exclude, + new Allomorph[] { writtenEntry.Allomorphs[0] }, + MorphCoOccurrenceAdjacency.Anywhere + ) + ) + ); + + string xml = SaveToTempFile(language); + Assert.That(xml, Does.Not.Contain("AllomorphCoOccurrenceRules")); + } + + [Test] + public void Save_MorphemeCoOccurrenceRuleReferencesOnlyWrittenMorphemes_IsWritten() + { + Language language = BuildLanguage(out LexEntry writtenEntry, out _); + var otherEntry = new LexEntry { Id = "other", Gloss = "other" }; + otherEntry.Allomorphs.Add(new RootAllomorph(new Segments(language.Strata[0].CharacterDefinitionTable, "a"))); + language.Strata[0].Entries.Add(otherEntry); + language.MorphemeCoOccurrenceRules.Add( + ( + writtenEntry, + new MorphemeCoOccurrenceRule( + ConstraintType.Exclude, + new Morpheme[] { otherEntry }, + MorphCoOccurrenceAdjacency.Anywhere + ) + ) + ); + + string xml = SaveToTempFile(language); + Assert.That(xml, Does.Contain("MorphemeCoOccurrenceRules")); + } + + private static Language BuildLanguage(out LexEntry writtenEntry, out LexEntry danglingEntry) + { + var syntacticFeatSys = new SyntacticFeatureSystem(); + syntacticFeatSys.AddPartsOfSpeech(new FeatureSymbol("N", "Noun")); + syntacticFeatSys.Freeze(); + + var table = new CharacterDefinitionTable { Name = "table1" }; + table.AddSegment("a"); + + var stratum = new Stratum(table) { Name = "Stratum1" }; + + writtenEntry = new LexEntry { Id = "written", Gloss = "written" }; + writtenEntry.Allomorphs.Add(new RootAllomorph(new Segments(table, "a"))); + stratum.Entries.Add(writtenEntry); + + // Never added to any stratum, so XmlLanguageWriter never assigns it an id. + danglingEntry = new LexEntry { Id = "dangling", Gloss = "dangling" }; + danglingEntry.Allomorphs.Add(new RootAllomorph(new Segments(table, "a"))); + + return new Language + { + Name = "Test", + SyntacticFeatureSystem = syntacticFeatSys, + CharacterDefinitionTables = { table }, + Strata = { stratum }, + }; + } + + private static string SaveToTempFile(Language language) + { + string path = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + try + { + Assert.DoesNotThrow(() => XmlLanguageWriter.Save(language, path)); + return File.ReadAllText(path); + } + finally + { + if (File.Exists(path)) + File.Delete(path); + } + } +}