diff --git a/.gitignore b/.gitignore index af6b4a93c..21cecc927 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,13 @@ tests/SIL.Machine.Tests/Corpora/TestData/usfm/target/* tests/SIL.Machine.Tests/Corpora/TestData/project/* tests/SIL.Machine.Tests/Corpora/TestData/pretranslations.json .idea + +# Local-only HermitCrab benchmark fixtures (real Sena/Indonesian grammars + word lists, used +# for ad hoc perf/allocation testing) + FieldWorks project backups. Large and/or not licensed +# for this repo, so they stay untracked; any [Explicit] benchmark that wants them falls back to +# the tracked samples/data/en-hc.xml when they're absent. +*.fwbackup +samples/data/sena-hc.xml +samples/data/sena-words.txt +samples/data/indonesian-hc.xml +samples/data/indonesian-words.txt diff --git a/src/SIL.Machine.Morphology.HermitCrab/AffixTemplate.cs b/src/SIL.Machine.Morphology.HermitCrab/AffixTemplate.cs index 02e16e8e1..ebbb8d061 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/AffixTemplate.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/AffixTemplate.cs @@ -76,12 +76,12 @@ public Stratum Stratum } } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisAffixTemplateRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisAffixTemplateRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/AllomorphEnvironment.cs b/src/SIL.Machine.Morphology.HermitCrab/AllomorphEnvironment.cs index c1ac4f768..a08a76099 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/AllomorphEnvironment.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/AllomorphEnvironment.cs @@ -12,16 +12,12 @@ namespace SIL.Machine.Morphology.HermitCrab public class AllomorphEnvironment : IEquatable { private readonly ConstraintType _type; - private readonly Pattern _leftEnv; - private readonly Matcher 
_leftEnvMatcher; - private readonly Pattern _rightEnv; - private readonly Matcher _rightEnvMatcher; - - public AllomorphEnvironment( - ConstraintType type, - Pattern leftEnv, - Pattern rightEnv - ) + private readonly Pattern _leftEnv; + private readonly Matcher _leftEnvMatcher; + private readonly Pattern _rightEnv; + private readonly Matcher _rightEnvMatcher; + + public AllomorphEnvironment(ConstraintType type, Pattern leftEnv, Pattern rightEnv) { _type = type; if (leftEnv != null && !leftEnv.IsLeaf) @@ -29,9 +25,9 @@ Pattern rightEnv if (!leftEnv.IsFrozen) throw new ArgumentException("The pattern is not frozen.", "leftEnv"); _leftEnv = leftEnv; - _leftEnvMatcher = new Matcher( + _leftEnvMatcher = new Matcher( leftEnv, - new MatcherSettings + new MatcherSettings { AnchoredToStart = true, Direction = Direction.RightToLeft, @@ -47,9 +43,9 @@ Pattern rightEnv if (!rightEnv.IsFrozen) throw new ArgumentException("The pattern is not frozen.", "rightEnv"); _rightEnv = rightEnv; - _rightEnvMatcher = new Matcher( + _rightEnvMatcher = new Matcher( rightEnv, - new MatcherSettings + new MatcherSettings { AnchoredToStart = true, Filter = ann => @@ -68,12 +64,12 @@ public ConstraintType Type public string Name { get; set; } - public Pattern LeftEnvironment + public Pattern LeftEnvironment { get { return _leftEnv; } } - public Pattern RightEnvironment + public Pattern RightEnvironment { get { return _rightEnv; } } @@ -87,10 +83,24 @@ public bool IsWordValid(Word word, Annotation morph) private bool IsMatch(Word word, Annotation morph) { - if (_leftEnvMatcher != null && !_leftEnvMatcher.IsMatch(word, morph.Range.Start.Prev)) + // RUSTIFY Stage 2: the env matchers are Matcher; pass the bracketing node's + // direction-aware start offset (left env matches RtL, right env LtR — see the ctor). 
+ if ( + _leftEnvMatcher != null + && !_leftEnvMatcher.IsMatch( + word, + word.Shape.MatchStartOffset(morph.Range.Start.Prev, Direction.RightToLeft) + ) + ) return false; - if (_rightEnvMatcher != null && !_rightEnvMatcher.IsMatch(word, morph.Range.End.Next)) + if ( + _rightEnvMatcher != null + && !_rightEnvMatcher.IsMatch( + word, + word.Shape.MatchStartOffset(morph.Range.End.Next, Direction.LeftToRight) + ) + ) return false; return true; diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs index 6331e2995..f401ce0fa 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs @@ -1,29 +1,27 @@ -using System.Collections.Generic; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.Machine.Rules; using SIL.ObjectModel; -#if !SINGLE_THREADED -using System; -using System.Collections.Concurrent; -using System.Threading.Tasks; -#endif namespace SIL.Machine.Morphology.HermitCrab { - internal class AnalysisAffixTemplateRule : IRule + internal class AnalysisAffixTemplateRule : IRule { private readonly Morpher _morpher; private readonly AffixTemplate _template; - private readonly List> _rules; + private readonly List> _rules; public AnalysisAffixTemplateRule(Morpher morpher, AffixTemplate template) { _morpher = morpher; _template = template; - _rules = new List>( - template.Slots.Select(slot => new RuleBatch( + _rules = new List>( + template.Slots.Select(slot => new RuleBatch( slot.Rules.Select(mr => mr.CompileAnalysisRule(morpher)), false, FreezableEqualityComparer.Default @@ -47,18 +45,24 @@ public IEnumerable Apply(Word input) inWord.Freeze(); var output = new HashSet(FreezableEqualityComparer.Default); -#if SINGLE_THREADED - 
ApplySlots(inWord, _rules.Count - 1, output); -#else - ParallelApplySlots(inWord, output); -#endif + if (_morpher.MaxDegreeOfParallelism == 1) + ApplySlots(inWord, _rules.Count - 1, output); + else + ParallelApplySlots(inWord, output); foreach (Word outWord in output) - outWord.SyntacticFeatureStruct.Add(fs); + { + // Clone-then-reassign, not an in-place mutation: outWord may already be frozen (it + // came out of the rule cascade above), and a frozen FeatureStruct must not be mutated + // in place — a future memoized/shared result instance would otherwise leak this edit + // into every branch that shares it. + FeatureStruct sfs = outWord.SyntacticFeatureStruct.Clone(); + sfs.Add(fs); + outWord.SyntacticFeatureStruct = sfs; + } return output; } -#if SINGLE_THREADED private void ApplySlots(Word inWord, int index, HashSet output) { for (int i = index; i >= 0; i--) @@ -78,9 +82,10 @@ private void ApplySlots(Word inWord, int index, HashSet output) _morpher.TraceManager.EndUnapplyTemplate(_template, inWord, true); output.Add(inWord); } -#else + private void ParallelApplySlots(Word inWord, HashSet output) { + var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = _morpher.MaxDegreeOfParallelism }; var outStack = new ConcurrentStack(); var from = new ConcurrentStack>(); from.Push(Tuple.Create(inWord, _rules.Count - 1)); @@ -90,6 +95,7 @@ private void ParallelApplySlots(Word inWord, HashSet output) to.Clear(); Parallel.ForEach( from, + parallelOptions, work => { bool add = true; @@ -126,6 +132,5 @@ private void ParallelApplySlots(Word inWord, HashSet output) output.UnionWith(outStack); } -#endif } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs index b4673ca55..4bdd3c959 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs @@ -6,11 +6,11 @@ namespace 
SIL.Machine.Morphology.HermitCrab { - internal class AnalysisLanguageRule : IRule + internal class AnalysisLanguageRule : IRule { private readonly Morpher _morpher; private readonly List _strata; - private readonly List> _rules; + private readonly List> _rules; public AnalysisLanguageRule(Morpher morpher, Language language) { diff --git a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs index 36d9557ad..aadef0838 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs @@ -8,11 +8,11 @@ namespace SIL.Machine.Morphology.HermitCrab { - internal class AnalysisStratumRule : IRule + internal class AnalysisStratumRule : IRule { - private readonly IRule _mrulesRule; - private readonly IRule _prulesRule; - private readonly IRule _templatesRule; + private readonly IRule _mrulesRule; + private readonly IRule _prulesRule; + private readonly IRule _templatesRule; private readonly Stratum _stratum; private readonly Morpher _morpher; @@ -20,16 +20,16 @@ public AnalysisStratumRule(Morpher morpher, Stratum stratum) { _stratum = stratum; _morpher = morpher; - _prulesRule = new LinearRuleCascade( + _prulesRule = new LinearRuleCascade( stratum.PhonologicalRules.Select(prule => CompilePhonologicalRule(prule, morpher)).Reverse() ); - _templatesRule = new RuleBatch( + _templatesRule = new RuleBatch( stratum.AffixTemplates.Select(template => CompileAffixTemplate(template, morpher)), false, FreezableEqualityComparer.Default ); _mrulesRule = null; - IEnumerable> mrules = stratum + IEnumerable> mrules = stratum .MorphologicalRules.Select(mrule => CompileMorphologicalRule(mrule, morpher)) .Reverse(); switch (stratum.MorphologicalRuleOrder) @@ -39,31 +39,38 @@ public AnalysisStratumRule(Morpher morpher, Stratum stratum) // because morphological rules should be considered optional // during unapplication (they are obligatory during 
application, // but we don't know they have been applied during unapplication). - _mrulesRule = new PermutationRuleCascade( + _mrulesRule = new PermutationRuleCascade( mrules, true, FreezableEqualityComparer.Default ); break; case MorphologicalRuleOrder.Unordered: -#if SINGLE_THREADED - _mrulesRule = new CombinationRuleCascade( - mrules, - true, - FreezableEqualityComparer.Default - ); -#else - _mrulesRule = new ParallelCombinationRuleCascade( - mrules, - true, - FreezableEqualityComparer.Default - ); -#endif + // Single-threaded when the caller caps within-word parallelism (e.g. it + // parallelizes across words itself); parallel cascade otherwise. + _mrulesRule = + morpher.MaxDegreeOfParallelism == 1 + ? (IRule) + new CombinationRuleCascade( + mrules, + true, + FreezableEqualityComparer.Default + ) + : new ParallelCombinationRuleCascade( + mrules, + true, + FreezableEqualityComparer.Default + ) + { + // Honor the within-word parallelism cap rather than running at + // the default (effectively unbounded) scheduler degree. 
+ MaxDegreeOfParallelism = morpher.MaxDegreeOfParallelism, + }; break; } } - private IRule CompileAffixTemplate(AffixTemplate template, Morpher morpher) + private IRule CompileAffixTemplate(AffixTemplate template, Morpher morpher) { try { @@ -75,7 +82,7 @@ private IRule CompileAffixTemplate(AffixTemplate template, Morp } } - private IRule CompileMorphologicalRule(IMorphologicalRule mrule, Morpher morpher) + private IRule CompileMorphologicalRule(IMorphologicalRule mrule, Morpher morpher) { try { @@ -87,7 +94,7 @@ private IRule CompileMorphologicalRule(IMorphologicalRule mrule } } - private IRule CompilePhonologicalRule(IPhonologicalRule prule, Morpher morpher) + private IRule CompilePhonologicalRule(IPhonologicalRule prule, Morpher morpher) { try { @@ -149,7 +156,7 @@ public IEnumerable Apply(Word input) private IEnumerable ApplyMorphologicalRules(Word input) { - foreach (Word mruleOutWord in _mrulesRule.Apply(input).Distinct(FreezableEqualityComparer.Default)) + foreach (Word mruleOutWord in _mrulesRule.Apply(input)) { switch (_stratum.MorphologicalRuleOrder) { @@ -168,7 +175,7 @@ private IEnumerable ApplyMorphologicalRules(Word input) private IEnumerable ApplyTemplates(Word input) { - foreach (Word tempOutWord in _templatesRule.Apply(input).Distinct(FreezableEqualityComparer.Default)) + foreach (Word tempOutWord in _templatesRule.Apply(input)) { switch (_stratum.MorphologicalRuleOrder) { diff --git a/src/SIL.Machine.Morphology.HermitCrab/HCRuleBase.cs b/src/SIL.Machine.Morphology.HermitCrab/HCRuleBase.cs index 4e1fa8c68..4c204feeb 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/HCRuleBase.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/HCRuleBase.cs @@ -16,9 +16,9 @@ protected HCRuleBase() public string Name { get; set; } - public abstract IRule CompileAnalysisRule(Morpher morpher); + public abstract IRule CompileAnalysisRule(Morpher morpher); - public abstract IRule CompileSynthesisRule(Morpher morpher); + public abstract IRule CompileSynthesisRule(Morpher 
morpher); public IDictionary Properties { diff --git a/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs b/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs index bd05a1c74..5cf2ad5af 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs @@ -3,6 +3,7 @@ using System.Text; using System.Text.RegularExpressions; using SIL.Machine.Annotations; +using SIL.Machine.DataStructures; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.ObjectModel; @@ -21,11 +22,64 @@ public static FeatureSymbol Type(this Annotation ann) return (FeatureSymbol)ann.FeatureStruct.GetValue(HCFeatureSystem.Type); } - public static FeatureSymbol Type(this Constraint constraint) + public static FeatureSymbol Type(this Constraint constraint) { return (FeatureSymbol)constraint.FeatureStruct.GetValue(HCFeatureSystem.Type); } + // RUSTIFY Stage 2: the FST binds as Fst and its matcher filters / inspects the + // shape's int-offset annotation projection (Annotation), which shares the FeatureStruct + // with the ShapeNode annotations — so these read identically to the ShapeNode overloads. + public static FeatureSymbol Type(this Annotation ann) + { + return (FeatureSymbol)ann.FeatureStruct.GetValue(HCFeatureSystem.Type); + } + + internal static bool IsDeleted(this Annotation ann) + { + SymbolicFeatureValue sfv; + if (ann.FeatureStruct.TryGetValue(HCFeatureSystem.Deletion, out sfv)) + return ((FeatureSymbol)sfv) == HCFeatureSystem.Deleted; + return false; + } + + // ---- RUSTIFY Stage 2: int match/group offset -> ShapeNode resolution ---- + // The FST binds as Fst with offset = node Tag and half-open annotation ranges + // [tag, tag+1). A match/group Range is therefore [leftmostTag, rightmostTag+1): the + // leftmost node is NodeAt(Start) and the rightmost is NodeAt(End-1). 
These helpers re-express + // the old ShapeNode range navigation (range.Start/.End/.GetStart(dir)/.GetEnd(dir)) over int + // offsets so rule RHS code can keep operating on the segment graph. + + internal static ShapeNode StartNode(this Shape shape, Range range) + { + return shape.NodeAt(range.Start); + } + + internal static ShapeNode EndNode(this Shape shape, Range range) + { + return shape.NodeAt(range.End - 1); + } + + internal static ShapeNode GetStartNode(this Shape shape, Range range, Direction dir) + { + return dir == Direction.LeftToRight ? shape.NodeAt(range.Start) : shape.NodeAt(range.End - 1); + } + + internal static ShapeNode GetEndNode(this Shape shape, Range range, Direction dir) + { + return dir == Direction.LeftToRight ? shape.NodeAt(range.End - 1) : shape.NodeAt(range.Start); + } + + internal static Range ToShapeRange(this Shape shape, Range range) + { + return Range.Create(shape.NodeAt(range.Start), shape.NodeAt(range.End - 1)); + } + + internal static IEnumerable GetNodes(this Shape shape, Range range) + { + return shape.GetNodes(shape.ToShapeRange(range)); + } + internal static FeatureStruct AntiFeatureStruct(this FeatureStruct fs) { // TODO: handle reentrance properly @@ -140,14 +194,14 @@ internal static IEnumerable RemoveDuplicates(this IEnumerable words) return output; } - internal static IEnumerable> DeepCloneExceptBoundaries( - this IEnumerable> nodes + internal static IEnumerable> DeepCloneExceptBoundaries( + this IEnumerable> nodes ) { - foreach (PatternNode node in nodes) + foreach (PatternNode node in nodes) { if ( - node is Constraint constraint + node is Constraint constraint && (constraint.FeatureStruct.IsEmpty || constraint.Type() != HCFeatureSystem.Boundary) ) { @@ -155,27 +209,25 @@ node is Constraint constraint continue; } - if (node is Alternation alternation) + if (node is Alternation alternation) { - var newAlteration = new Alternation( - alternation.Children.DeepCloneExceptBoundaries() - ); + var newAlteration = new 
Alternation(alternation.Children.DeepCloneExceptBoundaries()); if (newAlteration.Children.Count > 0) yield return newAlteration; continue; } - if (node is Group group) + if (node is Group group) { - var newGroup = new Group(group.Name, group.Children.DeepCloneExceptBoundaries()); + var newGroup = new Group(group.Name, group.Children.DeepCloneExceptBoundaries()); if (newGroup.Children.Count > 0) yield return newGroup; continue; } - if (node is Quantifier quantifier) + if (node is Quantifier quantifier) { - var newQuantifier = new Quantifier( + var newQuantifier = new Quantifier( quantifier.MinOccur, quantifier.MaxOccur, quantifier.Children.DeepCloneExceptBoundaries().SingleOrDefault() @@ -185,12 +237,9 @@ node is Constraint constraint continue; } - if (node is Pattern pattern) + if (node is Pattern pattern) { - var newPattern = new Pattern( - pattern.Name, - pattern.Children.DeepCloneExceptBoundaries() - ); + var newPattern = new Pattern(pattern.Name, pattern.Children.DeepCloneExceptBoundaries()); if (newPattern.Children.Count > 0) yield return newPattern; } diff --git a/src/SIL.Machine.Morphology.HermitCrab/IHCRule.cs b/src/SIL.Machine.Morphology.HermitCrab/IHCRule.cs index 9ae4e7c27..3c98ae8a3 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/IHCRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/IHCRule.cs @@ -7,7 +7,7 @@ public interface IHCRule { string Name { get; set; } - IRule CompileAnalysisRule(Morpher morpher); - IRule CompileSynthesisRule(Morpher morpher); + IRule CompileAnalysisRule(Morpher morpher); + IRule CompileSynthesisRule(Morpher morpher); } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/Language.cs b/src/SIL.Machine.Morphology.HermitCrab/Language.cs index b88afaf37..fd8e983b8 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Language.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Language.cs @@ -137,14 +137,14 @@ public ICollection PhonologicalRules get { return _allomorphCoOccurRules; } } - public override IRule 
CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisLanguageRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { - return new PipelineRuleCascade( + return new PipelineRuleCascade( _strata.Select(stratum => stratum.CompileSynthesisRule(morpher)), FreezableEqualityComparer.Default ); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphemicMorphologicalRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphemicMorphologicalRule.cs index 0381fe255..8a1e76e51 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphemicMorphologicalRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphemicMorphologicalRule.cs @@ -13,7 +13,7 @@ public override MorphemeType MorphemeType get { return MorphemeType.Affix; } } - public abstract IRule CompileAnalysisRule(Morpher morpher); - public abstract IRule CompileSynthesisRule(Morpher morpher); + public abstract IRule CompileAnalysisRule(Morpher morpher); + public abstract IRule CompileSynthesisRule(Morpher morpher); } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs index e4fd1879d..10cdc45c6 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs @@ -1,9 +1,12 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; using SIL.Extensions; using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; +using SIL.Machine.FiniteState; using SIL.Machine.Morphology.HermitCrab.MorphologicalRules; using SIL.Machine.Rules; using SIL.ObjectModel; @@ -11,27 +14,36 @@ using System.IO; #endif -#if !SINGLE_THREADED -using System.Collections.Concurrent; -using System.Threading.Tasks; -#endif - namespace SIL.Machine.Morphology.HermitCrab { public class Morpher : 
IMorphologicalAnalyzer, IMorphologicalGenerator { private readonly Language _lang; - private readonly IRule _analysisRule; - private readonly IRule _synthesisRule; + private readonly IRule _analysisRule; + private readonly IRule _synthesisRule; private readonly Dictionary _allomorphTries; private readonly ITraceManager _traceManager; private readonly ReadOnlyObservableCollection _morphemes; private readonly IList _lexicalPatterns = new List(); public Morpher(ITraceManager traceManager, Language lang) + : this(traceManager, lang, -1) { } + + /// + /// Caps the parallelism used within a single parse. A value of 1 makes the + /// morpher fully single-threaded (analysis cascade, affix-template unapplication, and + /// synthesis all run sequentially) — this is the mode a caller should use when it + /// parallelizes across words itself (e.g. FieldWorks' "Parse All Words"), to + /// avoid nested parallelism / thread-pool oversubscription. Any value <= 0 defaults + /// to (the historical behavior). + /// + public Morpher(ITraceManager traceManager, Language lang, int maxDegreeOfParallelism) { _lang = lang; _traceManager = traceManager; + // Must be set before CompileAnalysisRule: the analysis rules choose a sequential vs. + // parallel cascade at construction time based on this value. + MaxDegreeOfParallelism = maxDegreeOfParallelism <= 0 ? Environment.ProcessorCount : maxDegreeOfParallelism; _allomorphTries = new Dictionary(); var morphemes = new ObservableList(); foreach (Stratum stratum in _lang.Strata) @@ -84,6 +96,12 @@ public ITraceManager TraceManager /// public bool MergeEquivalentAnalyses { get; set; } + /// + /// Caps parallelism used within a single parse; 1 = fully single-threaded. + /// Set via the constructor (it influences how the analysis rules are compiled). 
+ /// + public int MaxDegreeOfParallelism { get; } + public Func LexEntrySelector { get; set; } public Func RuleSelector { get; set; } @@ -121,7 +139,7 @@ public IEnumerable ParseWord(string word, out object trace, bool guessRoot trace = input.CurrentTrace; // Unapply rules - var analyses = new ConcurrentQueue(_analysisRule.Apply(input)); + IList analyses = _analysisRule.Apply(input).ToList(); #if OUTPUT_ANALYSES var lines = new List(); @@ -134,7 +152,8 @@ public IEnumerable ParseWord(string word, out object trace, bool guessRoot File.WriteAllLines("analyses.txt", lines.OrderBy(l => l)); #endif - IList origAnalyses = guessRoot ? analyses.ToList() : null; + // analyses is already materialized and Synthesize doesn't mutate it, so no copy needed. + IList origAnalyses = guessRoot ? analyses : null; IList syntheses = Synthesize(word, analyses).ToList(); if (guessRoot && syntheses.Count == 0) { @@ -196,6 +215,7 @@ out object trace a => rulePermutations, (a, p) => new { Allomorph = a, RulePermutation = p } ), + new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, (synthesisInfo, state) => { try @@ -279,53 +299,32 @@ Stack> permutation in PermuteRules( } } -#if SINGLE_THREADED - private IEnumerable Synthesize(string word, IEnumerable analyses) + private IEnumerable Synthesize(string word, IList analyses) { - var matches = new HashSet(FreezableEqualityComparer.Default); - foreach (Word analysisWord in analyses) + // Single-threaded: used when the caller parallelizes across words itself. 
+ if (MaxDegreeOfParallelism == 1) { - foreach (Word synthesisWord in LexicalLookup(analysisWord)) + var matches = new HashSet(FreezableEqualityComparer.Default); + foreach (Word analysisWord in analyses) { - foreach (Word alternative in synthesisWord.ExpandAlternatives()) - { - foreach (Word validWord in _synthesisRule.Apply(alternative).Where(IsWordValid)) - { - if (IsMatch(word, validWord)) - matches.Add(validWord); - } - } + foreach (Word validWord in SynthesizeAnalysis(word, analysisWord)) + matches.Add(validWord); } + return matches; } - return matches; - } -#else - private IEnumerable Synthesize(string word, ConcurrentQueue analyses) - { - var matches = new ConcurrentBag(); + + // Parallel across the candidate analyses of this one word. + var parallelMatches = new ConcurrentBag(); Exception exception = null; Parallel.ForEach( - Partitioner.Create(0, analyses.Count), - new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, - (range, state) => + analyses, + new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, + (analysisWord, state) => { try { - for (int i = 0; i < range.Item2 - range.Item1; i++) - { - analyses.TryDequeue(out Word analysisWord); - foreach (Word synthesisWord in LexicalLookup(analysisWord)) - { - foreach (Word alternative in synthesisWord.ExpandAlternatives()) - { - foreach (Word validWord in _synthesisRule.Apply(alternative).Where(IsWordValid)) - { - if (IsMatch(word, validWord)) - matches.Add(validWord); - } - } - } - } + foreach (Word validWord in SynthesizeAnalysis(word, analysisWord)) + parallelMatches.Add(validWord); } catch (Exception e) { @@ -336,9 +335,23 @@ private IEnumerable Synthesize(string word, ConcurrentQueue analyses ); if (exception != null) throw exception; - return matches.Distinct(FreezableEqualityComparer.Default); + return parallelMatches.Distinct(FreezableEqualityComparer.Default); + } + + private IEnumerable SynthesizeAnalysis(string word, Word analysisWord) + { + foreach (Word 
synthesisWord in LexicalLookup(analysisWord)) + { + foreach (Word alternative in synthesisWord.ExpandAlternatives()) + { + foreach (Word validWord in _synthesisRule.Apply(alternative).Where(IsWordValid)) + { + if (IsMatch(word, validWord)) + yield return validWord; + } + } + } } -#endif internal IEnumerable SearchRootAllomorphs(Stratum stratum, Shape shape) { @@ -378,7 +391,7 @@ private IEnumerable LexicalGuess(Word input) if (_traceManager.IsTracing) _traceManager.LexicalLookup(input.Stratum, input); CharacterDefinitionTable table = input.Stratum.CharacterDefinitionTable; - IEnumerable shapeNodes = input.Shape.GetNodes(input.Range); + IEnumerable shapeNodes = input.Shape.GetNodes(input.Shape.Range); foreach (RootAllomorph lexicalPattern in _lexicalPatterns) { HashSet shapeSet = new HashSet(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessAllomorph.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessAllomorph.cs index cf4200c2a..bb3aad8b4 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessAllomorph.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessAllomorph.cs @@ -27,7 +27,7 @@ public enum ReduplicationHint public class AffixProcessAllomorph : Allomorph { - private readonly List> _lhs; + private readonly List> _lhs; private readonly List _rhs; private readonly MprFeatureSet _requiredMprFeatures; private readonly MprFeatureSet _excludedMprFeatures; @@ -35,7 +35,7 @@ public class AffixProcessAllomorph : Allomorph public AffixProcessAllomorph() { - _lhs = new List>(); + _lhs = new List>(); _rhs = new List(); _requiredMprFeatures = new MprFeatureSet(); _excludedMprFeatures = new MprFeatureSet(); @@ -45,7 +45,7 @@ public AffixProcessAllomorph() public ReduplicationHint ReduplicationHint { get; set; } - public IList> Lhs + public IList> Lhs { get { return _lhs; } } @@ -80,7 +80,7 @@ protected override bool ConstraintsEqual(Allomorph other) return 
base.ConstraintsEqual(other) && _requiredMprFeatures.SetEquals(otherAllo._requiredMprFeatures) && _excludedMprFeatures.SetEquals(otherAllo._excludedMprFeatures) - && _lhs.SequenceEqual(otherAllo._lhs, FreezableEqualityComparer>.Default) + && _lhs.SequenceEqual(otherAllo._lhs, FreezableEqualityComparer>.Default) && RequiredSyntacticFeatureStruct.ValueEquals(otherAllo.RequiredSyntacticFeatureStruct); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessRule.cs index 70a8fbf28..ff3eeb30f 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AffixProcessRule.cs @@ -77,12 +77,12 @@ public IList Allomorphs get { return _allomorphs; } } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisAffixProcessRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisAffixProcessRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessAllomorphRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessAllomorphRuleSpec.cs index 9ce94c148..64f1993aa 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessAllomorphRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessAllomorphRuleSpec.cs @@ -15,7 +15,7 @@ public AnalysisAffixProcessAllomorphRuleSpec(AffixProcessAllomorph allomorph) Pattern.Freeze(); } - public override Word ApplyRhs(PatternRule rule, Match match) + public override Word ApplyRhs(PatternRule rule, Match match) { Word output = match.Input.Clone(); GenerateShape(_allomorph.Lhs, output.Shape, match); diff 
--git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs index 4e89fef97..b9f6d4acc 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisAffixProcessRule.cs @@ -1,29 +1,30 @@ using System.Collections.Generic; using System.Linq; using SIL.Machine.Annotations; +using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.Machine.Rules; namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class AnalysisAffixProcessRule : IRule + public class AnalysisAffixProcessRule : IRule { private readonly Morpher _morpher; private readonly AffixProcessRule _rule; - private readonly List> _rules; + private readonly List> _rules; public AnalysisAffixProcessRule(Morpher morpher, AffixProcessRule rule) { _morpher = morpher; _rule = rule; - _rules = new List>(); + _rules = new List>(); foreach (AffixProcessAllomorph allo in rule.Allomorphs) { _rules.Add( - new MultiplePatternRule( + new MultiplePatternRule( new AnalysisAffixProcessAllomorphRuleSpec(allo), - new MatcherSettings + new MatcherSettings { Filter = ann => ann.Type() == HCFeatureSystem.Segment, MatchingMethod = MatchingMethod.Unification, @@ -55,10 +56,19 @@ public IEnumerable Apply(Word input) bool unapplied = false; foreach (Word outWord in _rules[i].Apply(input).RemoveDuplicates()) { + // Clone-then-reassign, not an in-place mutation: outWord may already be frozen by + // the pattern rule that produced it, and a frozen FeatureStruct must not be + // mutated in place (see Word.FreezeImpl's comment). 
if (!_rule.RequiredSyntacticFeatureStruct.IsEmpty) - outWord.SyntacticFeatureStruct.Add(_rule.RequiredSyntacticFeatureStruct); + { + FeatureStruct sfs = outWord.SyntacticFeatureStruct.Clone(); + sfs.Add(_rule.RequiredSyntacticFeatureStruct); + outWord.SyntacticFeatureStruct = sfs; + } else if (_rule.OutSyntacticFeatureStruct.IsEmpty) - outWord.SyntacticFeatureStruct.Clear(); + { + outWord.SyntacticFeatureStruct = new FeatureStruct(); + } outWord.MorphologicalRuleUnapplied(_rule); outWord.Freeze(); if (_morpher.TraceManager.IsTracing) diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs index 6dc2a0c28..b5013d4ee 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingRule.cs @@ -1,29 +1,30 @@ using System.Collections.Generic; using System.Linq; using SIL.Machine.Annotations; +using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.Machine.Rules; namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class AnalysisCompoundingRule : IRule + public class AnalysisCompoundingRule : IRule { private readonly Morpher _morpher; private readonly CompoundingRule _rule; - private readonly List> _rules; + private readonly List> _rules; public AnalysisCompoundingRule(Morpher morpher, CompoundingRule rule) { _morpher = morpher; _rule = rule; - _rules = new List>(); + _rules = new List>(); foreach (CompoundingSubrule sr in rule.Subrules) { _rules.Add( - new MultiplePatternRule( + new MultiplePatternRule( new AnalysisCompoundingSubruleRuleSpec(sr), - new MatcherSettings + new MatcherSettings { Filter = ann => ann.Type() == HCFeatureSystem.Segment, MatchingMethod = MatchingMethod.Unification, @@ -126,10 +127,18 @@ RootAllomorph allo in _morpher.SearchRootAllomorphs(_rule.Stratum, outWord.Curre bool 
unapplied = false; foreach (Word outWord in srOutput) { + // Clone-then-reassign, not an in-place mutation: outWord may already be frozen (see + // Word.FreezeImpl's comment). if (!_rule.HeadRequiredSyntacticFeatureStruct.IsEmpty) - outWord.SyntacticFeatureStruct.Add(_rule.HeadRequiredSyntacticFeatureStruct); + { + FeatureStruct sfs = outWord.SyntacticFeatureStruct.Clone(); + sfs.Add(_rule.HeadRequiredSyntacticFeatureStruct); + outWord.SyntacticFeatureStruct = sfs; + } else if (_rule.OutSyntacticFeatureStruct.IsEmpty) - outWord.SyntacticFeatureStruct.Clear(); + { + outWord.SyntacticFeatureStruct = new FeatureStruct(); + } outWord.MorphologicalRuleUnapplied(_rule); outWord.Freeze(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingSubruleRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingSubruleRuleSpec.cs index 50f6177bb..6172e2cb0 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingSubruleRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisCompoundingSubruleRuleSpec.cs @@ -17,7 +17,7 @@ public AnalysisCompoundingSubruleRuleSpec(CompoundingSubrule subrule) Pattern.Freeze(); } - public override Word ApplyRhs(PatternRule rule, Match match) + public override Word ApplyRhs(PatternRule rule, Match match) { Word output = match.Input.Clone(); GenerateShape(_subrule.HeadLhs, output.Shape, match); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransform.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransform.cs index 93e7a0ef6..832f55e68 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransform.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransform.cs @@ -9,18 +9,15 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { public class 
AnalysisMorphologicalTransform { - private readonly Pattern _pattern; + private readonly Pattern _pattern; private readonly Dictionary> _modifyFromInfos; private readonly Dictionary _capturedParts; - public AnalysisMorphologicalTransform( - IEnumerable> lhs, - IList rhs - ) + public AnalysisMorphologicalTransform(IEnumerable> lhs, IList rhs) { - Dictionary> partLookup = lhs.ToDictionary(p => p.Name); + Dictionary> partLookup = lhs.ToDictionary(p => p.Name); _modifyFromInfos = new Dictionary>(); - _pattern = new Pattern(); + _pattern = new Pattern(); _capturedParts = new Dictionary(); foreach (MorphologicalOutputAction outputAction in rhs) { @@ -46,19 +43,19 @@ protected IDictionary CapturedParts get { return _capturedParts; } } - public Pattern Pattern + public Pattern Pattern { get { return _pattern; } } - public void GenerateShape(IList> lhs, Shape shape, Match match) + public void GenerateShape(IList> lhs, Shape shape, Match match) { shape.Clear(); - foreach (Pattern part in lhs) + foreach (Pattern part in lhs) AddPartNodes(part, match, shape); } - private void AddPartNodes(Pattern part, Match match, Shape output) + private void AddPartNodes(Pattern part, Match match, Shape output) { int count; if (_capturedParts.TryGetValue(part.Name, out count)) @@ -83,15 +80,18 @@ private void AddPartNodes(Pattern part, Match private bool AddCapturedPartNodes( string partName, int index, - Match match, + Match match, FeatureStruct modifyFromFS, Shape output ) { - GroupCapture inputGroup = match.GroupCaptures[GetGroupName(partName, index)]; + GroupCapture inputGroup = match.GroupCaptures[GetGroupName(partName, index)]; if (inputGroup.Success) { - Range outputRange = match.Input.Shape.CopyTo(inputGroup.Range, output); + Range outputRange = match.Input.Shape.CopyTo( + match.Input.Shape.ToShapeRange(inputGroup.Range), + output + ); if (modifyFromFS != null) { foreach (ShapeNode node in output.GetNodes(outputRange)) @@ -106,15 +106,15 @@ Shape output } private void Untruncate( - 
PatternNode patternNode, + PatternNode patternNode, Shape output, bool optional, VariableBindings varBindings ) { - foreach (PatternNode node in patternNode.Children) + foreach (PatternNode node in patternNode.Children) { - if (node is Constraint constraint && constraint.Type() == HCFeatureSystem.Segment) + if (node is Constraint constraint && constraint.Type() == HCFeatureSystem.Segment) { FeatureStruct fs = constraint.FeatureStruct.Clone(); fs.ReplaceVariables(varBindings); @@ -122,7 +122,7 @@ VariableBindings varBindings } else { - if (node is Quantifier quantifier) + if (node is Quantifier quantifier) { for (int i = 0; i < quantifier.MaxOccur; i++) Untruncate(quantifier, output, i >= quantifier.MinOccur, varBindings); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransformRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransformRuleSpec.cs index a23ae84b1..2ef6d56ca 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransformRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisMorphologicalTransformRuleSpec.cs @@ -7,10 +7,10 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { public abstract class AnalysisMorphologicalTransformRuleSpec : AnalysisMorphologicalTransform, - IPatternRuleSpec + IPatternRuleSpec { protected AnalysisMorphologicalTransformRuleSpec( - IEnumerable> lhs, + IEnumerable> lhs, IList rhs ) : base(lhs, rhs) { } @@ -20,7 +20,7 @@ public bool IsApplicable(Word input) return true; } - protected bool IsPartCaptured(Match match, string partName) + protected bool IsPartCaptured(Match match, string partName) { int count; if (CapturedParts.TryGetValue(partName, out count)) @@ -34,6 +34,6 @@ protected bool IsPartCaptured(Match match, string partName) return false; } - public abstract Word ApplyRhs(PatternRule rule, Match match); + public abstract Word ApplyRhs(PatternRule rule, 
Match match); } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs index 749aa7b4d..031c6fbad 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/AnalysisRealizationalAffixProcessRule.cs @@ -7,24 +7,24 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class AnalysisRealizationalAffixProcessRule : IRule + public class AnalysisRealizationalAffixProcessRule : IRule { private readonly Morpher _morpher; private readonly RealizationalAffixProcessRule _rule; - private readonly List> _rules; + private readonly List> _rules; public AnalysisRealizationalAffixProcessRule(Morpher morpher, RealizationalAffixProcessRule rule) { _morpher = morpher; _rule = rule; - _rules = new List>(); + _rules = new List>(); foreach (AffixProcessAllomorph allo in rule.Allomorphs) { _rules.Add( - new MultiplePatternRule( + new MultiplePatternRule( new AnalysisAffixProcessAllomorphRuleSpec(allo), - new MatcherSettings + new MatcherSettings { Filter = ann => ann.Type() == HCFeatureSystem.Segment, MatchingMethod = MatchingMethod.Unification, diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingRule.cs index a5280ba6a..fca007daf 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingRule.cs @@ -58,12 +58,12 @@ public ICollection ObligatorySyntacticFeatures public Stratum Stratum { get; set; } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisCompoundingRule(morpher, this); } - public override 
IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisCompoundingRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingSubrule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingSubrule.cs index 7001098d0..6ae61c2dd 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingSubrule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CompoundingSubrule.cs @@ -6,8 +6,8 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { public class CompoundingSubrule { - private readonly List> _headLhs; - private readonly List> _nonHeadLhs; + private readonly List> _headLhs; + private readonly List> _nonHeadLhs; private readonly List _rhs; private readonly MprFeatureSet _requiredMprFeatures; @@ -16,8 +16,8 @@ public class CompoundingSubrule public CompoundingSubrule() { - _headLhs = new List>(); - _nonHeadLhs = new List>(); + _headLhs = new List>(); + _nonHeadLhs = new List>(); _rhs = new List(); _requiredMprFeatures = new MprFeatureSet(); @@ -25,12 +25,12 @@ public CompoundingSubrule() _outMprFeatures = new MprFeatureSet(); } - public IList> HeadLhs + public IList> HeadLhs { get { return _headLhs; } } - public IList> NonHeadLhs + public IList> NonHeadLhs { get { return _nonHeadLhs; } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CopyFromInput.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CopyFromInput.cs index a5ffa91e2..1f4a32f3e 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CopyFromInput.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/CopyFromInput.cs @@ -13,24 +13,22 @@ public CopyFromInput(string partName) : base(partName) { } public override void GenerateAnalysisLhs( - Pattern analysisLhs, - IDictionary> partLookup, + Pattern analysisLhs, + IDictionary> partLookup, IDictionary capturedParts ) { - 
Pattern pattern = partLookup[PartName]; + Pattern pattern = partLookup[PartName]; int count = capturedParts.GetOrCreate(PartName, () => 0); string groupName = AnalysisMorphologicalTransform.GetGroupName(PartName, count); - analysisLhs.Children.Add( - new Group(groupName, pattern.Children.DeepCloneExceptBoundaries()) - ); + analysisLhs.Children.Add(new Group(groupName, pattern.Children.DeepCloneExceptBoundaries())); capturedParts[PartName]++; } - public override IEnumerable> Apply(Match match, Word output) + public override IEnumerable> Apply(Match match, Word output) { var mappings = new List>(); - GroupCapture inputGroup = match.GroupCaptures[PartName]; + GroupCapture inputGroup = match.GroupCaptures[PartName]; if (inputGroup.Success) { foreach ( diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSegments.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSegments.cs index 0ffebcc64..a24167382 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSegments.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSegments.cs @@ -27,19 +27,19 @@ public Segments Segments } public override void GenerateAnalysisLhs( - Pattern analysisLhs, - IDictionary> partLookup, + Pattern analysisLhs, + IDictionary> partLookup, IDictionary capturedParts ) { foreach (ShapeNode node in _segments.Shape) { if (node.Annotation.Type() != HCFeatureSystem.Boundary) - analysisLhs.Children.Add(new Constraint(node.Annotation.FeatureStruct)); + analysisLhs.Children.Add(new Constraint(node.Annotation.FeatureStruct)); } } - public override IEnumerable> Apply(Match match, Word output) + public override IEnumerable> Apply(Match match, Word output) { Shape shape = _segments.Shape; var mappings = new List>(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSimpleContext.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSimpleContext.cs index ab652c765..bf882a4bf 100644 --- 
a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSimpleContext.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/InsertSimpleContext.cs @@ -29,15 +29,15 @@ public SimpleContext SimpleContext } public override void GenerateAnalysisLhs( - Pattern analysisLhs, - IDictionary> partLookup, + Pattern analysisLhs, + IDictionary> partLookup, IDictionary capturedParts ) { - analysisLhs.Children.Add(new Constraint(_simpleCtxt.FeatureStruct.Clone())); + analysisLhs.Children.Add(new Constraint(_simpleCtxt.FeatureStruct.Clone())); } - public override IEnumerable> Apply(Match match, Word output) + public override IEnumerable> Apply(Match match, Word output) { FeatureStruct fs = _simpleCtxt.FeatureStruct.Clone(); fs.ReplaceVariables(match.VariableBindings); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/ModifyFromInput.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/ModifyFromInput.cs index 70cae03ac..a92cb6bd1 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/ModifyFromInput.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/ModifyFromInput.cs @@ -31,19 +31,19 @@ public SimpleContext SimpleContext } public override void GenerateAnalysisLhs( - Pattern analysisLhs, - IDictionary> partLookup, + Pattern analysisLhs, + IDictionary> partLookup, IDictionary capturedParts ) { - Pattern pattern = partLookup[PartName]; + Pattern pattern = partLookup[PartName]; int count = capturedParts.GetOrCreate(PartName, () => 0); string groupName = AnalysisMorphologicalTransform.GetGroupName(PartName, count); - var group = new Group(groupName, pattern.Children.DeepCloneExceptBoundaries()); + var group = new Group(groupName, pattern.Children.DeepCloneExceptBoundaries()); foreach ( - Constraint constraint in group + Constraint constraint in group .GetNodesDepthFirst() - .OfType>() + .OfType>() .Where(c => c.Type() == (FeatureSymbol)_simpleCtxt.FeatureStruct.GetValue(HCFeatureSystem.Type)) ) { @@ -53,10 
+53,10 @@ Constraint constraint in group capturedParts[PartName]++; } - public override IEnumerable> Apply(Match match, Word output) + public override IEnumerable> Apply(Match match, Word output) { var mappings = new List>(); - GroupCapture inputGroup = match.GroupCaptures[PartName]; + GroupCapture inputGroup = match.GroupCaptures[PartName]; foreach ( ShapeNode inputNode in GetSkippedOptionalNodes(match.Input.Shape, inputGroup.Range) .Concat(match.Input.Shape.GetNodes(inputGroup.Range)) diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/MorphologicalOutputAction.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/MorphologicalOutputAction.cs index 5595a232f..619625343 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/MorphologicalOutputAction.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/MorphologicalOutputAction.cs @@ -25,8 +25,8 @@ public string PartName } public abstract void GenerateAnalysisLhs( - Pattern analysisLhs, - IDictionary> partLookup, + Pattern analysisLhs, + IDictionary> partLookup, IDictionary capturedParts ); @@ -35,11 +35,12 @@ IDictionary capturedParts /// /// The match. /// The output word synthesis. - public abstract IEnumerable> Apply(Match match, Word output); + public abstract IEnumerable> Apply(Match match, Word output); - protected IEnumerable GetSkippedOptionalNodes(Shape shape, Range range) + // RUSTIFY Stage 2: range is now an int-offset match/group range; its leftmost node is NodeAt(Start). 
+ protected IEnumerable GetSkippedOptionalNodes(Shape shape, Range range) { - ShapeNode node = range.Start.Prev; + ShapeNode node = shape.NodeAt(range.Start).Prev; var skippedNodes = new List(); while (node.Annotation.Optional) { diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/RealizationalAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/RealizationalAffixProcessRule.cs index da20c683e..ebe39caf5 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/RealizationalAffixProcessRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/RealizationalAffixProcessRule.cs @@ -58,12 +58,12 @@ public IList Allomorphs get { return _allomorphs; } } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisRealizationalAffixProcessRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisRealizationalAffixProcessRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs index aab74102d..79a97ee99 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessAllomorphRuleSpec.cs @@ -8,17 +8,17 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class SynthesisAffixProcessAllomorphRuleSpec : IPatternRuleSpec + public class SynthesisAffixProcessAllomorphRuleSpec : IPatternRuleSpec { private readonly AffixProcessAllomorph _allomorph; - private readonly Pattern _pattern; + private readonly Pattern _pattern; private readonly HashSet _nonAllomorphActions; public 
SynthesisAffixProcessAllomorphRuleSpec(AffixProcessAllomorph allomorph) { _allomorph = allomorph; - IList> lhs = _allomorph.Lhs; + IList> lhs = _allomorph.Lhs; IList rhs = _allomorph.Rhs; _nonAllomorphActions = new HashSet(); var redupParts = new List>(); @@ -119,12 +119,12 @@ List partActions in rhs.Where(action => } } - _pattern = new Pattern(); - foreach (Pattern part in lhs) - _pattern.Children.Add(new Group(part.Name, part.Children.CloneItems())); + _pattern = new Pattern(); + foreach (Pattern part in lhs) + _pattern.Children.Add(new Group(part.Name, part.Children.CloneItems())); } - public Pattern Pattern + public Pattern Pattern { get { return _pattern; } } @@ -134,7 +134,7 @@ public bool IsApplicable(Word input) return true; } - public Word ApplyRhs(PatternRule rule, Match match) + public Word ApplyRhs(PatternRule rule, Match match) { Word output = match.Input.Clone(); output.Shape.Clear(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs index f7dc9c0dc..98a3895d0 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisAffixProcessRule.cs @@ -8,24 +8,24 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class SynthesisAffixProcessRule : IRule + public class SynthesisAffixProcessRule : IRule { private readonly Morpher _morpher; private readonly AffixProcessRule _rule; - private readonly List> _rules; + private readonly List> _rules; public SynthesisAffixProcessRule(Morpher morpher, AffixProcessRule rule) { _morpher = morpher; _rule = rule; - _rules = new List>(); + _rules = new List>(); foreach (AffixProcessAllomorph allo in rule.Allomorphs) { var ruleSpec = new SynthesisAffixProcessAllomorphRuleSpec(allo); _rules.Add( - new PatternRule( + new PatternRule( ruleSpec, - new 
MatcherSettings + new MatcherSettings { Filter = ann => ann.Type().IsOneOf(HCFeatureSystem.Segment, HCFeatureSystem.Boundary) @@ -178,8 +178,13 @@ public IEnumerable Apply(Word input) Word outWord = _rules[i].Apply(input).SingleOrDefault(); if (outWord != null) { - outWord.SyntacticFeatureStruct = syntacticFS; - outWord.SyntacticFeatureStruct.PriorityUnion(_rule.OutSyntacticFeatureStruct); + // Clone before mutating: syntacticFS is shared across every loop iteration + // (computed once, above), so mutating it in place would alias every outWord + // assigned from an earlier iteration. Also protects against outWord already being + // frozen (see Word.FreezeImpl's comment). + FeatureStruct sfs = syntacticFS.Clone(); + sfs.PriorityUnion(_rule.OutSyntacticFeatureStruct); + outWord.SyntacticFeatureStruct = sfs; foreach (Feature obligFeature in _rule.ObligatorySyntacticFeatures) outWord.ObligatorySyntacticFeatures.Add(obligFeature); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs index a8f16e650..29e3bd5f3 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisCompoundingRule.cs @@ -9,30 +9,30 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class SynthesisCompoundingRule : IRule + public class SynthesisCompoundingRule : IRule { private readonly Morpher _morpher; private readonly CompoundingRule _rule; - private readonly List, Matcher>> _subruleMatchers; + private readonly List, Matcher>> _subruleMatchers; public SynthesisCompoundingRule(Morpher morpher, CompoundingRule rule) { _morpher = morpher; _rule = rule; - _subruleMatchers = new List, Matcher>>(); + _subruleMatchers = new List, Matcher>>(); foreach (CompoundingSubrule sr in rule.Subrules) 
_subruleMatchers.Add(Tuple.Create(BuildMatcher(sr.HeadLhs), BuildMatcher(sr.NonHeadLhs))); } - private Matcher BuildMatcher(IEnumerable> lhs) + private Matcher BuildMatcher(IEnumerable> lhs) { - var pattern = new Pattern(); - foreach (Pattern part in lhs) - pattern.Children.Add(new Group(part.Name, part.Children.CloneItems())); + var pattern = new Pattern(); + foreach (Pattern part in lhs) + pattern.Children.Add(new Group(part.Name, part.Children.CloneItems())); - return new Matcher( + return new Matcher( pattern, - new MatcherSettings + new MatcherSettings { Filter = ann => ann.Type().IsOneOf(HCFeatureSystem.Segment, HCFeatureSystem.Boundary) && !ann.IsDeleted(), @@ -167,10 +167,10 @@ public IEnumerable Apply(Word input) continue; } - Match headMatch = _subruleMatchers[i].Item1.Match(input); + Match headMatch = _subruleMatchers[i].Item1.Match(input); if (headMatch.Success) { - Match nonHeadMatch = _subruleMatchers[i].Item2.Match(input.CurrentNonHead); + Match nonHeadMatch = _subruleMatchers[i].Item2.Match(input.CurrentNonHead); if (nonHeadMatch.Success) { Word outWord = ApplySubrule(_rule.Subrules[i], headMatch, nonHeadMatch); @@ -178,8 +178,13 @@ public IEnumerable Apply(Word input) outWord.MprFeatures.AddOutput(_rule.Subrules[i].OutMprFeatures); outWord.MprFeatures.AddOutput(_rule.OutputProdRestrictionsMprFeatures); - outWord.SyntacticFeatureStruct = syntacticFS; - outWord.SyntacticFeatureStruct.PriorityUnion(_rule.OutSyntacticFeatureStruct); + // Clone before mutating: syntacticFS is shared across every loop iteration + // (computed once, above), so mutating it in place would alias every outWord + // assigned from an earlier iteration. Also protects against outWord already + // being frozen (see Word.FreezeImpl's comment). 
+ FeatureStruct sfs = syntacticFS.Clone(); + sfs.PriorityUnion(_rule.OutSyntacticFeatureStruct); + outWord.SyntacticFeatureStruct = sfs; foreach (Feature feature in _rule.ObligatorySyntacticFeatures) outWord.ObligatorySyntacticFeatures.Add(feature); @@ -226,11 +231,7 @@ public IEnumerable Apply(Word input) return output; } - private Word ApplySubrule( - CompoundingSubrule sr, - Match headMatch, - Match nonHeadMatch - ) + private Word ApplySubrule(CompoundingSubrule sr, Match headMatch, Match nonHeadMatch) { // TODO: unify the variable bindings from the head and non-head matches Word output = headMatch.Input.Clone(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs index 1ea640909..bd1717f82 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/MorphologicalRules/SynthesisRealizationalAffixProcessRule.cs @@ -9,23 +9,23 @@ namespace SIL.Machine.Morphology.HermitCrab.MorphologicalRules { - public class SynthesisRealizationalAffixProcessRule : IRule + public class SynthesisRealizationalAffixProcessRule : IRule { private readonly Morpher _morpher; private readonly RealizationalAffixProcessRule _rule; - private readonly List> _rules; + private readonly List> _rules; public SynthesisRealizationalAffixProcessRule(Morpher morpher, RealizationalAffixProcessRule rule) { _morpher = morpher; _rule = rule; - _rules = new List>(); + _rules = new List>(); foreach (AffixProcessAllomorph allo in rule.Allomorphs) { _rules.Add( - new PatternRule( + new PatternRule( new SynthesisAffixProcessAllomorphRuleSpec(allo), - new MatcherSettings + new MatcherSettings { Filter = ann => ann.Type().IsOneOf(HCFeatureSystem.Segment, HCFeatureSystem.Boundary) @@ -118,8 +118,14 @@ public IEnumerable Apply(Word input) Word outWord = 
_rules[i].Apply(input).SingleOrDefault(); if (outWord != null) { - outWord.SyntacticFeatureStruct = syntacticFS; - outWord.SyntacticFeatureStruct.PriorityUnion(_rule.RealizationalFeatureStruct); + // Clone before mutating: syntacticFS is shared across every loop iteration (it's + // computed once, above), so mutating it in place here would alias every outWord + // assigned from an earlier iteration to whatever the last PriorityUnion produced. + // Also protects against outWord already being frozen (see Word.FreezeImpl's + // comment). + FeatureStruct sfs = syntacticFS.Clone(); + sfs.PriorityUnion(_rule.RealizationalFeatureStruct); + outWord.SyntacticFeatureStruct = sfs; outWord.MorphologicalRuleApplied(_rule, appliedAllomorphIndices); appliedAllomorphIndices.Add(i); diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs index 8b21c1853..5d160243f 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRule.cs @@ -8,7 +8,7 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { - public class AnalysisMetathesisRule : IRule + public class AnalysisMetathesisRule : IRule { private readonly Morpher _morpher; private readonly MetathesisRule _rule; @@ -21,7 +21,7 @@ public AnalysisMetathesisRule(Morpher morpher, MetathesisRule rule) var ruleSpec = new AnalysisMetathesisRuleSpec(rule.Pattern, rule.LeftSwitchName, rule.RightSwitchName); - var settings = new MatcherSettings + var settings = new MatcherSettings { Direction = rule.Direction == Direction.LeftToRight ? 
Direction.RightToLeft : Direction.LeftToRight, Filter = ann => ann.Type().IsOneOf(HCFeatureSystem.Segment, HCFeatureSystem.Anchor), diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRuleSpec.cs index 6581ffff4..baa0d1a9f 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisMetathesisRuleSpec.cs @@ -12,21 +12,19 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class AnalysisMetathesisRuleSpec : IPhonologicalPatternRuleSpec, IPhonologicalPatternSubruleSpec { - private readonly Pattern _pattern; + private readonly Pattern _pattern; private readonly string _leftGroupName; private readonly string _rightGroupName; - public AnalysisMetathesisRuleSpec(Pattern pattern, string leftGroupName, string rightGroupName) + public AnalysisMetathesisRuleSpec(Pattern pattern, string leftGroupName, string rightGroupName) { _leftGroupName = leftGroupName; _rightGroupName = rightGroupName; - Group[] groupOrder = pattern.Children.OfType>().ToArray(); - Dictionary> groups = groupOrder.ToDictionary(g => g.Name); - _pattern = new Pattern(); - foreach ( - PatternNode node in pattern.Children.TakeWhile(n => !(n is Group)) - ) + Group[] groupOrder = pattern.Children.OfType>().ToArray(); + Dictionary> groups = groupOrder.ToDictionary(g => g.Name); + _pattern = new Pattern(); + foreach (PatternNode node in pattern.Children.TakeWhile(n => !(n is Group))) { _pattern.Children.Add(node.Clone()); } @@ -35,9 +33,9 @@ PatternNode node in pattern.Children.TakeWhile(n => !(n is Grou AddGroup(groups, rightGroupName); foreach ( - PatternNode node in pattern + PatternNode node in pattern .Children.GetNodes(Direction.RightToLeft) - .TakeWhile(n => !(n is Group)) + .TakeWhile(n => !(n is Group)) .Reverse() ) { @@ -46,41 +44,43 @@ PatternNode node in 
pattern _pattern.Freeze(); } - private void AddGroup(Dictionary> groups, string name) + private void AddGroup(Dictionary> groups, string name) { - var newGroup = new Group(name); - foreach ( - Constraint constraint in groups[name].Children.Cast>() - ) + var newGroup = new Group(name); + foreach (Constraint constraint in groups[name].Children.Cast>()) { - Constraint newConstraint = constraint.Clone(); + Constraint newConstraint = constraint.Clone(); newConstraint.FeatureStruct.AddValue(HCFeatureSystem.Modified, HCFeatureSystem.Clean); newGroup.Children.Add(newConstraint); } _pattern.Children.Add(newGroup); } - public Pattern Pattern + public Pattern Pattern { get { return _pattern; } } public bool MatchSubrule( PhonologicalPatternRule rule, - Match match, + Match match, out PhonologicalSubruleMatch subruleMatch ) { - subruleMatch = new PhonologicalSubruleMatch(this, match.Range, match.VariableBindings); + subruleMatch = new PhonologicalSubruleMatch( + this, + match.Input.Shape.ToShapeRange(match.Range), + match.VariableBindings + ); return true; } - Matcher IPhonologicalPatternSubruleSpec.LeftEnvironmentMatcher + Matcher IPhonologicalPatternSubruleSpec.LeftEnvironmentMatcher { get { return null; } } - Matcher IPhonologicalPatternSubruleSpec.RightEnvironmentMatcher + Matcher IPhonologicalPatternSubruleSpec.RightEnvironmentMatcher { get { return null; } } @@ -91,24 +91,26 @@ bool IPhonologicalPatternSubruleSpec.IsApplicable(Word input) } void IPhonologicalPatternSubruleSpec.ApplyRhs( - Match targetMatch, + Match targetMatch, Range range, VariableBindings varBindings ) { - ShapeNode start = null, - end = null; - foreach (GroupCapture gc in targetMatch.GroupCaptures) + int? 
startTag = null, + endTag = null; + foreach (GroupCapture gc in targetMatch.GroupCaptures) { - if (start == null || gc.Range.Start.CompareTo(start) < 0) - start = gc.Range.Start; - if (end == null || gc.Range.End.CompareTo(end) > 0) - end = gc.Range.End; + if (!gc.Success) + continue; + if (startTag == null || gc.Range.Start < startTag) + startTag = gc.Range.Start; + if (endTag == null || gc.Range.End > endTag) + endTag = gc.Range.End; } - Debug.Assert(start != null && end != null); + Debug.Assert(startTag != null && endTag != null); - GroupCapture leftGroup = targetMatch.GroupCaptures[_leftGroupName]; - GroupCapture rightGroup = targetMatch.GroupCaptures[_rightGroupName]; + GroupCapture leftGroup = targetMatch.GroupCaptures[_leftGroupName]; + GroupCapture rightGroup = targetMatch.GroupCaptures[_rightGroupName]; foreach ( Tuple tuple in targetMatch diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs index 95eacbf73..e691b4c0a 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteRule.cs @@ -10,7 +10,7 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { - public class AnalysisRewriteRule : IRule + public class AnalysisRewriteRule : IRule { private enum ReapplyType { @@ -28,7 +28,7 @@ public AnalysisRewriteRule(Morpher morpher, RewriteRule rule) _morpher = morpher; _rule = rule; - var settings = new MatcherSettings + var settings = new MatcherSettings { Direction = rule.Direction == Direction.LeftToRight ? 
Direction.RightToLeft : Direction.LeftToRight, Filter = ann => ann.Type().IsOneOf(HCFeatureSystem.Segment, HCFeatureSystem.Anchor), @@ -49,11 +49,7 @@ public AnalysisRewriteRule(Morpher morpher, RewriteRule rule) ruleSpec = new FeatureAnalysisRewriteRuleSpec(settings, rule.Lhs, sr); if (_rule.ApplicationMode == RewriteApplicationMode.Simultaneous) { - foreach ( - Constraint constraint in sr.Rhs.Children.Cast< - Constraint - >() - ) + foreach (Constraint constraint in sr.Rhs.Children.Cast>()) { if (constraint.Type() == HCFeatureSystem.Segment) { @@ -106,12 +102,9 @@ Constraint constraint in sr.Rhs.Children.Cast< } } - private static bool IsUnifiable(Constraint constraint, Pattern env) + private static bool IsUnifiable(Constraint constraint, Pattern env) { - foreach ( - Constraint curConstraint in env.GetNodesDepthFirst() - .OfType>() - ) + foreach (Constraint curConstraint in env.GetNodesDepthFirst().OfType>()) { if ( curConstraint.Type() == HCFeatureSystem.Segment diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteSubruleSpec.cs index 540267e6d..fd42491ea 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/AnalysisRewriteSubruleSpec.cs @@ -7,12 +7,12 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class AnalysisRewriteSubruleSpec : RewriteSubruleSpec { - private readonly Action, Range, VariableBindings> _applyAction; + private readonly Action, Range, VariableBindings> _applyAction; public AnalysisRewriteSubruleSpec( - MatcherSettings matcherSettings, + MatcherSettings matcherSettings, RewriteSubrule subrule, - Action, Range, VariableBindings> applyAction + Action, Range, VariableBindings> applyAction ) : base( matcherSettings, @@ -23,16 +23,16 @@ Action, Range, VariableBindings> applyAction _applyAction = 
applyAction; } - private static Pattern CreateEnvironmentPattern(Pattern env) + private static Pattern CreateEnvironmentPattern(Pattern env) { - Pattern pattern = null; + Pattern pattern = null; if (!env.IsEmpty) - pattern = new Pattern(env.Children.DeepCloneExceptBoundaries()); + pattern = new Pattern(env.Children.DeepCloneExceptBoundaries()); return pattern; } public override void ApplyRhs( - Match targetMatch, + Match targetMatch, Range range, VariableBindings varBindings ) diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisAnalysisRewriteRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisAnalysisRewriteRuleSpec.cs index 6c29369b9..225cb3254 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisAnalysisRewriteRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisAnalysisRewriteRuleSpec.cs @@ -9,15 +9,15 @@ public class EpenthesisAnalysisRewriteRuleSpec : RewriteRuleSpec { private readonly int _targetCount; - public EpenthesisAnalysisRewriteRuleSpec(MatcherSettings matcherSettings, RewriteSubrule subrule) + public EpenthesisAnalysisRewriteRuleSpec(MatcherSettings matcherSettings, RewriteSubrule subrule) : base(false) { Pattern.Acceptable = IsUnapplicationNonvacuous; _targetCount = subrule.Rhs.Children.Count; - foreach (Constraint constraint in subrule.Rhs.Children.Cast>()) + foreach (Constraint constraint in subrule.Rhs.Children.Cast>()) { - Constraint newConstraint = constraint.Clone(); + Constraint newConstraint = constraint.Clone(); newConstraint.FeatureStruct.AddValue(HCFeatureSystem.Modified, HCFeatureSystem.Clean); Pattern.Children.Add(newConstraint); } @@ -26,7 +26,7 @@ public EpenthesisAnalysisRewriteRuleSpec(MatcherSettings matcherSetti SubruleSpecs.Add(new AnalysisRewriteSubruleSpec(matcherSettings, subrule, Unapply)); } - private static bool IsUnapplicationNonvacuous(Match match) + private static bool IsUnapplicationNonvacuous(Match match) { 
foreach (ShapeNode node in match.Input.Shape.GetNodes(match.Range)) { @@ -37,7 +37,7 @@ private static bool IsUnapplicationNonvacuous(Match match) return false; } - private void Unapply(Match targetMatch, Range range, VariableBindings varBindings) + private void Unapply(Match targetMatch, Range range, VariableBindings varBindings) { ShapeNode curNode = range.Start; for (int i = 0; i < _targetCount; i++) diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisSynthesisRewriteSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisSynthesisRewriteSubruleSpec.cs index c184bd4dc..6c1f0bdc6 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisSynthesisRewriteSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/EpenthesisSynthesisRewriteSubruleSpec.cs @@ -7,10 +7,10 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class EpenthesisSynthesisRewriteSubruleSpec : SynthesisRewriteSubruleSpec { - private readonly Pattern _rhs; + private readonly Pattern _rhs; public EpenthesisSynthesisRewriteSubruleSpec( - MatcherSettings matcherSettings, + MatcherSettings matcherSettings, bool isIterative, RewriteSubrule subrule, int index @@ -21,17 +21,17 @@ int index } public override void ApplyRhs( - Match targetMatch, + Match targetMatch, Range range, VariableBindings varBindings ) { ShapeNode curNode = range.Start; - foreach (PatternNode node in _rhs.Children.GetNodes(targetMatch.Matcher.Direction)) + foreach (PatternNode node in _rhs.Children.GetNodes(targetMatch.Matcher.Direction)) { if (targetMatch.Input.Shape.Count == 256) throw new InfiniteLoopException("An epenthesis rewrite rule is stuck in an infinite loop."); - var constraint = (Constraint)node; + var constraint = (Constraint)node; FeatureStruct fs = constraint.FeatureStruct.Clone(); if (varBindings != null) fs.ReplaceVariables(varBindings); diff --git 
a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureAnalysisRewriteRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureAnalysisRewriteRuleSpec.cs index e98d95ec6..75c66c0e8 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureAnalysisRewriteRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureAnalysisRewriteRuleSpec.cs @@ -10,19 +10,19 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class FeatureAnalysisRewriteRuleSpec : RewriteRuleSpec { - private readonly Pattern _analysisRhs; + private readonly Pattern _analysisRhs; public FeatureAnalysisRewriteRuleSpec( - MatcherSettings matcherSettings, - Pattern lhs, + MatcherSettings matcherSettings, + Pattern lhs, RewriteSubrule subrule ) : base(false) { var rhsAntiFSs = new List(); foreach ( - Constraint constraint in subrule - .Rhs.Children.OfType>() + Constraint constraint in subrule + .Rhs.Children.OfType>() .Where(c => c.Type() == HCFeatureSystem.Segment) ) { @@ -31,28 +31,26 @@ Constraint constraint in subrule Pattern.Acceptable = match => IsUnapplicationNonvacuous(match, rhsAntiFSs); - _analysisRhs = new Pattern(); + _analysisRhs = new Pattern(); int i = 0; foreach ( - Tuple, PatternNode> tuple in lhs.Children.Zip( - subrule.Rhs.Children - ) + Tuple, PatternNode> tuple in lhs.Children.Zip(subrule.Rhs.Children) ) { - var lhsConstraint = (Constraint)tuple.Item1; - var rhsConstraint = (Constraint)tuple.Item2; + var lhsConstraint = (Constraint)tuple.Item1; + var rhsConstraint = (Constraint)tuple.Item2; if (lhsConstraint.Type() == HCFeatureSystem.Segment && rhsConstraint.Type() == HCFeatureSystem.Segment) { - Constraint targetConstraint = lhsConstraint.Clone(); + Constraint targetConstraint = lhsConstraint.Clone(); targetConstraint.FeatureStruct.PriorityUnion(rhsConstraint.FeatureStruct); targetConstraint.FeatureStruct.AddValue(HCFeatureSystem.Modified, HCFeatureSystem.Clean); - Pattern.Children.Add(new 
Group("target" + i) { Children = { targetConstraint } }); + Pattern.Children.Add(new Group("target" + i) { Children = { targetConstraint } }); FeatureStruct fs = rhsConstraint.FeatureStruct.AntiFeatureStruct(); fs.Subtract(lhsConstraint.FeatureStruct.AntiFeatureStruct()); fs.AddValue(HCFeatureSystem.Type, HCFeatureSystem.Segment); - _analysisRhs.Children.Add(new Constraint(fs)); + _analysisRhs.Children.Add(new Constraint(fs)); i++; } @@ -62,12 +60,15 @@ Tuple, PatternNode> tuple in lhs.C SubruleSpecs.Add(new AnalysisRewriteSubruleSpec(matcherSettings, subrule, Unapply)); } - private bool IsUnapplicationNonvacuous(Match match, IEnumerable rhsAntiFSs) + private bool IsUnapplicationNonvacuous(Match match, IEnumerable rhsAntiFSs) { int i = 0; foreach (FeatureStruct fs in rhsAntiFSs) { - ShapeNode node = match.GroupCaptures["target" + i].Range.GetStart(match.Matcher.Direction); + ShapeNode node = match.Input.Shape.GetStartNode( + match.GroupCaptures["target" + i].Range, + match.Matcher.Direction + ); foreach (SymbolicFeature sf in fs.Features.OfType()) { SymbolicFeatureValue sfv = fs.GetValue(sf); @@ -97,14 +98,15 @@ private bool IsUnapplicationNonvacuous(Match match, IEnumerable return false; } - private void Unapply(Match targetMatch, Range range, VariableBindings varBindings) + private void Unapply(Match targetMatch, Range range, VariableBindings varBindings) { int i = 0; - foreach ( - Constraint constraint in _analysisRhs.Children.Cast>() - ) + foreach (Constraint constraint in _analysisRhs.Children.Cast>()) { - ShapeNode node = targetMatch.GroupCaptures["target" + i].Range.GetStart(targetMatch.Matcher.Direction); + ShapeNode node = targetMatch.Input.Shape.GetStartNode( + targetMatch.GroupCaptures["target" + i].Range, + targetMatch.Matcher.Direction + ); FeatureStruct fs = node.Annotation.FeatureStruct.Clone(); fs.PriorityUnion(constraint.FeatureStruct); node.Annotation.FeatureStruct.Union(fs, varBindings); diff --git 
a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureSynthesisRewriteSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureSynthesisRewriteSubruleSpec.cs index c961fe6c7..58d5beb55 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureSynthesisRewriteSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/FeatureSynthesisRewriteSubruleSpec.cs @@ -8,10 +8,10 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class FeatureSynthesisRewriteSubruleSpec : SynthesisRewriteSubruleSpec { - private readonly Pattern _rhs; + private readonly Pattern _rhs; public FeatureSynthesisRewriteSubruleSpec( - MatcherSettings matcherSettings, + MatcherSettings matcherSettings, bool isIterative, RewriteSubrule subrule, int index @@ -22,18 +22,18 @@ int index } public override void ApplyRhs( - Match targetMatch, + Match targetMatch, Range range, VariableBindings varBindings ) { foreach ( - Tuple> tuple in targetMatch + Tuple> tuple in targetMatch .Input.Shape.GetNodes(range) .Zip(_rhs.Children) ) { - var constraints = (Constraint)tuple.Item2; + var constraints = (Constraint)tuple.Item2; tuple.Item1.Annotation.FeatureStruct.PriorityUnion(constraints.FeatureStruct, varBindings); if (IsIterative) tuple.Item1.SetDirty(true); diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternRuleSpec.cs index a26f66f6e..4130df814 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternRuleSpec.cs @@ -5,10 +5,10 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public interface IPhonologicalPatternRuleSpec { - Pattern Pattern { get; } + Pattern Pattern { get; } bool MatchSubrule( PhonologicalPatternRule rule, - Match match, + Match match, out 
PhonologicalSubruleMatch subruleMatch ); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternSubruleSpec.cs index 25964c1c2..92484e61e 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IPhonologicalPatternSubruleSpec.cs @@ -6,10 +6,10 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public interface IPhonologicalPatternSubruleSpec { - Matcher LeftEnvironmentMatcher { get; } - Matcher RightEnvironmentMatcher { get; } + Matcher LeftEnvironmentMatcher { get; } + Matcher RightEnvironmentMatcher { get; } bool IsApplicable(Word input); - void ApplyRhs(Match targetMatch, Range range, VariableBindings varBindings); + void ApplyRhs(Match targetMatch, Range range, VariableBindings varBindings); } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IterativePhonologicalPatternRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IterativePhonologicalPatternRule.cs index 4046e4319..2471bb2b7 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IterativePhonologicalPatternRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/IterativePhonologicalPatternRule.cs @@ -10,33 +10,40 @@ public class IterativePhonologicalPatternRule : PhonologicalPatternRule { public IterativePhonologicalPatternRule( IPhonologicalPatternRuleSpec ruleSpec, - MatcherSettings matcherSettings + MatcherSettings matcherSettings ) : base(ruleSpec, matcherSettings) { } public override IEnumerable Apply(Word input) { bool applied = false; - Match targetMatch = Matcher.Match(input); + Match targetMatch = Matcher.Match(input); while (targetMatch.Success) { ShapeNode start; PhonologicalSubruleMatch srMatch; + // RUSTIFY Stage 2: int offsets in targetMatch.Range go stale once ApplyRhs mutates 
the + // shape (the projection re-densifies), so resolve the directional end/start NODE now — + // a ShapeNode handle survives mutation, exactly as the old ShapeNode match range did. + // Only one of end/start is ever used per iteration, so resolve it inside its own + // branch instead of paying for both NodeAt lookups unconditionally. if (RuleSpec.MatchSubrule(this, targetMatch, out srMatch)) { + ShapeNode matchEndNode = input.Shape.GetEndNode(targetMatch.Range, Matcher.Direction); srMatch.SubruleSpec.ApplyRhs(targetMatch, srMatch.Range, srMatch.VariableBindings); applied = true; - start = targetMatch.Range.GetEnd(Matcher.Direction).GetNext(Matcher.Direction); + start = matchEndNode.GetNext(Matcher.Direction); } else { - start = targetMatch.Range.GetStart(Matcher.Direction).GetNext(Matcher.Direction); + ShapeNode matchStartNode = input.Shape.GetStartNode(targetMatch.Range, Matcher.Direction); + start = matchStartNode.GetNext(Matcher.Direction); } if (start == null) break; - targetMatch = Matcher.Match(input, start); + targetMatch = Matcher.Match(input, input.Shape.MatchStartOffset(start, Matcher.Direction)); } if (applied) diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/MetathesisRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/MetathesisRule.cs index 0fd0693c7..b926a69c9 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/MetathesisRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/MetathesisRule.cs @@ -13,23 +13,23 @@ public class MetathesisRule : HCRuleBase, IPhonologicalRule { public MetathesisRule() { - Pattern = Pattern.New().Value; + Pattern = Pattern.New().Value; } public Direction Direction { get; set; } - public Pattern Pattern { get; set; } + public Pattern Pattern { get; set; } public string LeftSwitchName { get; set; } public string RightSwitchName { get; set; } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher 
morpher) { return new AnalysisMetathesisRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisMetathesisRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowAnalysisRewriteRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowAnalysisRewriteRuleSpec.cs index 469f3020a..df0c7aede 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowAnalysisRewriteRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowAnalysisRewriteRuleSpec.cs @@ -8,12 +8,12 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class NarrowAnalysisRewriteRuleSpec : RewriteRuleSpec { - private readonly Pattern _analysisRhs; + private readonly Pattern _analysisRhs; private readonly int _targetCount; public NarrowAnalysisRewriteRuleSpec( - MatcherSettings matcherSettings, - Pattern lhs, + MatcherSettings matcherSettings, + Pattern lhs, RewriteSubrule subrule ) : base(subrule.Rhs.IsEmpty) @@ -24,7 +24,7 @@ RewriteSubrule subrule if (subrule.Rhs.IsEmpty) { Pattern.Children.Add( - new Constraint( + new Constraint( FeatureStruct.New().Symbol(HCFeatureSystem.Segment, HCFeatureSystem.Anchor).Value ) ); @@ -38,12 +38,10 @@ RewriteSubrule subrule SubruleSpecs.Add(new AnalysisRewriteSubruleSpec(matcherSettings, subrule, Unapply)); } - private void Unapply(Match targetMatch, Range range, VariableBindings varBindings) + private void Unapply(Match targetMatch, Range range, VariableBindings varBindings) { ShapeNode curNode = IsTargetEmpty ? 
range.Start : range.End; - foreach ( - Constraint constraint in _analysisRhs.Children.Cast>() - ) + foreach (Constraint constraint in _analysisRhs.Children.Cast>()) { FeatureStruct fs = constraint.FeatureStruct.Clone(); if (varBindings != null) diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowSynthesisRewriteSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowSynthesisRewriteSubruleSpec.cs index 99957f357..c6fd2d182 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowSynthesisRewriteSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/NarrowSynthesisRewriteSubruleSpec.cs @@ -7,11 +7,11 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class NarrowSynthesisRewriteSubruleSpec : SynthesisRewriteSubruleSpec { - private readonly Pattern _rhs; + private readonly Pattern _rhs; private readonly int _targetCount; public NarrowSynthesisRewriteSubruleSpec( - MatcherSettings matcherSettings, + MatcherSettings matcherSettings, bool isIterative, int targetCount, RewriteSubrule subrule, @@ -24,15 +24,15 @@ int index } public override void ApplyRhs( - Match targetMatch, + Match targetMatch, Range range, VariableBindings varBindings ) { ShapeNode curNode = range.End; - foreach (PatternNode node in _rhs.Children) + foreach (PatternNode node in _rhs.Children) { - var constraint = (Constraint)node; + var constraint = (Constraint)node; FeatureStruct fs = constraint.FeatureStruct.Clone(); if (varBindings != null) fs.ReplaceVariables(varBindings); diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/PhonologicalPatternRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/PhonologicalPatternRule.cs index b55bf7d2a..ca16bb736 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/PhonologicalPatternRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/PhonologicalPatternRule.cs @@ -5,21 +5,18 @@ namespace 
SIL.Machine.Morphology.HermitCrab.PhonologicalRules { - public abstract class PhonologicalPatternRule : IRule + public abstract class PhonologicalPatternRule : IRule { private readonly IPhonologicalPatternRuleSpec _ruleSpec; - private readonly Matcher _matcher; + private readonly Matcher _matcher; - protected PhonologicalPatternRule( - IPhonologicalPatternRuleSpec ruleSpec, - MatcherSettings matcherSettings - ) + protected PhonologicalPatternRule(IPhonologicalPatternRuleSpec ruleSpec, MatcherSettings matcherSettings) { _ruleSpec = ruleSpec; - _matcher = new Matcher(_ruleSpec.Pattern, matcherSettings); + _matcher = new Matcher(_ruleSpec.Pattern, matcherSettings); } - public Matcher Matcher + public Matcher Matcher { get { return _matcher; } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRule.cs index 30aa423c6..1972fdb71 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRule.cs @@ -18,11 +18,11 @@ public class RewriteRule : HCRuleBase, IPhonologicalRule public RewriteRule() { - Lhs = Pattern.New().Value; + Lhs = Pattern.New().Value; _subrules = new List(); } - public Pattern Lhs { get; set; } + public Pattern Lhs { get; set; } public IList Subrules { @@ -33,12 +33,12 @@ public IList Subrules public RewriteApplicationMode ApplicationMode { get; set; } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisRewriteRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisRewriteRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRuleSpec.cs index 
f91899d61..2e60d8f4f 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteRuleSpec.cs @@ -8,18 +8,18 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public abstract class RewriteRuleSpec : IPhonologicalPatternRuleSpec { - private readonly Pattern _pattern; + private readonly Pattern _pattern; private readonly List _subruleSpecs; private readonly bool _isTargetEmpty; protected RewriteRuleSpec(bool isTargetEmpty) { - _pattern = new Pattern(); + _pattern = new Pattern(); _subruleSpecs = new List(); _isTargetEmpty = isTargetEmpty; } - public Pattern Pattern + public Pattern Pattern { get { return _pattern; } } @@ -36,10 +36,16 @@ protected bool IsTargetEmpty public bool MatchSubrule( PhonologicalPatternRule rule, - Match match, + Match match, out PhonologicalSubruleMatch subruleMatch ) { + // RUSTIFY Stage 2: match.Range is now Range ([leftmostTag, rightmostTag+1)); resolve its + // bracketing nodes via the shape once (match.Input/match.Range are invariant across the + // subrules tried below), then navigate the segment graph as before. 
+ Shape shape = match.Input.Shape; + ShapeNode rangeStart = shape.NodeAt(match.Range.Start); + ShapeNode rangeEnd = shape.NodeAt(match.Range.End - 1); foreach (RewriteSubruleSpec subruleSpec in _subruleSpecs) { if (!subruleSpec.IsApplicable(match.Input)) @@ -53,13 +59,13 @@ out PhonologicalSubruleMatch subruleMatch { if (match.Matcher.Direction == Direction.LeftToRight) { - leftNode = match.Range.Start; - rightNode = match.Range.End.Next; + leftNode = rangeStart; + rightNode = rangeEnd.Next; } else { - leftNode = match.Range.Start.Prev; - rightNode = match.Range.End; + leftNode = rangeStart.Prev; + rightNode = rangeEnd; } startNode = leftNode; @@ -67,10 +73,10 @@ out PhonologicalSubruleMatch subruleMatch } else { - leftNode = match.Range.Start.Prev; - rightNode = match.Range.End.Next; - startNode = match.Range.Start; - endNode = match.Range.End; + leftNode = rangeStart.Prev; + rightNode = rangeEnd.Next; + startNode = rangeStart; + endNode = rangeEnd; } if (leftNode == null || rightNode == null) @@ -80,9 +86,10 @@ out PhonologicalSubruleMatch subruleMatch } VariableBindings varBindings = match.VariableBindings; - Match leftEnvMatch = subruleSpec.LeftEnvironmentMatcher?.Match( + // left environment is matched right-to-left (see RewriteSubruleSpec) + Match leftEnvMatch = subruleSpec.LeftEnvironmentMatcher?.Match( match.Input, - leftNode, + shape.MatchStartOffset(leftNode, Direction.RightToLeft), varBindings ); if (leftEnvMatch == null || leftEnvMatch.Success) @@ -90,9 +97,10 @@ out PhonologicalSubruleMatch subruleMatch if (leftEnvMatch != null && leftEnvMatch.VariableBindings != null) varBindings = leftEnvMatch.VariableBindings; - Match rightEnvMatch = subruleSpec.RightEnvironmentMatcher?.Match( + // right environment is matched left-to-right (see RewriteSubruleSpec) + Match rightEnvMatch = subruleSpec.RightEnvironmentMatcher?.Match( match.Input, - rightNode, + shape.MatchStartOffset(rightNode, Direction.LeftToRight), varBindings ); if (rightEnvMatch == null || 
rightEnvMatch.Success) diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubrule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubrule.cs index 66331295d..9db8ab38c 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubrule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubrule.cs @@ -11,17 +11,17 @@ public class RewriteSubrule public RewriteSubrule() { - Rhs = Pattern.New().Value; - LeftEnvironment = Pattern.New().Value; - RightEnvironment = Pattern.New().Value; + Rhs = Pattern.New().Value; + LeftEnvironment = Pattern.New().Value; + RightEnvironment = Pattern.New().Value; RequiredSyntacticFeatureStruct = FeatureStruct.New().Value; _requiredMprFeatures = new MprFeatureSet(); _excludedMprFeatures = new MprFeatureSet(); } - public Pattern Rhs { get; set; } - public Pattern LeftEnvironment { get; set; } - public Pattern RightEnvironment { get; set; } + public Pattern Rhs { get; set; } + public Pattern LeftEnvironment { get; set; } + public Pattern RightEnvironment { get; set; } public FeatureStruct RequiredSyntacticFeatureStruct { get; set; } public MprFeatureSet RequiredMprFeatures { diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubruleSpec.cs index a8219cc83..1fa74dbc9 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/RewriteSubruleSpec.cs @@ -7,38 +7,38 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public abstract class RewriteSubruleSpec : IPhonologicalPatternSubruleSpec { - private readonly Matcher _leftEnvMatcher; - private readonly Matcher _rightEnvMatcher; + private readonly Matcher _leftEnvMatcher; + private readonly Matcher _rightEnvMatcher; protected RewriteSubruleSpec( - MatcherSettings matcherSettings, - Pattern leftEnv, - 
Pattern rightEnv + MatcherSettings matcherSettings, + Pattern leftEnv, + Pattern rightEnv ) { if (leftEnv != null && !leftEnv.IsEmpty) { - MatcherSettings leftEnvMatcherSettings = matcherSettings.Clone(); + MatcherSettings leftEnvMatcherSettings = matcherSettings.Clone(); leftEnvMatcherSettings.Direction = Direction.RightToLeft; leftEnvMatcherSettings.AnchoredToStart = true; - _leftEnvMatcher = new Matcher(leftEnv, leftEnvMatcherSettings); + _leftEnvMatcher = new Matcher(leftEnv, leftEnvMatcherSettings); } if (rightEnv != null && !rightEnv.IsEmpty) { - MatcherSettings rightEnvMatcherSettings = matcherSettings.Clone(); + MatcherSettings rightEnvMatcherSettings = matcherSettings.Clone(); rightEnvMatcherSettings.Direction = Direction.LeftToRight; rightEnvMatcherSettings.AnchoredToStart = true; - _rightEnvMatcher = new Matcher(rightEnv, rightEnvMatcherSettings); + _rightEnvMatcher = new Matcher(rightEnv, rightEnvMatcherSettings); } } - public Matcher LeftEnvironmentMatcher + public Matcher LeftEnvironmentMatcher { get { return _leftEnvMatcher; } } - public Matcher RightEnvironmentMatcher + public Matcher RightEnvironmentMatcher { get { return _rightEnvMatcher; } } @@ -49,7 +49,7 @@ public virtual bool IsApplicable(Word input) } public abstract void ApplyRhs( - Match targetMatch, + Match targetMatch, Range range, VariableBindings varBindings ); diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SimultaneousPhonologicalPatternRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SimultaneousPhonologicalPatternRule.cs index e95ab5690..3c965fe56 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SimultaneousPhonologicalPatternRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SimultaneousPhonologicalPatternRule.cs @@ -12,7 +12,7 @@ public class SimultaneousPhonologicalPatternRule : PhonologicalPatternRule public SimultaneousPhonologicalPatternRule( IPhonologicalPatternRuleSpec ruleSpec, - MatcherSettings 
matcherSettings + MatcherSettings matcherSettings ) : base(ruleSpec, matcherSettings) { @@ -21,15 +21,15 @@ MatcherSettings matcherSettings public override IEnumerable Apply(Word input) { - var matches = new List, PhonologicalSubruleMatch>>(); - foreach (Match targetMatch in Matcher.AllMatches(input)) + var matches = new List, PhonologicalSubruleMatch>>(); + foreach (Match targetMatch in Matcher.AllMatches(input)) { PhonologicalSubruleMatch srMatch; if (_ruleSpec.MatchSubrule(this, targetMatch, out srMatch)) matches.Add(Tuple.Create(targetMatch, srMatch)); } - foreach (Tuple, PhonologicalSubruleMatch> match in matches) + foreach (Tuple, PhonologicalSubruleMatch> match in matches) match.Item2.SubruleSpec.ApplyRhs(match.Item1, match.Item2.Range, match.Item2.VariableBindings); return input.ToEnumerable(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs index 2ea546df2..2d8c3af5a 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRule.cs @@ -7,7 +7,7 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { - public class SynthesisMetathesisRule : IRule + public class SynthesisMetathesisRule : IRule { private readonly Morpher _morpher; private readonly MetathesisRule _rule; @@ -20,7 +20,7 @@ public SynthesisMetathesisRule(Morpher morpher, MetathesisRule rule) var ruleSpec = new SynthesisMetathesisRuleSpec(rule.Pattern, rule.LeftSwitchName, rule.RightSwitchName); - var settings = new MatcherSettings + var settings = new MatcherSettings { Direction = rule.Direction, Filter = ann => diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRuleSpec.cs index 312e0a677..01e1a27f2 100644 --- 
a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisMetathesisRuleSpec.cs @@ -8,30 +8,24 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { public class SynthesisMetathesisRuleSpec : IPhonologicalPatternRuleSpec, IPhonologicalPatternSubruleSpec { - private readonly Pattern _pattern; + private readonly Pattern _pattern; private readonly string _leftGroupName; private readonly string _rightGroupName; - public SynthesisMetathesisRuleSpec( - Pattern pattern, - string leftGroupName, - string rightGroupName - ) + public SynthesisMetathesisRuleSpec(Pattern pattern, string leftGroupName, string rightGroupName) { _leftGroupName = leftGroupName; _rightGroupName = rightGroupName; - _pattern = new Pattern(); - foreach (PatternNode node in pattern.Children) + _pattern = new Pattern(); + foreach (PatternNode node in pattern.Children) { - if (node is Group group) + if (node is Group group) { - var newGroup = new Group(group.Name); - foreach ( - Constraint constraint in group.Children.Cast>() - ) + var newGroup = new Group(group.Name); + foreach (Constraint constraint in group.Children.Cast>()) { - Constraint newConstraint = constraint.Clone(); + Constraint newConstraint = constraint.Clone(); newConstraint.FeatureStruct.AddValue(HCFeatureSystem.Modified, HCFeatureSystem.Clean); newGroup.Children.Add(newConstraint); } @@ -45,27 +39,31 @@ Constraint constraint in group.Children.Cast Pattern + public Pattern Pattern { get { return _pattern; } } public bool MatchSubrule( PhonologicalPatternRule rule, - Match match, + Match match, out PhonologicalSubruleMatch subruleMatch ) { - subruleMatch = new PhonologicalSubruleMatch(this, match.Range, match.VariableBindings); + subruleMatch = new PhonologicalSubruleMatch( + this, + match.Input.Shape.ToShapeRange(match.Range), + match.VariableBindings + ); return true; } - Matcher 
IPhonologicalPatternSubruleSpec.LeftEnvironmentMatcher + Matcher IPhonologicalPatternSubruleSpec.LeftEnvironmentMatcher { get { return null; } } - Matcher IPhonologicalPatternSubruleSpec.RightEnvironmentMatcher + Matcher IPhonologicalPatternSubruleSpec.RightEnvironmentMatcher { get { return null; } } @@ -75,18 +73,35 @@ bool IPhonologicalPatternSubruleSpec.IsApplicable(Word input) return true; } - public void ApplyRhs(Match targetMatch, Range range, VariableBindings varBindings) + public void ApplyRhs(Match targetMatch, Range range, VariableBindings varBindings) { - ShapeNode start = null, - end = null; - foreach (GroupCapture gc in targetMatch.GroupCaptures) + // RUSTIFY Stage 2: group captures are int offsets that go stale on the first structural + // mutation (morph.Remove / MoveNodesAfter re-densify the projection), so resolve EVERYTHING + // to ShapeNode refs up front — those survive the moves, as the old ShapeNode ranges did. + Shape shape = targetMatch.Input.Shape; + int? startTag = null, + endTag = null; + foreach (GroupCapture gc in targetMatch.GroupCaptures) { - if (start == null || gc.Range.Start.CompareTo(start) < 0) - start = gc.Range.Start; - if (end == null || gc.Range.End.CompareTo(end) > 0) - end = gc.Range.End; + if (!gc.Success) + continue; + if (startTag == null || gc.Range.Start < startTag) + startTag = gc.Range.Start; + if (endTag == null || gc.Range.End > endTag) + endTag = gc.Range.End; } - Debug.Assert(start != null && end != null); + Debug.Assert(startTag != null && endTag != null); + ShapeNode start = shape.NodeAt(startTag.Value); + ShapeNode end = shape.NodeAt(endTag.Value - 1); + + GroupCapture leftGroup = targetMatch.GroupCaptures[_leftGroupName]; + GroupCapture rightGroup = targetMatch.GroupCaptures[_rightGroupName]; + Range leftRange = shape.ToShapeRange(leftGroup.Range); + Range rightRange = shape.ToShapeRange(rightGroup.Range); + // Already resolved above via ToShapeRange (leftRange.End == EndNode(leftGroup.Range), + // 
rightRange.Start == NodeAt(rightGroup.Range.Start)) — reuse instead of a second NodeAt lookup. + ShapeNode leftEnd = leftRange.End; + ShapeNode beforeRightGroup = rightRange.Start.Prev; var morphs = targetMatch .Input.Morphs.Where(ann => ann.Range.Overlaps(start, end)) @@ -95,12 +110,8 @@ public void ApplyRhs(Match targetMatch, Range range, foreach (var morph in morphs) morph.Annotation.Remove(); - GroupCapture leftGroup = targetMatch.GroupCaptures[_leftGroupName]; - GroupCapture rightGroup = targetMatch.GroupCaptures[_rightGroupName]; - - ShapeNode beforeRightGroup = rightGroup.Range.Start.Prev; - MoveNodesAfter(targetMatch.Input.Shape, leftGroup.Range.End, rightGroup.Range); - MoveNodesAfter(targetMatch.Input.Shape, beforeRightGroup, leftGroup.Range); + MoveNodesAfter(shape, leftEnd, rightRange); + MoveNodesAfter(shape, beforeRightGroup, leftRange); foreach (var morph in morphs) { @@ -110,7 +121,7 @@ public void ApplyRhs(Match targetMatch, Range range, morph.Annotation.FeatureStruct ); newMorphAnn.Children.AddRange(morph.Children); - targetMatch.Input.Annotations.Add(newMorphAnn, false); + shape.Annotations.Add(newMorphAnn, false); } } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs index 1dc7e3ca5..ecf84a7dc 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRule.cs @@ -8,7 +8,7 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules { - public class SynthesisRewriteRule : IRule + public class SynthesisRewriteRule : IRule { private readonly Morpher _morpher; private readonly RewriteRule _rule; @@ -19,7 +19,7 @@ public SynthesisRewriteRule(Morpher morpher, RewriteRule rule) _morpher = morpher; _rule = rule; - var settings = new MatcherSettings + var settings = new MatcherSettings { Direction = rule.Direction, 
Filter = ann => diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRuleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRuleSpec.cs index 87fa10955..bab19a4e8 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRuleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteRuleSpec.cs @@ -11,9 +11,9 @@ namespace SIL.Machine.Morphology.HermitCrab.PhonologicalRules public class SynthesisRewriteRuleSpec : RewriteRuleSpec { public SynthesisRewriteRuleSpec( - MatcherSettings matcherSettings, + MatcherSettings matcherSettings, bool isIterative, - Pattern lhs, + Pattern lhs, IEnumerable subrules ) : base(lhs.IsEmpty) @@ -23,14 +23,14 @@ IEnumerable subrules if (lhs.IsEmpty) { Pattern.Children.Add( - new Constraint( + new Constraint( FeatureStruct.New().Symbol(HCFeatureSystem.Segment, HCFeatureSystem.Anchor).Value ) ); } else { - foreach (Constraint constraint in lhs.Children.Cast>()) + foreach (Constraint constraint in lhs.Children.Cast>()) { var newConstraint = constraint.Clone(); if (isIterative) @@ -82,15 +82,15 @@ IEnumerable subrules } } - private static bool CheckTarget(Match match, Pattern lhs) + private static bool CheckTarget(Match match, Pattern lhs) { foreach ( - Tuple> tuple in match + Tuple> tuple in match .Input.Shape.GetNodes(match.Range) .Zip(lhs.Children) ) { - var constraints = (Constraint)tuple.Item2; + var constraints = (Constraint)tuple.Item2; if (tuple.Item1.Annotation.Type() != constraints.Type()) return false; } diff --git a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteSubruleSpec.cs b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteSubruleSpec.cs index a604aaba9..2c93bccd1 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteSubruleSpec.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/PhonologicalRules/SynthesisRewriteSubruleSpec.cs @@ -11,7 
+11,7 @@ public abstract class SynthesisRewriteSubruleSpec : RewriteSubruleSpec private readonly bool _isIterative; protected SynthesisRewriteSubruleSpec( - MatcherSettings matcherSettings, + MatcherSettings matcherSettings, bool isIterative, RewriteSubrule subrule, int index diff --git a/src/SIL.Machine.Morphology.HermitCrab/Stratum.cs b/src/SIL.Machine.Morphology.HermitCrab/Stratum.cs index f0fc7e197..823e8402b 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Stratum.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Stratum.cs @@ -125,12 +125,12 @@ public ICollection Entries /// The morphological rule order. public MorphologicalRuleOrder MorphologicalRuleOrder { get; set; } - public override IRule CompileAnalysisRule(Morpher morpher) + public override IRule CompileAnalysisRule(Morpher morpher) { return new AnalysisStratumRule(morpher, this); } - public override IRule CompileSynthesisRule(Morpher morpher) + public override IRule CompileSynthesisRule(Morpher morpher) { return new SynthesisStratumRule(morpher, this); } diff --git a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs index e8cf7ee17..21248d002 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplateRule.cs @@ -6,18 +6,18 @@ namespace SIL.Machine.Morphology.HermitCrab { - internal class SynthesisAffixTemplateRule : IRule + internal class SynthesisAffixTemplateRule : IRule { private readonly Morpher _morpher; private readonly AffixTemplate _template; - private readonly List> _rules; + private readonly List> _rules; public SynthesisAffixTemplateRule(Morpher morpher, AffixTemplate template) { _morpher = morpher; _template = template; - _rules = new List>( - template.Slots.Select(slot => new RuleBatch( + _rules = new List>( + template.Slots.Select(slot => new RuleBatch( slot.Rules.Select(mr => mr.CompileSynthesisRule(morpher)), 
false, FreezableEqualityComparer.Default diff --git a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs index 1d878cd75..a5ab1aa2a 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/SynthesisAffixTemplatesRule.cs @@ -7,12 +7,12 @@ namespace SIL.Machine.Morphology.HermitCrab { - internal class SynthesisAffixTemplatesRule : IRule + internal class SynthesisAffixTemplatesRule : IRule { private readonly Morpher _morpher; private readonly Stratum _stratum; private readonly List _templates; - private readonly List> _templateRules; + private readonly List> _templateRules; public SynthesisAffixTemplatesRule(Morpher morpher, Stratum stratum) { diff --git a/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs b/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs index a15a6de5b..72ff8b24b 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/SynthesisStratumRule.cs @@ -7,10 +7,10 @@ namespace SIL.Machine.Morphology.HermitCrab { - internal class SynthesisStratumRule : IRule + internal class SynthesisStratumRule : IRule { - private readonly IRule _mrulesRule; - private readonly IRule _prulesRule; + private readonly IRule _mrulesRule; + private readonly IRule _prulesRule; private readonly SynthesisAffixTemplatesRule _templatesRule; private readonly Stratum _stratum; private readonly Morpher _morpher; @@ -19,27 +19,27 @@ public SynthesisStratumRule(Morpher morpher, Stratum stratum) { _templatesRule = new SynthesisAffixTemplatesRule(morpher, stratum); _mrulesRule = null; - IEnumerable> mrules = stratum.MorphologicalRules.Select(mrule => + IEnumerable> mrules = stratum.MorphologicalRules.Select(mrule => mrule.CompileSynthesisRule(morpher) ); switch (stratum.MorphologicalRuleOrder) { case MorphologicalRuleOrder.Linear: - _mrulesRule = new 
LinearRuleCascade( + _mrulesRule = new LinearRuleCascade( mrules, true, FreezableEqualityComparer.Default ); break; case MorphologicalRuleOrder.Unordered: - _mrulesRule = new CombinationRuleCascade( + _mrulesRule = new CombinationRuleCascade( mrules, true, FreezableEqualityComparer.Default ); break; } - _prulesRule = new LinearRuleCascade( + _prulesRule = new LinearRuleCascade( stratum.PhonologicalRules.Select(prule => prule.CompileSynthesisRule(morpher)) ); _stratum = stratum; diff --git a/src/SIL.Machine.Morphology.HermitCrab/Word.cs b/src/SIL.Machine.Morphology.HermitCrab/Word.cs index 9b29429e9..96748875f 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Word.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Word.cs @@ -10,7 +10,7 @@ namespace SIL.Machine.Morphology.HermitCrab { - public class Word : Freezable, IAnnotatedData, ICloneable + public class Word : Freezable, IAnnotatedData, ICloneable { public const string RootMorphID = "ROOT"; @@ -19,8 +19,12 @@ public class Word : Freezable, IAnnotatedData, ICloneable private Shape _shape; private readonly List _mruleApps; private int _mruleAppIndex = -1; - private readonly Dictionary _mrulesUnapplied; - private readonly Dictionary _mrulesApplied; + + // RUSTIFY lever 2: lazily allocated — these morphological-rule bookkeeping maps stay empty through + // the phonological-analysis cascade (where ~345 clones/word happen), so cloning them eagerly per + // candidate allocated an empty dictionary for nothing. Null means empty; created on first write. + private Dictionary _mrulesUnapplied; + private Dictionary _mrulesApplied; private readonly List _nonHeadApps; private int _nonHeadAppIndex = -1; private readonly MprFeatureSet _mprFeatures; @@ -29,7 +33,7 @@ public class Word : Freezable, IAnnotatedData, ICloneable private Stratum _stratum; private bool? 
_isLastAppliedRuleFinal; private bool _isPartial; - private readonly Dictionary> _disjunctiveAllomorphIndices; + private Dictionary> _disjunctiveAllomorphIndices; // lazily allocated (see above) private int _mruleAppCount = 0; private readonly IList _alternatives = new List(); @@ -42,12 +46,10 @@ public Word(RootAllomorph rootAllomorph, FeatureStruct realizationalFS) SetRootAllomorph(rootAllomorph); RealizationalFeatureStruct = realizationalFS; _mruleApps = new List(); - _mrulesUnapplied = new Dictionary(); - _mrulesApplied = new Dictionary(); + // _mrulesUnapplied / _mrulesApplied / _disjunctiveAllomorphIndices are lazily allocated (null = empty). _nonHeadApps = new List(); _obligatorySyntacticFeatures = new IDBearerSet(); _isLastAppliedRuleFinal = null; - _disjunctiveAllomorphIndices = new Dictionary>(); } public Word(Stratum stratum, Shape shape) @@ -60,13 +62,11 @@ public Word(Stratum stratum, Shape shape) RealizationalFeatureStruct = new FeatureStruct(); _mprFeatures = new MprFeatureSet(); _mruleApps = new List(); - _mrulesUnapplied = new Dictionary(); - _mrulesApplied = new Dictionary(); + // _mrulesUnapplied / _mrulesApplied / _disjunctiveAllomorphIndices are lazily allocated (null = empty). _nonHeadApps = new List(); _obligatorySyntacticFeatures = new IDBearerSet(); _isLastAppliedRuleFinal = null; _isPartial = false; - _disjunctiveAllomorphIndices = new Dictionary>(); } protected Word(Word word) @@ -82,18 +82,29 @@ protected Word(Word word) _mprFeatures = word.MprFeatures.Clone(); _mruleApps = new List(word._mruleApps); _mruleAppIndex = word._mruleAppIndex; - _mrulesUnapplied = new Dictionary(word._mrulesUnapplied); - _mrulesApplied = new Dictionary(word._mrulesApplied); + // Lazily-allocated maps: copy only when the source actually has entries (null = empty), so a + // candidate cloned during phonological analysis allocates none of these dictionaries. + _mrulesUnapplied = + word._mrulesUnapplied == null || word._mrulesUnapplied.Count == 0 + ? 
null + : new Dictionary(word._mrulesUnapplied); + _mrulesApplied = + word._mrulesApplied == null || word._mrulesApplied.Count == 0 + ? null + : new Dictionary(word._mrulesApplied); _nonHeadApps = new List(word._nonHeadApps.CloneItems()); _nonHeadAppIndex = word._nonHeadAppIndex; _obligatorySyntacticFeatures = new IDBearerSet(word._obligatorySyntacticFeatures); _isLastAppliedRuleFinal = word._isLastAppliedRuleFinal; _isPartial = word._isPartial; CurrentTrace = word.CurrentTrace; - _disjunctiveAllomorphIndices = word._disjunctiveAllomorphIndices.ToDictionary( - kvp => kvp.Key, - kvp => new HashSet(kvp.Value) - ); + _disjunctiveAllomorphIndices = + word._disjunctiveAllomorphIndices == null || word._disjunctiveAllomorphIndices.Count == 0 + ? null + : word._disjunctiveAllomorphIndices.ToDictionary( + kvp => kvp.Key, + kvp => new HashSet(kvp.Value) + ); _mruleAppCount = word._mruleAppCount; } @@ -102,7 +113,7 @@ public IEnumerable> Morphs get { var morphs = new List>(); - foreach (Annotation ann in Annotations) + foreach (Annotation ann in _shape.Annotations) { ann.PostorderTraverse(a => { @@ -173,14 +184,17 @@ public ICollection ObligatorySyntacticFeatures get { return _obligatorySyntacticFeatures; } } - public Range Range + // RUSTIFY Stage 2: Word is the FST's IAnnotatedData and the FST now binds as Fst + // (offset = node Tag), so these expose the shape's int-offset projection. Code that wants the + // ShapeNode-level annotations/range uses word.Shape.Annotations / word.Shape.Range directly. 
+ public Range Range { - get { return _shape.Range; } + get { return _shape.IntRange; } } - public AnnotationList Annotations + public AnnotationList Annotations { - get { return _shape.Annotations; } + get { return _shape.IntAnnotations; } } public Stratum Stratum @@ -318,7 +332,11 @@ internal void MorphologicalRuleUnapplied(IMorphologicalRule mrule) { CheckFrozen(); if (mrule != null) - _mrulesUnapplied.UpdateValue(mrule, () => 0, count => count + 1); + (_mrulesUnapplied = _mrulesUnapplied ?? new Dictionary()).UpdateValue( + mrule, + () => 0, + count => count + 1 + ); if (!(mrule is RealizationalAffixProcessRule)) { _mruleApps.Add(mrule); @@ -333,7 +351,7 @@ internal void MorphologicalRuleUnapplied(IMorphologicalRule mrule) /// The number of unapplications. internal int GetUnapplicationCount(IMorphologicalRule mrule) { - if (!_mrulesUnapplied.TryGetValue(mrule, out int numUnapplies)) + if (_mrulesUnapplied == null || !_mrulesUnapplied.TryGetValue(mrule, out int numUnapplies)) numUnapplies = 0; return numUnapplies; } @@ -349,9 +367,15 @@ internal void MorphologicalRuleApplied(IMorphologicalRule mrule, IEnumerable 0, count => count + 1); + (_mrulesApplied = _mrulesApplied ?? new Dictionary()).UpdateValue( + mrule, + () => 0, + count => count + 1 + ); if (allomorphIndices != null) - _disjunctiveAllomorphIndices.GetOrCreate(_mruleAppCount.ToString()).UnionWith(allomorphIndices); + (_disjunctiveAllomorphIndices = _disjunctiveAllomorphIndices ?? new Dictionary>()) + .GetOrCreate(_mruleAppCount.ToString()) + .UnionWith(allomorphIndices); _mruleAppCount++; } @@ -372,7 +396,7 @@ internal bool? IsLastAppliedRuleFinal /// The number of applications. 
internal int GetApplicationCount(IMorphologicalRule mrule) { - if (!_mrulesApplied.TryGetValue(mrule, out int numApplies)) + if (_mrulesApplied == null || !_mrulesApplied.TryGetValue(mrule, out int numApplies)) numApplies = 0; return numApplies; } @@ -464,7 +488,10 @@ internal IEnumerable> GetMorphs(Allomorph allomorph) internal IEnumerable GetDisjunctiveAllomorphApplications(Annotation morph) { var morphID = (string)morph.FeatureStruct.GetValue(HCFeatureSystem.MorphID); - if (_disjunctiveAllomorphIndices.TryGetValue(morphID, out HashSet indices)) + if ( + _disjunctiveAllomorphIndices != null + && _disjunctiveAllomorphIndices.TryGetValue(morphID, out HashSet indices) + ) return indices; return null; } @@ -508,6 +535,13 @@ internal void ResetDirty() protected override int FreezeImpl() { int code = 23; + // Freezing SyntacticFeatureStruct is correctness hardening only: it makes the 8 + // mutate-after-freeze call sites elsewhere in this namespace (which clone-then-reassign + // rather than mutate in place, see AnalysisAffixTemplateRule etc.) actually enforced by + // CheckFrozen(), instead of the invariant being purely conventional. Deliberately NOT + // folded into the frozen hash/ValueEquals below — those predate this and dedup must stay + // unchanged. 
+ SyntacticFeatureStruct.Freeze(); _shape.Freeze(); code = code * 31 + _shape.GetFrozenHashCode(); _realizationalFS.Freeze(); diff --git a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs index 6469f7990..efea6e214 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs @@ -540,12 +540,12 @@ CharacterDefinitionTable defaultTable { var variables = new Dictionary>(); - Pattern leftEnv = LoadPhoneticTemplate( + Pattern leftEnv = LoadPhoneticTemplate( envElem.Elements("LeftEnvironment").Elements("PhoneticTemplate").SingleOrDefault(), variables, defaultTable ); - Pattern rightEnv = LoadPhoneticTemplate( + Pattern rightEnv = LoadPhoneticTemplate( envElem.Elements("RightEnvironment").Elements("PhoneticTemplate").SingleOrDefault(), variables, defaultTable @@ -1078,7 +1078,7 @@ private void LoadMorphologicalLhs( XElement reqPhonInputElem, Dictionary> variables, Dictionary partNames, - IList> lhs, + IList> lhs, CharacterDefinitionTable defaultTable, string partNamePrefix = null ) @@ -1377,7 +1377,7 @@ private Dictionary> LoadVariables(XElemen return variables; } - private IEnumerable> LoadPatternNodes( + private IEnumerable> LoadPatternNodes( XElement pseqElem, Dictionary> variables, CharacterDefinitionTable defaultTable, @@ -1386,12 +1386,12 @@ Dictionary groupNames { foreach (XElement recElem in pseqElem.Elements()) { - PatternNode node = null; + PatternNode node = null; switch (recElem.Name.LocalName) { case "SimpleContext": SimpleContext simpleCtxt = LoadSimpleContext(recElem, variables); - node = new Constraint(simpleCtxt.FeatureStruct) { Tag = simpleCtxt }; + node = new Constraint(simpleCtxt.FeatureStruct) { Tag = simpleCtxt }; break; case "Segment": @@ -1399,7 +1399,7 @@ Dictionary groupNames CharacterDefinition cd = _charDefs[ (string)recElem.Attribute(recElem.Name.LocalName == "Segment" ? 
"segment" : "boundary") ]; - node = new Constraint(cd.FeatureStruct) { Tag = cd }; + node = new Constraint(cd.FeatureStruct) { Tag = cd }; break; case "OptionalSegmentSequence": @@ -1407,10 +1407,10 @@ Dictionary groupNames int min = string.IsNullOrEmpty(minStr) ? 0 : int.Parse(minStr); var maxStr = (string)recElem.Attribute("max"); int max = string.IsNullOrEmpty(maxStr) ? -1 : int.Parse(maxStr); - node = new Quantifier( + node = new Quantifier( min, max, - new Group(LoadPatternNodes(recElem, variables, defaultTable, groupNames)) + new Group(LoadPatternNodes(recElem, variables, defaultTable, groupNames)) ); break; @@ -1418,8 +1418,8 @@ Dictionary groupNames CharacterDefinitionTable segsTable = GetTable(recElem, defaultTable); var shapeStr = (string)recElem.Element("PhoneticShape"); var segments = new Segments(segsTable, shapeStr); - node = new Group( - segments.Shape.Select(n => new Constraint(n.Annotation.FeatureStruct)) + node = new Group( + segments.Shape.Select(n => new Constraint(n.Annotation.FeatureStruct)) ) { Tag = segments, @@ -1433,7 +1433,7 @@ Dictionary groupNames if (groupNames == null || string.IsNullOrEmpty(id) || !groupNames.TryGetValue(id, out groupName)) yield return node; else - yield return new Group(groupName, node); + yield return new Group(groupName, node); } } @@ -1460,20 +1460,20 @@ Dictionary> variables return new SimpleContext(nc, ctxtVars); } - private Pattern LoadPhoneticTemplate( + private Pattern LoadPhoneticTemplate( XElement ptempElem, Dictionary> variables, CharacterDefinitionTable defaultTable = null, Dictionary groupNames = null ) { - var pattern = new Pattern(); + var pattern = new Pattern(); if (ptempElem != null) { if ((string)ptempElem.Attribute("initialBoundaryCondition") == "true") - pattern.Children.Add(new Constraint(HCFeatureSystem.LeftSideAnchor)); + pattern.Children.Add(new Constraint(HCFeatureSystem.LeftSideAnchor)); foreach ( - PatternNode node in LoadPatternNodes( + PatternNode node in LoadPatternNodes( 
ptempElem.Element("PhoneticSequence"), variables, defaultTable, @@ -1484,13 +1484,13 @@ PatternNode node in LoadPatternNodes( pattern.Children.Add(node); } if ((string)ptempElem.Attribute("finalBoundaryCondition") == "true") - pattern.Children.Add(new Constraint(HCFeatureSystem.RightSideAnchor)); + pattern.Children.Add(new Constraint(HCFeatureSystem.RightSideAnchor)); } pattern.Freeze(); return pattern; } - private Pattern LoadPhoneticSequence( + private Pattern LoadPhoneticSequence( XElement pseqElem, Dictionary> variables, CharacterDefinitionTable defaultTable = null, @@ -1498,8 +1498,8 @@ private Pattern LoadPhoneticSequence( ) { if (pseqElem == null) - return Pattern.New().Value; - var pattern = new Pattern(name, LoadPatternNodes(pseqElem, variables, defaultTable, null)); + return Pattern.New().Value; + var pattern = new Pattern(name, LoadPatternNodes(pseqElem, variables, defaultTable, null)); pattern.Freeze(); return pattern; } diff --git a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs index 4dec714cb..5f7e5663e 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageWriter.cs @@ -1119,7 +1119,7 @@ private XElement WriteProperties(IDictionary properties) } private XElement WritePhoneticTemplate( - Pattern pattern, + Pattern pattern, Dictionary> variables, CharacterDefinitionTable defaultTable = null, string prefix = null @@ -1134,9 +1134,9 @@ private XElement WritePhoneticTemplate( return phonTempElem; } - private bool IsAnchor(PatternNode node, FeatureSymbol type) + private bool IsAnchor(PatternNode node, FeatureSymbol type) { - if (node is Constraint constraint) + if (node is Constraint constraint) { return constraint.Type() == HCFeatureSystem.Anchor && (FeatureSymbol)constraint.FeatureStruct.GetValue(HCFeatureSystem.AnchorType) == type; @@ -1146,7 +1146,7 @@ private bool IsAnchor(PatternNode node, FeatureSymbol 
type) } private XElement WritePhoneticSequence( - Pattern pattern, + Pattern pattern, Dictionary> variables, CharacterDefinitionTable defaultTable = null, string prefix = null @@ -1155,20 +1155,20 @@ private XElement WritePhoneticSequence( var seqElem = new XElement("PhoneticSequence"); if (!string.IsNullOrEmpty(pattern.Name)) seqElem.Add(new XAttribute("id", Normalize((prefix ?? "") + pattern.Name))); - foreach (PatternNode node in pattern.Children) + foreach (PatternNode node in pattern.Children) seqElem.Add(WritePatternNodes(node, variables, defaultTable, prefix ?? "", null)); return seqElem; } private IEnumerable WritePatternNodes( - PatternNode node, + PatternNode node, Dictionary> variables, CharacterDefinitionTable defaultTable, string prefix, string id ) { - if (node is Constraint constraint) + if (node is Constraint constraint) { if (constraint.Tag == null) yield break; @@ -1187,13 +1187,13 @@ string id yield break; } - if (node is Quantifier quantifier) + if (node is Quantifier quantifier) { yield return WriteOptionalSegmentSequence(quantifier, variables, defaultTable, id); yield break; } - if (node is Group group) + if (node is Group group) { if (!string.IsNullOrEmpty(group.Name)) { @@ -1219,7 +1219,7 @@ XElement elem in WritePatternNodes( else { // Normal group - foreach (PatternNode childNode in group.Children) + foreach (PatternNode childNode in group.Children) { foreach (XElement elem in WritePatternNodes(childNode, variables, defaultTable, prefix, id)) yield return elem; @@ -1277,7 +1277,7 @@ private XElement WriteSimpleContext( } private XElement WriteOptionalSegmentSequence( - Quantifier quantifier, + Quantifier quantifier, Dictionary> variables, CharacterDefinitionTable defaultTable, string id diff --git a/src/SIL.Machine/Annotations/Annotation.cs b/src/SIL.Machine/Annotations/Annotation.cs index 7292fc591..d4c8c1a42 100644 --- a/src/SIL.Machine/Annotations/Annotation.cs +++ b/src/SIL.Machine/Annotations/Annotation.cs @@ -124,7 +124,18 @@ public 
bool Optional set { CheckFrozen(); + if (_optional == value) + return; _optional = value; + // Shape's int-offset projection copies Optional by value and caches against the root + // annotation list's Version (see AnnotationList.IncrementVersion). Optional is part of + // the projected view but flipping it is not a structural change, so bump the root + // list's version here to invalidate the cache — otherwise the matcher keeps seeing the + // stale flag and never forks the optional-skip instances (RUSTIFY Stage 2). + Annotation top = this; + while (top.Parent != null) + top = top.Parent; + (top.List as AnnotationList)?.IncrementVersion(); } } diff --git a/src/SIL.Machine/Annotations/AnnotationList.cs b/src/SIL.Machine/Annotations/AnnotationList.cs index c57fa9e24..20536e3f4 100644 --- a/src/SIL.Machine/Annotations/AnnotationList.cs +++ b/src/SIL.Machine/Annotations/AnnotationList.cs @@ -17,6 +17,81 @@ public class AnnotationList private int _currentID; private readonly Annotation _parent; private int _hashCode; + private int _version; + + /// + /// Monotonically increments on every structural change (add/remove/clear). Lets a consumer + /// (e.g. 's lazily-built int-offset annotation projection) cheaply + /// detect when a cached derivative is stale without diffing the list. + /// + internal int Version + { + get { return _version; } + } + + /// + /// Bumps for a non-structural change that a cached derivative still + /// depends on. Specifically: 's int-offset projection copies each + /// annotation's flag by value, so flipping + /// Optional (during analysis/unapplication) must invalidate that cache even though the list + /// structure is unchanged. 's setter calls this on + /// the root list. + /// + internal void IncrementVersion() + { + _version++; + } + + // Cache of filtered+direction-sorted annotation views for FST traversal (see + // TraversalMethodBase.Reset). 
Only populated on FROZEN lists — a frozen list (and its + // annotations' FeatureStructs) is immutable, so the filtered view is final; for unfrozen + // lists a rule's in-place FeatureStruct edit could silently invalidate a cached view, so + // they never cache. Keyed by filter-delegate reference: filters come from a handful of + // compiler-cached non-capturing lambdas (one per rule-class call site), so the chain stays + // tiny (≤ filters × directions). Lock-free CAS publish; a lost race just rebuilds once. + private sealed class FilteredView + { + internal readonly object Filter; + internal readonly Direction Direction; + internal readonly List> Annotations; + internal readonly FilteredView Next; + + internal FilteredView( + object filter, + Direction direction, + List> annotations, + FilteredView next + ) + { + Filter = filter; + Direction = direction; + Annotations = annotations; + Next = next; + } + } + + private FilteredView _filteredViews; + + internal List> GetFilteredView(object filter, Direction dir) + { + for (FilteredView v = _filteredViews; v != null; v = v.Next) + { + if (ReferenceEquals(v.Filter, filter) && v.Direction == dir) + return v.Annotations; + } + return null; + } + + internal void AddFilteredView(object filter, Direction dir, List> annotations) + { + while (true) + { + FilteredView head = _filteredViews; + var entry = new FilteredView(filter, dir, annotations, head); + if (System.Threading.Interlocked.CompareExchange(ref _filteredViews, entry, head) == head) + return; + } + } public AnnotationList() : base(new AnnotationComparer(), begin => new Annotation(Range.Null)) { } @@ -121,6 +196,7 @@ public override void Add(Annotation node) public void Add(Annotation node, bool subsume) { CheckFrozen(); + _version++; if (_parent != null && !_parent.Range.Contains(node.Range)) { throw new ArgumentException( @@ -160,6 +236,7 @@ public override bool Remove(Annotation node) public bool Remove(Annotation node, bool preserveChildren) { CheckFrozen(); + 
_version++; if (base.Remove(node)) { if (preserveChildren) @@ -281,6 +358,7 @@ public IEnumerable> GetNodes(Range range, Direction public override void Clear() { CheckFrozen(); + _version++; base.Clear(); } diff --git a/src/SIL.Machine/Annotations/Shape.cs b/src/SIL.Machine/Annotations/Shape.cs index ecea24a02..5e4dd3063 100644 --- a/src/SIL.Machine/Annotations/Shape.cs +++ b/src/SIL.Machine/Annotations/Shape.cs @@ -1,32 +1,82 @@ using System; +using System.Collections; using System.Collections.Generic; using System.Linq; -using SIL.Extensions; using SIL.Machine.DataStructures; using SIL.Machine.FeatureModel; using SIL.ObjectModel; namespace SIL.Machine.Annotations { + /// + /// An ordered sequence of s plus their annotation tree. + /// + /// As of the RUSTIFY flat-shape rework (Phase 3b-impl, Stage 1) a owns its nodes + /// in flat backing arrays addressed by a stable per-node : the prev/next + /// links (an in-array doubly-linked list, so + /// and stay O(1) and the tag-relabel order maintenance is preserved) + /// and the per-node frozen flag live here rather than on the node. The list machinery that used to be + /// inherited from OrderedBidirList<ShapeNode> is reimplemented over those arrays. The + /// objects added to the shape are retained as the canonical one-per-slot + /// handles, so reference identity is unchanged and behavior is byte-identical. + /// public class Shape - : OrderedBidirList, + : IOrderedBidirList, IAnnotatedData, ICloneable, IFreezable, IValueEquatable { + // Link sentinel for "no node" (the old null Next/Prev). + private const int Nil = -1; + private readonly Func _marginSelector; private readonly AnnotationList _annotations; + private readonly IEqualityComparer _comparer; private int _hashCode; + // Flat backing. Slot 0 = Begin margin, slot 1 = End margin, content nodes from slot 2 up. 
+ private ShapeNode[] _nodes; // canonical handle per slot (null = free slot) + private int[] _next; // forward link by slot (Nil = none) + private int[] _prev; // backward link by slot (Nil = none) + private bool[] _frozen; // per-node frozen flag by slot + private int _capacity; + private int _used; // high-water count of slots ever handed out + private readonly Stack _free; // reclaimed slots below the high-water mark + private int _size; // content node count (excludes the two margins) + + private readonly ShapeNode _begin; + private readonly ShapeNode _end; + + // RUSTIFY Stage 3 (III): copy-on-write clone. A clone of a *frozen* shape stores its source here + // and does NOT copy the node graph: the hot read path (the FST matcher) consumes the clone only + // through the int-offset projection (IntAnnotations/IntRange), which is served from the frozen + // source — so a clone that is only traversed (never has a ShapeNode/Annotation handle handed out + // and is never mutated) costs a shell, not N nodes + N annotations + their skip-list towers. The + // first access that needs the real node graph — any flat-backing link read, enumeration, handle + // bridge (NodeAt), .Annotations access, or mutation — calls EnsureInflated() to materialize it. 
+ private Shape _cowSource; + public Shape(Func marginSelector) : this(marginSelector, new AnnotationList()) { } public Shape(Func marginSelector, AnnotationList annotations) - : base(EqualityComparer.Default, marginSelector) { _marginSelector = marginSelector; _annotations = annotations; + _comparer = EqualityComparer.Default; + _free = new Stack(); + _capacity = 0; + _used = 0; + _size = 0; + + _begin = marginSelector(true); + _end = marginSelector(false); + Adopt(_begin, AllocSlot()); + Adopt(_end, AllocSlot()); + _next[_begin.Index] = _end.Index; + _prev[_end.Index] = _begin.Index; + Begin.Tag = int.MinValue; End.Tag = int.MaxValue; _annotations.Add(Begin.Annotation, false); @@ -36,9 +86,115 @@ public Shape(Func marginSelector, AnnotationList ann protected Shape(Shape shape) : this(shape._marginSelector) { - shape.CopyTo(this); + // Copy-on-write only when the source is frozen (immutable, so safe to share): the common + // case, since words are frozen before being cloned. Flatten any COW chain to the real source. + if (shape.IsFrozen) + _cowSource = shape._cowSource ?? shape; + else + shape.CopyTo(this); + } + + // Materialize a copy-on-write clone's real node graph on first access that needs it (see _cowSource). + // Idempotent and not reentrant: clears _cowSource first, then does the real copy from the (frozen, + // non-COW) source; re-freezes if this clone had already been frozen-by-sharing. + private void EnsureInflated() + { + if (_cowSource == null) + return; + Shape src = _cowSource; + _cowSource = null; + bool wasFrozen = IsFrozen; + if (wasFrozen) + IsFrozen = false; + src.CopyTo(this); + if (wasFrozen) + Freeze(); + } + + #region Flat backing helpers + + private void EnsureCapacity(int n) + { + if (n <= _capacity) + return; + int newCap = _capacity == 0 ? 
4 : _capacity * 2; + while (newCap < n) + newCap *= 2; + Array.Resize(ref _nodes, newCap); + Array.Resize(ref _next, newCap); + Array.Resize(ref _prev, newCap); + Array.Resize(ref _frozen, newCap); + _capacity = newCap; + } + + private int AllocSlot() + { + if (_free.Count > 0) + return _free.Pop(); + int idx = _used++; + EnsureCapacity(_used); + return idx; + } + + private void Adopt(ShapeNode node, int idx) + { + _nodes[idx] = node; + _next[idx] = Nil; + _prev[idx] = Nil; + _frozen[idx] = false; + node.Owner = this; + node.Index = idx; + } + + // Detaches a node from this shape (the old OrderedBidirListNode.Clear): frees its slot and + // resets the handle to the detached state. Does not adjust _size; callers manage that. + // Preserves a frozen node's IsFrozen==true for its remaining (detached) lifetime by carrying + // the flag over to _detachedFrozen before clearing Owner — ShapeNode.IsFrozen used to be a + // permanent per-node bool that never reset once set, and detaching a node must not silently + // un-freeze it. + private void Detach(ShapeNode node) + { + int idx = node.Index; + bool wasFrozen = _frozen[idx]; + _nodes[idx] = null; + _next[idx] = Nil; + _prev[idx] = Nil; + _frozen[idx] = false; + node.Owner = null; + node.Index = -1; + if (wasFrozen) + node.MarkDetachedFrozen(); + _free.Push(idx); } + internal ShapeNode GetNextLink(int index) + { + if (_cowSource != null) + EnsureInflated(); + int n = _next[index]; + return n < 0 ? null : _nodes[n]; + } + + internal ShapeNode GetPrevLink(int index) + { + if (_cowSource != null) + EnsureInflated(); + int p = _prev[index]; + return p < 0 ? 
null : _nodes[p]; + } + + internal bool IsNodeFrozen(int index) + { + return _frozen[index]; + } + + internal void SetNodeFrozen(int index) + { + _frozen[index] = true; + } + + #endregion + public Range Range { get { return Range.Create(Begin, End); } @@ -46,16 +202,46 @@ public Range Range public AnnotationList Annotations { - get { return _annotations; } + get + { + // Hands out the ShapeNode-keyed annotation tree (morph extraction, rule code, result + // comparison) — needs the real node graph. + EnsureInflated(); + return _annotations; + } } public bool IsFrozen { get; private set; } + // ---- RUSTIFY Stage 2: int-offset projection (the Fst bridge) ---- + // The FST binds as Fst with offset = a DENSE per-projection node position (0..N+1 in + // node order: Begin=0, content 1..N, End=N+1). Dense contiguous offsets — rather than the + // shape's sparse Tag — are what keep the int model byte-identical: they never collide with the + // Range.Null = [-1,-1] sentinel, never overflow the half-open +1 (Tag's End == int.MaxValue + // did), and keep the End anchor a non-empty [N+1, N+2) (matching the ShapeNode anchor's length). + // These views are rebuilt lazily, gated on the annotation list Version (+ frozen state), so a + // stable/frozen shape builds them once and reuses them across the thousands of Transduce calls + // per word, while a shape mutated in place by an iterative rewrite rule rebuilds on next access. + private AnnotationList _intAnnotations; + private Dictionary _byOffset; + private Dictionary _nodeOffset; + private int _intProjectionVersion = -1; + private bool _intProjectionFrozen; + public void Freeze() { if (IsFrozen) return; + // A copy-on-write clone equals its already-frozen source: adopt the frozen state (and its + // hash) without materializing the node graph, so freeze-then-traverse stays handle-free. 
+ if (_cowSource != null) + { + IsFrozen = true; + _hashCode = _cowSource.GetFrozenHashCode(); + return; + } + IsFrozen = true; Begin.Freeze(); int i = 0; @@ -71,6 +257,155 @@ public void Freeze() _hashCode = 23; _hashCode = _hashCode * 31 + Count; _hashCode = _hashCode * 31 + _annotations.GetFrozenHashCode(); + + // Build the int-offset projection now, while frozen and single-threaded. A frozen shape is + // immutable, so this projection is final — and (RUSTIFY Stage 3 / COW) copy-on-write clones + // delegate their IntAnnotations to this frozen source, possibly from several parse threads at + // once. Building eagerly here means those concurrent reads always hit a complete cache rather + // than racing a lazy first build of the offset dictionaries. No extra work overall: a frozen + // shape that is frozen is one that will be traversed (by itself or its COW clones). + EnsureIntProjection(); + // Freeze the (final) projection so the FST traversal can cache filtered views on it + // (AnnotationList.GetFilteredView gates on IsFrozen — for an unfrozen list, in-place + // FeatureStruct edits could silently invalidate a cached view). Also fail-fast hardens + // the COW invariant: any unexpected mutation of a shared projection now throws. + _intAnnotations.Freeze(); + } + + // Maps a ShapeNode annotation range to its int-offset range using the dense per-projection node + // positions: a single node [n, n] -> half-open [off(n), off(n)+1); a span [s, e] -> + // [off(s), off(e)+1). Relationship-preserving vs the inclusive ShapeNode form (see the + // IntOffsetRangeMapping parity test); dense offsets make it free of the Tag edge cases. 
+ private Range ToIntRange(Range r) + { + return Range.Create(_nodeOffset[r.Start], _nodeOffset[r.End] + 1); + } + + private void EnsureIntProjection() + { + if ( + _intAnnotations != null + && _intProjectionVersion == _annotations.Version + && _intProjectionFrozen == IsFrozen + ) + { + return; + } + + // Assign dense offsets to every node in node order: Begin=0, content 1..N, End=N+1. + _nodeOffset = new Dictionary(); + _byOffset = new Dictionary(); + int pos = 0; + AssignOffset(Begin, ref pos); + foreach (ShapeNode node in this) + AssignOffset(node, ref pos); + AssignOffset(End, ref pos); + + var dest = new AnnotationList(); + foreach (Annotation top in _annotations) + dest.Add(ProjectAnnotation(top), false); + + _intAnnotations = dest; + _intProjectionVersion = _annotations.Version; + _intProjectionFrozen = IsFrozen; + } + + private void AssignOffset(ShapeNode node, ref int pos) + { + _nodeOffset[node] = pos; + _byOffset[pos] = node; + pos++; + } + + private Annotation ProjectAnnotation(Annotation src) + { + // Share the FeatureStruct by reference (no clone): the int annotation is a view, and a + // rule's in-place FeatureStruct edit on a matched node must remain visible. + var ann = new Annotation(ToIntRange(src.Range), src.FeatureStruct) { Optional = src.Optional }; + if (!src.IsLeaf) + { + foreach (Annotation child in src.Children) + ann.Children.Add(ProjectAnnotation(child), false); + } + return ann; + } + + /// + /// The int-offset projection of this shape's annotations (RUSTIFY Stage 2): the + /// AnnotationList the Fst<Word,int> traversal consumes. Built + /// lazily and cached against the annotation list's Version. + /// + public AnnotationList IntAnnotations + { + get + { + // The whole point of COW: serve the projection from the frozen source without + // materializing this clone's node graph. This is the FST matcher's only access path.
+ if (_cowSource != null) + return _cowSource.IntAnnotations; + EnsureIntProjection(); + return _intAnnotations; + } + } + + /// + /// The whole-shape int range — the half-open image of the inclusive ShapeNode range + /// [Begin, End], i.e. [off(Begin), off(End) + 1). The +1 matters: the only + /// framework consumer is Matcher.GetStartAnnotation via Range.GetStart(dir), and a + /// right-to-left match starts at GetStart(RtL) == End. The End anchor's dense node range + /// is [off(End), off(End)+1), whose RtL start coordinate is off(End)+1 — so this range's End + /// must be off(End)+1 for a RtL match to begin at the End anchor rather than at + /// the last content node (which would skip any edit adjacent to End, e.g. inserting a deleted + /// segment after the final vowel during analysis). + /// + public Range IntRange + { + get + { + if (_cowSource != null) + return _cowSource.IntRange; + EnsureIntProjection(); + return Range.Create(_nodeOffset[Begin], _nodeOffset[End] + 1); + } + } + + /// + /// Resolves an int offset (a dense node position) back to its node — the reverse of the + /// int-offset projection, used by rule RHS code to act on the segment graph. Works on frozen + /// and unfrozen shapes. + /// + public ShapeNode NodeAt(int offset) + { + // Hands out a real ShapeNode of this shape (rule-RHS / mutation path) — must materialize. + EnsureInflated(); + EnsureIntProjection(); + return _byOffset[offset]; + } + + /// + /// The int offset (dense node position) of a node. Companion to NodeAt. + /// + public int OffsetOf(ShapeNode node) + { + EnsureInflated(); + EnsureIntProjection(); + return _nodeOffset[node]; + } + + /// + /// The offset to pass to Matcher.Match(input, start) to begin matching at + /// node in direction dir. A node's half-open annotation + /// is [off, off+1), and the matcher locates the start annotation by its + /// Range.GetStart(dir): that is off left-to-right but off+1 right-to-left.
+ /// (With the old inclusive [node, node] ShapeNode ranges this was direction-agnostic; + /// the dense half-open int model needs this adjustment to stay byte-identical for RtL matches.) + /// + public int MatchStartOffset(ShapeNode node, Direction dir) + { + EnsureInflated(); + EnsureIntProjection(); + int off = _nodeOffset[node]; + return dir == Direction.LeftToRight ? off : off + 1; } private void CheckFrozen() @@ -79,6 +414,178 @@ private void CheckFrozen() throw new InvalidOperationException("The shape is immutable."); } + #region ICollection / IBidirList + + public int Count + { + // COW-safe: the clone's content count equals its frozen source's, without inflating. + get { return _cowSource != null ? _cowSource.Count : _size; } + } + + bool ICollection.IsReadOnly + { + get { return false; } + } + + public ShapeNode Begin + { + get { return _begin; } + } + + public ShapeNode End + { + get { return _end; } + } + + public ShapeNode GetBegin(Direction dir) + { + return dir == Direction.LeftToRight ? Begin : End; + } + + public ShapeNode GetEnd(Direction dir) + { + return dir == Direction.LeftToRight ? End : Begin; + } + + public ShapeNode First + { + // Count is COW-aware; GetNextLink inflates if needed, so this hands out a real node. + get { return Count == 0 ? null : GetNextLink(_begin.Index); } + } + + public ShapeNode Last + { + get { return Count == 0 ? null : GetPrevLink(_end.Index); } + } + + public ShapeNode GetFirst(Direction dir) + { + return dir == Direction.LeftToRight ? First : Last; + } + + public ShapeNode GetLast(Direction dir) + { + return dir == Direction.LeftToRight ? Last : First; + } + + public ShapeNode GetNext(ShapeNode cur) + { + return GetNext(cur, Direction.LeftToRight); + } + + public ShapeNode GetNext(ShapeNode cur, Direction dir) + { + if (cur.List != this) + throw new ArgumentException("cur is not a member of this collection.", "cur"); + return dir == Direction.LeftToRight ? 
cur.Next : cur.Prev; + } + + public ShapeNode GetPrev(ShapeNode cur) + { + return GetPrev(cur, Direction.LeftToRight); + } + + public ShapeNode GetPrev(ShapeNode cur, Direction dir) + { + if (cur.List != this) + throw new ArgumentException("cur is not a member of this collection.", "cur"); + return dir == Direction.LeftToRight ? cur.Prev : cur.Next; + } + + public bool Find(ShapeNode example, out ShapeNode result) + { + return Find(example, Direction.LeftToRight, out result); + } + + public bool Find(ShapeNode start, ShapeNode example, out ShapeNode result) + { + return Find(start, example, Direction.LeftToRight, out result); + } + + public bool Find(ShapeNode example, Direction dir, out ShapeNode result) + { + return Find(GetFirst(dir), example, dir, out result); + } + + public bool Find(ShapeNode start, ShapeNode example, Direction dir, out ShapeNode result) + { + for (ShapeNode n = start; n != GetEnd(dir); n = n.GetNext(dir)) + { + if (_comparer.Equals(example, n)) + { + result = n; + return true; + } + } + result = null; + return false; + } + + public bool Contains(ShapeNode node) + { + return node.List == this; + } + + public void CopyTo(ShapeNode[] array, int arrayIndex) + { + foreach (ShapeNode node in this) + array[arrayIndex++] = node; + } + + IEnumerator IEnumerable.GetEnumerator() + { + // Count is COW-aware; First inflates if needed. (Use Count, not _size — a COW clone has + // _size == 0 until inflated.) 
+ if (Count == 0) + yield break; + + for (ShapeNode node = First; node != End; node = node.Next) + yield return node; + } + + IEnumerator IEnumerable.GetEnumerator() + { + return ((IEnumerable)this).GetEnumerator(); + } + + public void Add(ShapeNode node) + { + AddAfter(_end.Prev, node, Direction.LeftToRight); + } + + public void AddRange(IEnumerable e) + { + foreach (ShapeNode node in e) + Add(node); + } + + public void AddRangeAfter(ShapeNode node, IEnumerable newNodes, Direction dir) + { + if (_size == 0 && node == null) + node = GetBegin(dir); + + if (node.List != this) + throw new ArgumentException("node is not a member of this collection.", "node"); + + foreach (ShapeNode newNode in newNodes) + { + AddAfter(node, newNode, dir); + node = newNode; + } + } + + public void AddRangeAfter(ShapeNode node, IEnumerable newNodes) + { + AddRangeAfter(node, newNodes, Direction.LeftToRight); + } + + public void AddAfter(ShapeNode node, ShapeNode newNode) + { + AddAfter(node, newNode, Direction.LeftToRight); + } + + #endregion + public ShapeNode Add(FeatureStruct fs) { return Add(fs, false); @@ -104,10 +611,30 @@ public Range CopyTo(ShapeNode srcStart, ShapeNode srcEnd, Shape dest) return CopyTo(Range.Create(srcStart, srcEnd), dest); } + // Per-thread scratch map reused across CopyTo calls. CopyTo runs on every Word.Clone + // (hundreds per parse on a real grammar) and the map is fully consumed before CopyTo + // returns (never retained) and CopyTo is not reentrant, so reusing one map per thread + // removes a per-clone Dictionary allocation without any sharing hazard. This is a SAFE + // pool — unlike the across-word FST arena (RUSTIFY Phase 1b), nothing here survives the + // call, so it cannot promote parse data to Gen2 / regress parallel parsing. + [ThreadStatic] + private static Dictionary CloneMapping; + public Range CopyTo(Range srcRange, Shape dest) { + // Reads this shape's real node graph + annotations as the copy source — materialize if COW. 
+ // (When called from EnsureInflated the source is the real frozen shape, so this is a no-op.) + EnsureInflated(); ShapeNode startNode = null; ShapeNode endNode = null; + // Build the src->dest node mapping inline while cloning, instead of a second pass + // with GetNodes().Zip().ToDictionary(). CopyTo runs on every Word.Clone (thousands + // per parse on a real grammar), so eliminating the extra enumerations + LINQ + // allocations per clone is a measurable GC win. + Dictionary mapping = CloneMapping; + if (mapping == null) + mapping = CloneMapping = new Dictionary(); + mapping.Clear(); foreach (ShapeNode node in GetNodes(srcRange)) { ShapeNode newNode = node.Clone(); @@ -115,12 +642,10 @@ public Range CopyTo(Range srcRange, Shape dest) startNode = newNode; endNode = newNode; dest.Add(newNode); + mapping[node] = newNode; } Range destRange = Range.Create(startNode, endNode); - Dictionary mapping = GetNodes(srcRange) - .Zip(dest.GetNodes(destRange)) - .ToDictionary(tuple => tuple.Item1, tuple => tuple.Item2); foreach (Annotation ann in _annotations.GetNodes(srcRange)) CopyAnnotations(dest._annotations, ann, mapping); @@ -175,9 +700,10 @@ public ShapeNode AddAfter(ShapeNode node, FeatureStruct fs, bool optional, Direc return newNode; } - public override void AddAfter(ShapeNode node, ShapeNode newNode, Direction dir) + public void AddAfter(ShapeNode node, ShapeNode newNode, Direction dir) { CheckFrozen(); + EnsureInflated(); if (newNode.List == this) throw new ArgumentException("newNode is already a member of this collection.", "newNode"); if (node != null && node.List != this) @@ -221,20 +747,51 @@ public override void AddAfter(ShapeNode node, ShapeNode newNode, Direction dir) } } - base.AddAfter(node, newNode, dir); + // Splice newNode into the in-array linked list (was OrderedBidirList.AddAfter). 
+ if (Count == 0 && node == null) + node = GetBegin(dir); + + newNode.Remove(); + Adopt(newNode, AllocSlot()); + + ShapeNode anchor = node; + if (dir == Direction.RightToLeft) + anchor = anchor.Prev; + + int aIdx = anchor.Index; + int sIdx = newNode.Index; + int afterIdx = _next[aIdx]; + _next[sIdx] = afterIdx; + _next[aIdx] = sIdx; + _prev[sIdx] = aIdx; + if (afterIdx >= 0) + _prev[afterIdx] = sIdx; + + _size++; _annotations.Add(newNode.Annotation); } - public override bool Remove(ShapeNode node) + public bool Remove(ShapeNode node) { CheckFrozen(); + EnsureInflated(); if (node.List != this) return false; node.Annotation.Remove(); UpdateAnnotations(_annotations, node); - return base.Remove(node); + + int idx = node.Index; + int p = _prev[idx]; + int n = _next[idx]; + if (p >= 0) + _next[p] = n; + if (n >= 0) + _prev[n] = p; + Detach(node); + _size--; + return true; } private void UpdateAnnotations(AnnotationList annList, ShapeNode node) @@ -290,10 +847,15 @@ Annotation ann in annList } } - public override void Clear() + public void Clear() { CheckFrozen(); - base.Clear(); + EnsureInflated(); + foreach (ShapeNode node in this.ToArray()) + Detach(node); + _next[_begin.Index] = _end.Index; + _prev[_end.Index] = _begin.Index; + _size = 0; _annotations.Clear(); _annotations.Add(Begin.Annotation); _annotations.Add(End.Annotation); @@ -387,6 +949,7 @@ public IEnumerable GetNodes(Range range) public IEnumerable GetNodes(Range range, Direction dir) { + EnsureInflated(); return this.GetNodes(range.GetStart(dir), range.GetEnd(dir), dir); } @@ -395,7 +958,13 @@ public bool ValueEquals(Shape other) if (Count != other.Count) return false; - return _annotations.ValueEquals(other._annotations); + // Compare via the int-offset projection, not Annotations: IntAnnotations is served + // lazily from a COW clone's frozen source without materializing the ShapeNode graph + // (see the IntAnnotations getter above), whereas Annotations forces EnsureInflated() on + // both operands. 
This method is the equality FreezableEqualityComparer uses for + // rule-cascade dedup, so every hash-collision check would otherwise de-COW both + // candidate words for no reason other than the comparison itself. + return IntAnnotations.ValueEquals(other.IntAnnotations); } public int GetFrozenHashCode() diff --git a/src/SIL.Machine/Annotations/ShapeNode.cs b/src/SIL.Machine/Annotations/ShapeNode.cs index cf72e7c8f..ff6eebdab 100644 --- a/src/SIL.Machine/Annotations/ShapeNode.cs +++ b/src/SIL.Machine/Annotations/ShapeNode.cs @@ -7,8 +7,17 @@ namespace SIL.Machine.Annotations { + /// + /// A node in a Shape. As of the RUSTIFY flat-shape rework (Phase 3b-impl, Stage 1) + /// this is a handle into its owning Shape's flat backing arrays rather than a + /// self-contained doubly-linked-list node: the prev/next links and the frozen flag live in the owner + /// arrays addressed by Index. The handle object added to a shape is stored as the + /// canonical one-per-slot handle, so reference identity (and therefore ==, dictionary keys and + /// endpoint identity) is preserved exactly as before. + /// Tag stays on the node so it survives a node being moved between shapes. + /// public class ShapeNode - : OrderedBidirListNode, + : IOrderedBidirListNode, IComparable, IComparable, ICloneable, @@ -17,11 +26,29 @@ public class ShapeNode { private readonly Annotation _ann; private int _tag; + private bool _detachedFrozen; + + // Equals() is intentionally left as the default (reference equality) — ShapeNode is used as + // an identity key in Shape.EnsureIntProjection's per-Freeze() Dictionary + // (_nodeOffset). A CPU profile showed the CLR's identity-hash fallback (assigned via + // AssignOffset's dictionary inserts) contributing real self-time on that hot per-word path. + // _id is a construction-order sequence number, immutable for the instance's lifetime (unlike + // _tag/Index, which are reassigned as the node moves/is frozen), so it changes nothing about + // which nodes compare equal.
+ private static int NextId; + private readonly int _id = System.Threading.Interlocked.Increment(ref NextId); + + // The owning shape, or null when this node is detached (created but not yet added, or removed). + internal Shape Owner { get; set; } + + // Slot index into the owner's flat arrays; -1 when detached. + internal int Index { get; set; } public ShapeNode(FeatureStruct fs) { _ann = new Annotation(Range.Create(this), fs); _tag = int.MinValue; + Index = -1; } protected ShapeNode(ShapeNode node) @@ -45,6 +72,54 @@ public Annotation Annotation get { return _ann; } } + public IBidirList List + { + get { return Owner; } + } + + public ShapeNode Next + { + get { return Owner?.GetNextLink(Index); } + } + + public ShapeNode Prev + { + get { return Owner?.GetPrevLink(Index); } + } + + public ShapeNode GetNext(Direction dir) + { + if (Owner == null) + return null; + return Owner.GetNext(this, dir); + } + + public ShapeNode GetPrev(Direction dir) + { + if (Owner == null) + return null; + return Owner.GetPrev(this, dir); + } + + public bool Remove() + { + if (Owner == null) + return false; + return Owner.Remove(this); + } + + public void AddAfter(ShapeNode newNode, Direction dir) + { + if (Owner == null) + return; + Owner.AddAfter(this, newNode, dir); + } + + public void AddAfter(ShapeNode newNode) + { + AddAfter(newNode, Direction.LeftToRight); + } + public int CompareTo(ShapeNode other) { if (other.List != List) @@ -113,13 +188,31 @@ private void CheckFrozen() throw new InvalidOperationException("The shape node is immutable."); } - public bool IsFrozen { get; private set; } + public bool IsFrozen + { + get { return Owner != null ? 
Owner.IsNodeFrozen(Index) : _detachedFrozen; } + } public void Freeze() { if (IsFrozen) return; - IsFrozen = true; + if (Owner != null) + Owner.SetNodeFrozen(Index); + else + _detachedFrozen = true; + } + + // Called by Shape.Detach when a frozen node is removed/cleared, so IsFrozen keeps reporting + // true for the node's remaining (now-detached) lifetime instead of silently flipping to false. + internal void MarkDetachedFrozen() + { + _detachedFrozen = true; + } + + public override int GetHashCode() + { + return _id; } public int GetFrozenHashCode() diff --git a/src/SIL.Machine/DataStructures/BidirList.cs b/src/SIL.Machine/DataStructures/BidirList.cs index a3dad1ce3..580ba9980 100644 --- a/src/SIL.Machine/DataStructures/BidirList.cs +++ b/src/SIL.Machine/DataStructures/BidirList.cs @@ -12,22 +12,43 @@ public abstract class BidirList : IBidirList private readonly TNode _end; private readonly IComparer _comparer; - private readonly Random _rand = new Random(); + // [ThreadStatic] instead of one Random per BidirList instance: skip-list level selection is + // statistical (result shape doesn't affect the byte-identical parse output, only balance), so + // sharing one Random per thread is safe. A CPU profile showed constructing a fresh + // System.Random per BidirList (i.e. per AnnotationList, i.e. effectively per Word.Clone) — + // including its OS-entropy-seeded Xoshiro256** state — as real, avoidable self-time. Each + // Word/Shape clone's BidirLists are only ever mutated by the single thread that owns that + // clone (the COW invariant: a shared/frozen Shape is read-only), so no cross-thread Random + // sharing occurs. 
+ [ThreadStatic] + private static Random ThreadRand; + + private static Random Rand + { + get + { + if (ThreadRand == null) + ThreadRand = new Random(); + return ThreadRand; + } + } + private int _size; protected BidirList(IComparer comparer, Func marginSelector) { _begin = marginSelector(true); _end = marginSelector(false); - _begin.Init(this, 33); + // The Begin/End margins grow their tower arrays on demand (see GrowMargins) rather than + // pre-allocating the 33-level skip-list maximum: most lists stay shallow, so the eager [33] + // margin towers were pure waste — ~70% of the per-AnnotationList tower-array allocation, the + // dominant Word.Clone sub-cost on Sena (RUSTIFY Stage 3, increment II). Start at level 0 only. + _begin.Init(this, 1); _begin.Levels = 1; - _end.Init(this, 33); + _end.Init(this, 1); _end.Levels = 1; - for (int i = 0; i < 33; i++) - { - _begin.SetNext(i, _end); - _end.SetPrev(i, _begin); - } + _begin.SetNext(0, _end); + _end.SetPrev(0, _begin); _comparer = comparer; } @@ -55,13 +76,12 @@ public virtual void Add(TNode node) // 1-bits before we encounter the first 0-bit is the level of the node. Since R is // 32-bit, the level can be at most 32. int level = 0; - for (int r = _rand.Next(); (r & 1) == 1; r >>= 1) + for (int r = Rand.Next(); (r & 1) == 1; r >>= 1) { level++; if (level == _begin.Levels) { - _begin.Levels++; - _end.Levels++; + GrowMargins(); break; } } @@ -92,15 +112,29 @@ public virtual void Add(TNode node) _size++; } + // Raise the skip list's height by one level: ensure the margins' tower arrays can hold the new + // level, link Begin<->End at it, then bump the margin levels. Replaces the old eager 33-level + // margin pre-allocation; called only when a freshly added node reaches the current max height. 
+ private void GrowMargins() + { + int newLevel = _begin.Levels; + _begin.EnsureLevelCapacity(newLevel + 1); + _end.EnsureLevelCapacity(newLevel + 1); + _begin.SetNext(newLevel, _end); + _end.SetPrev(newLevel, _begin); + _begin.Levels = newLevel + 1; + _end.Levels = newLevel + 1; + } + public virtual void Clear() { foreach (TNode node in this.ToArray()) node.Clear(); - for (int i = 0; i < 33; i++) - { - _begin.SetNext(i, _end); - _end.SetPrev(i, _begin); - } + // Reset to height 1; only level 0 needs relinking (higher levels are above Levels and are + // never read until GrowMargins re-links them as the list grows tall again). The margin + // arrays keep whatever capacity they grew to, which is reused. + _begin.SetNext(0, _end); + _end.SetPrev(0, _begin); _begin.Levels = 1; _end.Levels = 1; _size = 0; diff --git a/src/SIL.Machine/DataStructures/BidirListNode.cs b/src/SIL.Machine/DataStructures/BidirListNode.cs index dcb26a32e..dce752943 100644 --- a/src/SIL.Machine/DataStructures/BidirListNode.cs +++ b/src/SIL.Machine/DataStructures/BidirListNode.cs @@ -3,9 +3,15 @@ namespace SIL.Machine.DataStructures public abstract class BidirListNode : IBidirListNode where TNode : BidirListNode { + // Skip-list tower links. Level 0 (the only level ~50% of nodes have) is stored inline in fields so + // those nodes allocate no tower array at all, and every taller node's array is one slot shorter; + // levels 1.. live in _nextHigh/_prevHigh (null when Levels <= 1). The per-node `new TNode[levels]` + // towers were the dominant Word.Clone sub-cost on Sena (RUSTIFY Stage 3, increment II). private BidirList _list; - private TNode[] _next; - private TNode[] _prev; + private TNode _next0; + private TNode _prev0; + private TNode[] _nextHigh; + private TNode[] _prevHigh; public IBidirList List { @@ -14,24 +20,12 @@ public IBidirList List public TNode Next { - get - { - if (_next == null) - return null; - - return _next[0]; - } + get { return Levels == 0 ? 
null : _next0; } } public TNode Prev { - get - { - if (_prev == null) - return null; - - return _prev[0]; - } + get { return Levels == 0 ? null : _prev0; } } /// @@ -73,16 +67,35 @@ public bool Remove() protected internal virtual void Init(BidirList list, int levels) { _list = list; - _next = new TNode[levels]; - _prev = new TNode[levels]; + _next0 = null; + _prev0 = null; + _nextHigh = levels > 1 ? new TNode[levels - 1] : null; + _prevHigh = levels > 1 ? new TNode[levels - 1] : null; Levels = levels; } + // Grow this node's high-level tower arrays to hold a list of total height `levels`. Used by + // BidirList for the Begin/End margins, which grow as the skip list gets taller instead of being + // pre-allocated to the 33-level maximum up front (most skip lists stay shallow). Right-sizes the + // exact level: margins grow one level at a time and the shallow majority never reach here, so + // geometric growth would only over-allocate; the O(height^2) churn it avoids is bounded by the + // ~31-level skip-list cap and only reached by rare very large lists. + internal void EnsureLevelCapacity(int levels) + { + int needHigh = levels - 1; + if (needHigh <= 0 || (_nextHigh?.Length ?? 0) >= needHigh) + return; + System.Array.Resize(ref _nextHigh, needHigh); + System.Array.Resize(ref _prevHigh, needHigh); + } + protected internal virtual void Clear() { _list = null; - _next = null; - _prev = null; + _next0 = null; + _prev0 = null; + _nextHigh = null; + _prevHigh = null; Levels = 0; } @@ -90,22 +103,28 @@ protected internal virtual void Clear() internal TNode GetNext(int level) { - return _next[level]; + return level == 0 ? _next0 : _nextHigh[level - 1]; } internal void SetNext(int level, TNode node) { - _next[level] = node; + if (level == 0) + _next0 = node; + else + _nextHigh[level - 1] = node; } internal TNode GetPrev(int level) { - return _prev[level]; + return level == 0 ? 
_prev0 : _prevHigh[level - 1]; } internal void SetPrev(int level, TNode node) { - _prev[level] = node; + if (level == 0) + _prev0 = node; + else + _prevHigh[level - 1] = node; } } } diff --git a/src/SIL.Machine/DataStructures/DataStructuresExtensions.cs b/src/SIL.Machine/DataStructures/DataStructuresExtensions.cs index 4f950c37f..9806c227d 100644 --- a/src/SIL.Machine/DataStructures/DataStructuresExtensions.cs +++ b/src/SIL.Machine/DataStructures/DataStructuresExtensions.cs @@ -263,6 +263,82 @@ bool preorder action((TNode)node); } + /// + /// Walks two structurally-isomorphic forests in lockstep (preorder), invoking + /// the action callback on each corresponding node pair. Used to pair a cloned tree with + /// its source without allocating the Queue + SelectMany/Zip iterator chain that + /// roots1.SelectMany(GetNodesBreadthFirst).Zip(roots2.SelectMany(GetNodesBreadthFirst)) + /// builds. The two forests MUST be isomorphic (e.g. one is a Clone of the other); the + /// resulting set of node pairs is independent of traversal order, so a preorder walk is + /// interchangeable with the BFS-zip form. The state argument is threaded through so the + /// callback can be a static (allocation-free) lambda rather than a closure.
+ /// + public static void PairedPreorderTraverse( + IEnumerable roots1, + IEnumerable roots2, + TState state, + Action action, + Direction dir + ) + where TNode : class, IBidirTreeNode + { + IEnumerator e1 = roots1.GetEnumerator(); + IEnumerator e2 = roots2.GetEnumerator(); + try + { + bool m1, + m2; + while ((m1 = e1.MoveNext()) & (m2 = e2.MoveNext())) + PairedPreorderNode(e1.Current, e2.Current, state, action, dir); + System.Diagnostics.Debug.Assert( + m1 == m2, + "PairedPreorderTraverse: forests are not isomorphic (root count mismatch)" + ); + } + finally + { + e1.Dispose(); + e2.Dispose(); + } + } + + private static void PairedPreorderNode( + TNode n1, + TNode n2, + TState state, + Action action, + Direction dir + ) + where TNode : class, IBidirTreeNode + { + action(state, n1, n2); + System.Diagnostics.Debug.Assert( + n1.IsLeaf == n2.IsLeaf, + "PairedPreorderTraverse: forests are not isomorphic (leaf mismatch)" + ); + if (!n1.IsLeaf) + { + IEnumerator c1 = n1.Children.GetNodes(dir).GetEnumerator(); + IEnumerator c2 = n2.Children.GetNodes(dir).GetEnumerator(); + try + { + bool m1, + m2; + while ((m1 = c1.MoveNext()) & (m2 = c2.MoveNext())) + PairedPreorderNode(c1.Current, c2.Current, state, action, dir); + System.Diagnostics.Debug.Assert( + m1 == m2, + "PairedPreorderTraverse: forests are not isomorphic (child count mismatch)" + ); + } + finally + { + c1.Dispose(); + c2.Dispose(); + } + } + } + public static void LevelOrderTraverse(this IBidirTreeNode root, Action action) where TNode : class, IBidirTreeNode { diff --git a/src/SIL.Machine/DataStructures/IDBearerBase.cs b/src/SIL.Machine/DataStructures/IDBearerBase.cs index a018efa33..ee62f45f0 100644 --- a/src/SIL.Machine/DataStructures/IDBearerBase.cs +++ b/src/SIL.Machine/DataStructures/IDBearerBase.cs @@ -21,5 +21,19 @@ public override string ToString() { return Description; } + + // Equals() is intentionally left as the default (reference equality) — this override only + // makes GetHashCode() cheap. 
Without it, every derived type (Feature and its subclasses, + // symbols, etc.) falls back to the CLR's identity hash, which a CPU profile showed + // contributing real self-time when these objects are used as Dictionary/HashSet keys (e.g. + // FeatureStruct._definite's Dictionary, rebuilt on every unify output). + // _id is immutable and set once at construction, so hashing on it changes nothing about + // which objects compare equal: two objects Equal by the untouched reference-equality Equals + // are the same instance, hence share the same _id, hence the same hash — the + // Equals/GetHashCode contract holds trivially. + public override int GetHashCode() + { + return _id == null ? 0 : _id.GetHashCode(); + } } } diff --git a/src/SIL.Machine/FeatureModel/FeatureStruct.cs b/src/SIL.Machine/FeatureModel/FeatureStruct.cs index 8fe64c34c..cc9c083a7 100644 --- a/src/SIL.Machine/FeatureModel/FeatureStruct.cs +++ b/src/SIL.Machine/FeatureModel/FeatureStruct.cs @@ -2,8 +2,8 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using System.Threading; using SIL.Extensions; -using SIL.Machine.DataStructures; using SIL.Machine.FeatureModel.Fluent; using SIL.ObjectModel; @@ -51,15 +51,44 @@ public static IFeatureStructSyntax NewMutable(FeatureSystem featSys, FeatureStru return new FeatureStructBuilder(featSys, fs.Clone(), true); } - private readonly IDBearerDictionary _definite; + // Plain Dictionary rather than IDBearerDictionary: the latter kept a *second* parallel + // Dictionary to serve string-ID lookups, doubling the dictionary + // allocation on every unify-output / COW-inflation. String-ID lookups are rare (cold external + // API) so they now scan _definite by Feature.ID instead (see TryGetValueById/ContainsKeyById). + private Dictionary _definite; private int? _hashCode; + /// + /// On/off switch for the bit-packed flat-vector unify fast path. 
Default on; internal so a + /// test can flip it to verify parity against the original unification engine. Not part of + /// the public API. + /// + internal static bool FlatUnifyEnabled = true; + + // Bit-packed flat unify vector, computed lazily and cached (reset on mutation): + // _flatBits[feature.FlatIndex] = allowed-symbol bits (present) or ~0UL (absent = unconstrained). + // _flatState: 0 = not computed, 1 = computed. + // _flatComplete: every feature was bit-packable -> safe to use as the *constraint* (arc input). + // _flatSafeSegment: every NON-packable feature is non-symbolic (string/complex), which a + // symbolic input can never constrain -> safe to use as the *segment* (extras are ignored). + private ulong[] _flatBits; + private byte _flatState; + private bool _flatComplete; + private bool _flatSafeSegment; + + // Copy-on-write: a clone of a FROZEN feature struct borrows the source's (immutable) + // backing dictionary instead of deep-copying it. _shared is true until the first + // mutation inflates a private copy; _sharedSource is the frozen FS we borrowed from + // (needed to seed the re-entrancy map on inflate so the deep copy matches a normal clone). + private bool _shared; + private FeatureStruct _sharedSource; + /// /// Initializes a new instance of the class. /// public FeatureStruct() { - _definite = new IDBearerDictionary(); + _definite = new Dictionary(); } protected FeatureStruct(FeatureStruct other) @@ -78,6 +107,14 @@ private FeatureStruct(FeatureStruct other, IDictionary /// Gets the features. 
/// @@ -172,7 +209,7 @@ public void AddValue(Feature feature, FeatureValue value) if (value == null) throw new ArgumentNullException("value"); - CheckFrozen(); + EnsureWritable(); _definite[feature] = value; } @@ -183,7 +220,7 @@ public void AddValue(IEnumerable path, FeatureValue value) if (value == null) throw new ArgumentNullException("value"); - CheckFrozen(); + EnsureWritable(); Feature lastFeature; FeatureStruct lastFS; if (FollowPath(path, out lastFeature, out lastFS)) @@ -197,7 +234,7 @@ public void RemoveValue(Feature feature) if (feature == null) throw new ArgumentNullException("feature"); - CheckFrozen(); + EnsureWritable(); _definite.Remove(feature); } @@ -206,7 +243,7 @@ public void RemoveValue(IEnumerable path) if (path == null) throw new ArgumentNullException("path"); - CheckFrozen(); + EnsureWritable(); Feature lastFeature; FeatureStruct lastFS; if (FollowPath(path, out lastFeature, out lastFS)) @@ -217,7 +254,7 @@ public void RemoveValue(IEnumerable path) public void ReplaceVariables(VariableBindings varBindings) { - CheckFrozen(); + EnsureWritable(); ReplaceVariables(varBindings, new HashSet()); } @@ -254,7 +291,7 @@ private void ReplaceVariables(VariableBindings varBindings, ISet public void RemoveVariables() { - CheckFrozen(); + EnsureWritable(); RemoveVariables(new HashSet()); } @@ -293,7 +330,7 @@ public void PriorityUnion(FeatureStruct other, VariableBindings varBindings) if (other == null) throw new ArgumentNullException("other"); - CheckFrozen(); + EnsureWritable(); PriorityUnion(other, varBindings, new Dictionary()); } @@ -377,7 +414,7 @@ public void Union(FeatureStruct other, VariableBindings varBindings) if (other == null) throw new ArgumentNullException("other"); - CheckFrozen(); + EnsureWritable(); UnionImpl(other, varBindings, new Dictionary>()); } @@ -423,7 +460,7 @@ public void Add(FeatureStruct other, VariableBindings varBindings) if (other == null) throw new ArgumentNullException("other"); - CheckFrozen(); + EnsureWritable(); 
AddImpl(other, varBindings, new Dictionary>()); } @@ -477,7 +514,7 @@ public void Subtract(FeatureStruct other, VariableBindings varBindings) if (other == null) throw new ArgumentNullException("other"); - CheckFrozen(); + EnsureWritable(); SubtractImpl(other, varBindings, new Dictionary>()); } @@ -513,7 +550,7 @@ IDictionary> visited public void Clear() { - CheckFrozen(); + EnsureWritable(); _definite.Clear(); } @@ -667,12 +704,42 @@ public bool TryGetValue(string featureID, out T value) throw new ArgumentNullException("featureID"); FeatureValue val; - if (_definite.TryGetValue(featureID, out val)) + if (TryGetValueById(_definite, featureID, out val)) return Dereference(val, out value); value = null; return false; } + // String-ID lookups over the plain _definite dictionary (replaces the dropped parallel + // string-keyed dictionary). Feature IDs are unique within a struct, so first match wins. + private static bool TryGetValueById( + Dictionary definite, + string id, + out FeatureValue value + ) + { + foreach (KeyValuePair kvp in definite) + { + if (kvp.Key.ID == id) + { + value = kvp.Value; + return true; + } + } + value = null; + return false; + } + + private static bool ContainsKeyById(Dictionary definite, string id) + { + foreach (KeyValuePair kvp in definite) + { + if (kvp.Key.ID == id) + return true; + } + return false; + } + public bool TryGetValue(IEnumerable path, out T value) where T : FeatureValue { @@ -702,7 +769,7 @@ public bool TryGetValue(IEnumerable path, out T value) if (FollowPath(path, out lastID, out lastFS)) { FeatureValue val; - if (lastFS._definite.TryGetValue(lastID, out val)) + if (TryGetValueById(lastFS._definite, lastID, out val)) return Dereference(val, out value); } value = null; @@ -722,7 +789,7 @@ public bool ContainsFeature(string featureID) if (featureID == null) throw new ArgumentNullException("featureID"); - return _definite.ContainsKey(featureID); + return ContainsKeyById(_definite, featureID); } public bool 
ContainsFeature(IEnumerable path) @@ -745,7 +812,7 @@ public bool ContainsFeature(IEnumerable path) string lastID; FeatureStruct lastFS; if (FollowPath(path, out lastID, out lastFS)) - return lastFS._definite.ContainsKey(lastID); + return ContainsKeyById(lastFS._definite, lastID); return false; } @@ -758,7 +825,7 @@ private bool FollowPath(IEnumerable path, out string lastID, out Feature if (lastID != null) { FeatureValue curValue; - if (!lastFS._definite.TryGetValue(lastID, out curValue) || !Dereference(curValue, out lastFS)) + if (!TryGetValueById(lastFS._definite, lastID, out curValue) || !Dereference(curValue, out lastFS)) { lastID = null; lastFS = null; @@ -793,6 +860,94 @@ private bool FollowPath(IEnumerable path, out Feature lastFeature, out return true; } + // Builds and caches the bit-packed flat unify vector (EnsureWritable clears the cache on + // mutation). _flatState becomes 1 once the vector is published; usability is tracked separately + // by _flatComplete (every feature bit-packable) and _flatSafeSegment (no unpackable symbolic feature). + private void EnsureFlat() + { + // Volatile.Read/Write instead of a lock: frozen structs are read concurrently from every + // parallel FST traversal thread (Input.Matches -> TryFastUnifiable), and a plain field + // write here would let one thread observe _flatState==1 before _flatBits' array + // reference is visible (store reordering), reading a stale/null array. Redundant + // concurrent computation is harmless (deterministic, frozen input) and cheaper than a + // lock; only the publish order needs the release/acquire fence. 
+ if (Volatile.Read(ref _flatState) != 0) + return; + int maxIdx = -1; + bool complete = true; // all features bit-packable (usable as a constraint/input) + bool safeSegment = true; // every non-packable feature is non-symbolic (ignorable in a segment) + foreach (KeyValuePair featVal in _definite) + { + if ( + featVal.Key is SymbolicFeature sf + && sf.FlatIndex >= 0 + && Dereference(featVal.Value) is SymbolicFeatureValue sv + && sv.TryGetFlatBits(out _) + ) + { + if (sf.FlatIndex > maxIdx) + maxIdx = sf.FlatIndex; + } + else + { + complete = false; + // A symbolic-but-unpackable feature (variable/empty/>64 symbols) CAN be + // constrained by a symbolic input, so it can't be safely ignored in a segment. + if (featVal.Key is SymbolicFeature) + safeSegment = false; + } + } + var arr = new ulong[maxIdx + 1]; + for (int i = 0; i <= maxIdx; i++) + arr[i] = ulong.MaxValue; // absent feature = unconstrained + foreach (KeyValuePair featVal in _definite) + { + if ( + featVal.Key is SymbolicFeature sf + && sf.FlatIndex >= 0 + && Dereference(featVal.Value) is SymbolicFeatureValue sv + && sv.TryGetFlatBits(out ulong bits) + ) + { + arr[sf.FlatIndex] = bits; + } + } + _flatBits = arr; + _flatComplete = complete; + _flatSafeSegment = safeSegment; + Volatile.Write(ref _flatState, 1); + } + + // Bit-packed unifiability fast path. Returns false (not handled) when FlatUnifyEnabled is off, other is not + // fully bit-packed, or this struct has an unpackable symbolic feature; otherwise sets result and returns true. + // NOTE(review): frozenness is not checked here; confirm callers pass frozen structs. Provably identical to IsUnifiable(other, useDefaults:false, varBindings:null) for the simple/no-variable case: + // a feature absent on either side is ~0 (the "no constraint" branch), and overlap == unifiable. + // this = the segment being matched; other = the arc-input constraint. 
+ internal bool TryFastUnifiable(FeatureStruct other, out bool result) + { + result = false; + if (!FlatUnifyEnabled) + return false; + EnsureFlat(); + other.EnsureFlat(); + // The constraint (input) must be fully bit-packed; the segment may carry extra + // non-symbolic features the symbolic input can't constrain (so they're ignorable). + if (!other._flatComplete || !_flatSafeSegment) + return false; + ulong[] a = _flatBits; + ulong[] b = other._flatBits; + int n = a.Length > b.Length ? a.Length : b.Length; + for (int i = 0; i < n; i++) + { + ulong av = i < a.Length ? a[i] : ulong.MaxValue; + ulong bv = i < b.Length ? b[i] : ulong.MaxValue; + if ((av & bv) == 0) + return true; // result already false: a feature has no common symbol + } + result = true; + return true; + } + public bool IsUnifiable(FeatureStruct other) { return IsUnifiable(other, false); @@ -1099,6 +1254,10 @@ internal override void FindReentrances(IDictionary reentranc public new FeatureStruct Clone() { + // A clone of a frozen FS borrows its immutable backing (copy-on-write); a clone of an + // unfrozen FS must be an independent deep copy, since the caller may mutate both. + if (IsFrozen) + return new FeatureStruct(this, sharedClone: true); return new FeatureStruct(this); } @@ -1188,10 +1347,25 @@ public override bool ValueEquals(FeatureValue other) public bool IsFrozen { get; private set; } - private void CheckFrozen() + // Guards every mutation. Frozen structs stay immutable (throw). A copy-on-write shell + // that is still borrowing a frozen backing inflates a private deep copy first, so neither + // this struct's mutation nor any recursion into its children can touch shared frozen data. + private void EnsureWritable() { if (IsFrozen) throw new InvalidOperationException("The feature structure is immutable."); + // Any mutation invalidates the cached flat unify vector. 
+ _flatState = 0; + _flatBits = null; + if (!_shared) + return; + var copies = new Dictionary { [_sharedSource] = this }; + var owned = new Dictionary(); + foreach (KeyValuePair featVal in _definite) + owned[featVal.Key] = Dereference(featVal.Value).CloneImpl(copies); + _definite = owned; + _shared = false; + _sharedSource = null; } public void Freeze() @@ -1211,7 +1385,13 @@ internal override int FreezeImpl(ISet visited) IsFrozen = true; int code = 23; - foreach (KeyValuePair kvp in _definite.OrderBy(kvp => kvp.Key.ID)) + // Ordinal, not the LINQ default (culture-aware, CompareInfo.Compare): feature IDs are + // opaque grammar identifiers, not user-facing text, and a CPU profile showed the + // culture-aware comparison contributing real self-time on this hot per-Freeze() sort + // (needed only for a deterministic hash — Dictionary iteration order isn't guaranteed). + foreach ( + KeyValuePair kvp in _definite.OrderBy(kvp => kvp.Key.ID, StringComparer.Ordinal) + ) { code = code * 31 + kvp.Key.GetHashCode(); FeatureValue value = Dereference(kvp.Value); @@ -1255,7 +1435,12 @@ internal override string ToStringImpl(ISet visited, IDictionary 0) sb.Append("["); - foreach (KeyValuePair kvp in _definite.OrderBy(kvp => kvp.Key.Description)) + foreach ( + KeyValuePair kvp in _definite.OrderBy( + kvp => kvp.Key.Description, + StringComparer.Ordinal + ) + ) { FeatureValue value = Dereference(kvp.Value); if (!firstFeature) diff --git a/src/SIL.Machine/FeatureModel/FeatureValue.cs b/src/SIL.Machine/FeatureModel/FeatureValue.cs index 19d18ebe9..c345f7429 100644 --- a/src/SIL.Machine/FeatureModel/FeatureValue.cs +++ b/src/SIL.Machine/FeatureModel/FeatureValue.cs @@ -5,8 +5,27 @@ namespace SIL.Machine.FeatureModel { public abstract class FeatureValue : ICloneable { + // Equals() is intentionally left as the default (reference equality) — every subclass + // (FeatureStruct, SimpleFeatureValue, ...) 
is tracked by IDENTITY in the visited-node + // dictionaries/sets used throughout unification (e.g. AddImpl/UnionImpl's + // IDictionary, CloneImpl's IDictionary): + // structurally-identical-but-distinct instances must stay distinct nodes during a graph + // traversal, so content-based equality here would be a correctness bug. This override only + // makes GetHashCode() cheap: a CPU profile showed the CLR's default identity hash (assigning + // a sync-block hash code on first use) dominating self-time, driven by these dictionaries — + // FeatureStruct instances are created on nearly every clone/unify-output. _id is a + // construction-order sequence number, unique and stable for the instance's lifetime, so it + // changes nothing about which objects compare equal (still exactly reference equality). + private static int NextId; + private readonly int _id = System.Threading.Interlocked.Increment(ref NextId); + internal FeatureValue Forward { get; set; } + public override int GetHashCode() + { + return _id; + } + internal abstract bool UnionImpl( FeatureValue other, VariableBindings varBindings, diff --git a/src/SIL.Machine/FeatureModel/StringFeatureValue.cs b/src/SIL.Machine/FeatureModel/StringFeatureValue.cs index ae05be2a9..3772efffd 100644 --- a/src/SIL.Machine/FeatureModel/StringFeatureValue.cs +++ b/src/SIL.Machine/FeatureModel/StringFeatureValue.cs @@ -234,7 +234,11 @@ protected override int GetValuesHashCode() { int code = base.GetValuesHashCode(); code = code * 31 + Not.GetHashCode(); - code = code * 31 + _values.OrderBy(str => str).GetSequenceHashCode(); + // Ordinal: these are opaque grammar string-feature values, not user-facing text, and this + // hash is computed on the Freeze() hot path (a CPU profile showed the culture-aware default + // contributing real self-time via CompareInfo.Compare — same class of fix as FeatureStruct's + // OrderBy sites). 
+ code = code * 31 + _values.OrderBy(str => str, StringComparer.Ordinal).GetSequenceHashCode(); return code; } diff --git a/src/SIL.Machine/FeatureModel/SymbolicFeature.cs b/src/SIL.Machine/FeatureModel/SymbolicFeature.cs index 4fad07741..bee22db64 100644 --- a/src/SIL.Machine/FeatureModel/SymbolicFeature.cs +++ b/src/SIL.Machine/FeatureModel/SymbolicFeature.cs @@ -7,6 +7,29 @@ public class SymbolicFeature : Feature { private readonly PossibleSymbolCollection _possibleSymbols; + // Process-wide counter for globally-unique flat indices (see FlatIndex). + private static int NextFlatIndex = -1; + private int _flatIndex = -1; + + /// + /// Globally-unique dense index used to place this feature's allowed-symbol bits in a + /// FeatureStruct's flat unify vector. Assigned lazily and once (so it works regardless of + /// whether/when the owning FeatureSystem is frozen — loaded grammars don't always freeze it). + /// Returns -1 for features with > 64 symbols, which forces the slow unification path. + /// + internal int FlatIndex + { + get + { + if (_flatIndex < 0 && _possibleSymbols.Count <= sizeof(ulong) * 8) + { + int idx = System.Threading.Interlocked.Increment(ref NextFlatIndex); + System.Threading.Interlocked.CompareExchange(ref _flatIndex, idx, -1); + } + return _flatIndex; + } + } + public SymbolicFeature(string id, params FeatureSymbol[] possibleSymbols) : this(id, (IEnumerable)possibleSymbols) { } diff --git a/src/SIL.Machine/FeatureModel/SymbolicFeatureValue.cs b/src/SIL.Machine/FeatureModel/SymbolicFeatureValue.cs index 724911e0a..480bfe87c 100644 --- a/src/SIL.Machine/FeatureModel/SymbolicFeatureValue.cs +++ b/src/SIL.Machine/FeatureModel/SymbolicFeatureValue.cs @@ -94,6 +94,18 @@ public IEnumerable Values get { return _feature.PossibleSymbols.Where(_flags.Get); } } + // For the flat bit-packed unify fast path: this value's allowed symbols as a raw ulong + // bitset. 
Returns false (forcing the slow path) for variables or non-ulong (>64 symbol) + // backing, or an empty set (so a fs-only empty value can't be wrongly skipped). + internal bool TryGetFlatBits(out ulong bits) + { + bits = 0; + if (IsVariable || !(_flags is UlongSymbolicFeatureValueFlags ulong_flags)) + return false; + bits = ulong_flags.RawFlags; + return bits != 0; + } + public bool IsSupersetOf(SymbolicFeatureValue other, bool notOther = false) { return IsSupersetOf(false, other, notOther); diff --git a/src/SIL.Machine/FeatureModel/UlongSymbolicFeatureValueFlags.cs b/src/SIL.Machine/FeatureModel/UlongSymbolicFeatureValueFlags.cs index bdb1596c4..09f4d92f1 100644 --- a/src/SIL.Machine/FeatureModel/UlongSymbolicFeatureValueFlags.cs +++ b/src/SIL.Machine/FeatureModel/UlongSymbolicFeatureValueFlags.cs @@ -9,10 +9,20 @@ internal class UlongSymbolicFeatureValueFlags : ISymbolicFeatureValueFlags private readonly ulong _mask; private ulong _flags = 0; + /// The set of allowed symbols as a raw bitset (bit i = symbol with Index i). + internal ulong RawFlags => _flags; + public UlongSymbolicFeatureValueFlags(SymbolicFeature feature) { _feature = feature; - _mask = (1UL << feature.PossibleSymbols.Count) - 1UL; + int count = feature.PossibleSymbols.Count; + // A feature with exactly 64 symbols occupies bits 0..63 (the whole ulong). Computing + // the mask as `(1UL << count) - 1` would be wrong here: C# masks a ulong shift count to + // its low 6 bits, so `1UL << 64` == `1UL << 0` == 1, giving _mask == 0 — which silently + // breaks every mask-dependent op (HasAllSet, negation, and the `not`/`notOther` branches + // of IsSupersetOf/Overlaps/IntersectWith/UnionWith/ExceptWith/Not). The dispatch guard in + // SymbolicFeatureValue.CreateFlags admits counts up to 64, so this boundary is reachable. + _mask = count >= 64 ? 
ulong.MaxValue : (1UL << count) - 1UL; } private UlongSymbolicFeatureValueFlags(SymbolicFeature feature, ulong mask, ulong flags) diff --git a/src/SIL.Machine/FiniteState/DeterministicFsaTraversalMethod.cs b/src/SIL.Machine/FiniteState/DeterministicFsaTraversalMethod.cs index 5470fa689..577268e58 100644 --- a/src/SIL.Machine/FiniteState/DeterministicFsaTraversalMethod.cs +++ b/src/SIL.Machine/FiniteState/DeterministicFsaTraversalMethod.cs @@ -1,6 +1,5 @@ using System.Collections.Generic; using SIL.Machine.Annotations; -using SIL.Machine.FeatureModel; namespace SIL.Machine.FiniteState { @@ -8,19 +7,12 @@ internal class DeterministicFsaTraversalMethod : TraversalMethodBase> where TData : IAnnotatedData { - public DeterministicFsaTraversalMethod( - Fst fst, - TData data, - VariableBindings varBindings, - bool startAnchor, - bool endAnchor, - bool useDefaults - ) - : base(fst, data, varBindings, startAnchor, endAnchor, useDefaults) { } + public DeterministicFsaTraversalMethod(Fst fst) + : base(fst) { } - public override IEnumerable> Traverse( + public override List> Traverse( ref int annIndex, - Register[,] initRegisters, + Register[] initRegisters, IList initCmds, ISet initAnns ) @@ -75,23 +67,17 @@ protected override DeterministicFsaTraversalInstance CreateInsta private Stack> InitializeStack( ref int annIndex, - Register[,] registers, + Register[] registers, IList cmds, ISet initAnns ) { var instStack = new Stack>(); - foreach ( - DeterministicFsaTraversalInstance inst in Initialize( - ref annIndex, - registers, - cmds, - initAnns - ) - ) - { + List> insts = InitializeBuffer; + insts.Clear(); + Initialize(ref annIndex, registers, cmds, initAnns, insts); + foreach (DeterministicFsaTraversalInstance inst in insts) instStack.Push(inst); - } return instStack; } diff --git a/src/SIL.Machine/FiniteState/DeterministicFstTraversalInstance.cs b/src/SIL.Machine/FiniteState/DeterministicFstTraversalInstance.cs index fc8b85bdd..b6b94b23a 100644 --- 
a/src/SIL.Machine/FiniteState/DeterministicFstTraversalInstance.cs +++ b/src/SIL.Machine/FiniteState/DeterministicFstTraversalInstance.cs @@ -1,8 +1,6 @@ using System.Collections.Generic; -using System.Linq; using SIL.Extensions; using SIL.Machine.Annotations; -using SIL.Machine.DataStructures; namespace SIL.Machine.FiniteState { @@ -34,16 +32,11 @@ public override void CopyTo(TraversalInstance other) base.CopyTo(other); var otherDfst = (DeterministicFstTraversalInstance)other; - Dictionary, Annotation> outputMappings = Output - .Annotations.SelectMany(a => a.GetNodesBreadthFirst()) - .Zip(Output.Annotations.SelectMany(a => a.GetNodesBreadthFirst())) - .ToDictionary(t => t.Item1, t => t.Item2); - otherDfst.Mappings.AddRange( - _mappings.Select(kvp => new KeyValuePair, Annotation>( - kvp.Key, - outputMappings[kvp.Value] - )) - ); + // Identity map: the original zipped this.Output's node sequence with itself, so + // outputMappings[v] == v and the block reduces to copying _mappings unchanged. + // Avoids a Dictionary + two SelectMany(BFS) + Zip + Select per instance copy. + // Byte-identical; otherDfst.Mappings is empty here (GetCachedInstance -> Clear()). 
+ otherDfst.Mappings.AddRange(_mappings); foreach (Annotation ann in _queue) otherDfst.Queue.Enqueue(ann); } diff --git a/src/SIL.Machine/FiniteState/DeterministicFstTraversalMethod.cs b/src/SIL.Machine/FiniteState/DeterministicFstTraversalMethod.cs index 534a2dcd1..10cee5e3b 100644 --- a/src/SIL.Machine/FiniteState/DeterministicFstTraversalMethod.cs +++ b/src/SIL.Machine/FiniteState/DeterministicFstTraversalMethod.cs @@ -1,6 +1,4 @@ using System.Collections.Generic; -using System.Linq; -using SIL.Extensions; using SIL.Machine.Annotations; using SIL.Machine.DataStructures; using SIL.Machine.FeatureModel; @@ -12,19 +10,12 @@ internal class DeterministicFstTraversalMethod : TraversalMethodBase> where TData : IAnnotatedData { - public DeterministicFstTraversalMethod( - Fst fst, - TData data, - VariableBindings varBindings, - bool startAnchor, - bool endAnchor, - bool useDefaults - ) - : base(fst, data, varBindings, startAnchor, endAnchor, useDefaults) { } + public DeterministicFstTraversalMethod(Fst fst) + : base(fst) { } - public override IEnumerable> Traverse( + public override List> Traverse( ref int annIndex, - Register[,] initRegisters, + Register[] initRegisters, IList initCmds, ISet initAnns ) @@ -137,28 +128,27 @@ Queue> queue private Stack> InitializeStack( ref int annIndex, - Register[,] registers, + Register[] registers, IList cmds, ISet initAnns ) { var instStack = new Stack>(); - foreach ( - DeterministicFstTraversalInstance inst in Initialize( - ref annIndex, - registers, - cmds, - initAnns - ) - ) + List> insts = InitializeBuffer; + insts.Clear(); + Initialize(ref annIndex, registers, cmds, initAnns, insts); + foreach (DeterministicFstTraversalInstance inst in insts) { inst.Output = ((ICloneable)Data).Clone(); - inst.Mappings.AddRange( - Data.Annotations.SelectMany(a => a.GetNodesBreadthFirst()) - .Zip( - inst.Output.Annotations.SelectMany(a => a.GetNodesBreadthFirst()), - (a1, a2) => new KeyValuePair, Annotation>(a1, a2) - ) + // Pair each source 
annotation with its clone via a lockstep preorder walk of the two + // isomorphic forests — same result as zipping the two BFS node sequences (dict order + // is irrelevant) but without the per-call Queue + SelectMany/Zip iterators + KVPs. + DataStructuresExtensions.PairedPreorderTraverse( + Data.Annotations, + inst.Output.Annotations, + inst.Mappings, + (mappings, a1, a2) => mappings[a1] = a2, + Direction.LeftToRight ); instStack.Push(inst); } diff --git a/src/SIL.Machine/FiniteState/Fst.cs b/src/SIL.Machine/FiniteState/Fst.cs index 04fb681c4..32f58c941 100644 --- a/src/SIL.Machine/FiniteState/Fst.cs +++ b/src/SIL.Machine/FiniteState/Fst.cs @@ -25,6 +25,16 @@ public class Fst : IFreezable private int _nextTag; private readonly Dictionary _groups; private readonly List _initializers; + + // Frozen-time partition of _initializers (see Freeze): the Dest!=0 commands are the per-call + // `cmds` list (read-only in traversal), and the Dest==0 commands drive the per-annotation + // SetOffset. Precomputing once at Freeze removes a List allocation + the + // filter loop from every Transduce call. Null until frozen → Transduce falls back to the + // inline build, so unfrozen callers are unaffected. Immutable after Freeze (the FST is shared + // read-only across parsing threads), so concurrent reads of the shared cmds list are safe. 
+ private List _nonZeroDestInitializers; + private List _zeroDestInitializers; + private int _registerCount; private Direction _dir; private Func, bool> _filter; @@ -114,11 +124,11 @@ public bool IsAcceptor get { return _operations == null; } } - public bool GetOffsets(string groupName, Register[,] registers, out TOffset start, out TOffset end) + public bool GetOffsets(string groupName, Register[] registers, out TOffset start, out TOffset end) { int tag = _groups[groupName]; - Register startValue = registers[tag, 0]; - Register endValue = registers[tag + 1, 1]; + Register startValue = registers[tag * 2]; + Register endValue = registers[(tag + 1) * 2 + 1]; if ( startValue.HasOffset && endValue.HasOffset @@ -245,7 +255,7 @@ public IFstOperations Operations get { return _operations; } } - internal IEqualityComparer[,]> RegistersEqualityComparer + internal IEqualityComparer[]> RegistersEqualityComparer { get { return _registersEqualityComparer; } } @@ -312,83 +322,68 @@ private bool Transduce( out IEnumerable> results ) { - ITraversalMethod traversalMethod; - if (_operations != null) - { - if (IsDeterministic) - { - traversalMethod = new DeterministicFstTraversalMethod( - this, - data, - varBindings, - startAnchor, - endAnchor, - useDefaults - ); - } - else - { - traversalMethod = new NondeterministicFstTraversalMethod( - this, - data, - varBindings, - startAnchor, - endAnchor, - useDefaults - ); - } - } - else - { - if (IsDeterministic) - { - traversalMethod = new DeterministicFsaTraversalMethod( - this, - data, - varBindings, - startAnchor, - endAnchor, - useDefaults - ); - } - else - { - traversalMethod = new NondeterministicFsaTraversalMethod( - this, - data, - varBindings, - startAnchor, - endAnchor, - useDefaults - ); - } - } + // A fresh traversal method per Transduce call. 
Pooling it per-thread across a word was + // tried and reverted: the pooled method survives a Gen0 collection, promotes to Gen2, + // and the stop-the-world Gen2 serializes parallel parsing (see RUSTIFY Phase 1b). With + // allocation now driven down elsewhere, short-lived (die-in-Gen0) is the right tradeoff. + ITraversalMethod traversalMethod = CreateTraversalMethod(); + traversalMethod.Reset(data, varBindings, startAnchor, endAnchor, useDefaults); List> resultList = null; int annIndex = traversalMethod.Annotations.IndexOf(start); var initAnns = new HashSet(); + // Reuse the frozen-time initializer partition when available (the hot, shared-grammar + // path); fall back to building cmds inline for an unfrozen FST. + List nonZeroDestInit = _nonZeroDestInitializers; + // RUSTIFY lever 1: allocate the initial-register scaffold once and clear it per start + // position instead of `new Register[regCount,2]` every outer iteration. Traverse only ever + // Array.Copy's it into the initial instances (never retains it), so reuse-after-clear is + // byte-identical — and AllMatches (analysis) runs one iteration per start, so this removes + // (starts-1) register-array allocations per matcher call. + // Flat array (not Register[,]): a CPU profile showed rectangular-array allocation + // (Array.CreateInstanceMDArray) dominating self-time on this hot path — see TraversalInstance. 
+ var initRegisters = new Register[_registerCount * 2]; + bool firstIteration = true; while (annIndex < traversalMethod.Annotations.Count && annIndex > -1) { - var initRegisters = new Register[_registerCount, 2]; + if (!firstIteration) + Array.Clear(initRegisters, 0, initRegisters.Length); + firstIteration = false; - var cmds = new List(); - foreach (TagMapCommand cmd in _initializers) + List cmds; + if (nonZeroDestInit != null) { - if (cmd.Dest == 0) + foreach (TagMapCommand cmd in _zeroDestInitializers) { - initRegisters[cmd.Dest, 0] + initRegisters[cmd.Dest * 2] .SetOffset(traversalMethod.Annotations[annIndex].Range.GetStart(_dir), true); } - else + cmds = nonZeroDestInit; + } + else + { + cmds = new List(); + foreach (TagMapCommand cmd in _initializers) { - cmds.Add(cmd); + if (cmd.Dest == 0) + { + initRegisters[cmd.Dest * 2] + .SetOffset(traversalMethod.Annotations[annIndex].Range.GetStart(_dir), true); + } + else + { + cmds.Add(cmd); + } } } - List> curResults = traversalMethod - .Traverse(ref annIndex, initRegisters, cmds, initAnns) - .ToList(); + List> curResults = traversalMethod.Traverse( + ref annIndex, + initRegisters, + cmds, + initAnns + ); if (curResults.Count > 0) { if (resultList == null) @@ -409,10 +404,31 @@ out IEnumerable> results return false; } - results = allMatches ? resultList.Distinct() : resultList; + // Distinct() materializes a lazy iterator + internal set every time it is enumerated; + // for 0/1 results there is nothing to dedupe (resultList is non-null with Count >= 1 + // here), so return the list directly and skip the iterator in that common case. + results = (allMatches && resultList.Count > 1) ? resultList.Distinct() : resultList; return true; } + private ITraversalMethod CreateTraversalMethod() + { + return CreateTraversalMethodCore(); + } + + private ITraversalMethod CreateTraversalMethodCore() + { + if (_operations != null) + { + return IsDeterministic + ? 
(ITraversalMethod)new DeterministicFstTraversalMethod(this) + : new NondeterministicFstTraversalMethod(this); + } + return IsDeterministic + ? (ITraversalMethod)new DeterministicFsaTraversalMethod(this) + : new NondeterministicFsaTraversalMethod(this); + } + private int ResultCompare(FstResult x, FstResult y) { int compare = x.Priority.CompareTo(y.Priority); @@ -2122,6 +2138,21 @@ public void Freeze() IsFrozen = true; foreach (State state in _states) state.Freeze(); + + // Partition the (now immutable) initializers once so Transduce reuses them instead of + // rebuilding the cmds list every call. Build into locals and publish the gating field + // (_nonZeroDestInitializers) last, so a reader never observes a partially filled list. + var zeroDest = new List(); + var nonZeroDest = new List(); + foreach (TagMapCommand cmd in _initializers) + { + if (cmd.Dest == 0) + zeroDest.Add(cmd); + else + nonZeroDest.Add(cmd); + } + _zeroDestInitializers = zeroDest; + _nonZeroDestInitializers = nonZeroDest; } public int GetFrozenHashCode() diff --git a/src/SIL.Machine/FiniteState/FstResult.cs b/src/SIL.Machine/FiniteState/FstResult.cs index 42fa9688f..ee202a840 100644 --- a/src/SIL.Machine/FiniteState/FstResult.cs +++ b/src/SIL.Machine/FiniteState/FstResult.cs @@ -7,8 +7,8 @@ namespace SIL.Machine.FiniteState { public class FstResult : IEquatable> { - private readonly IEqualityComparer[,]> _registersEqualityComparer; - private readonly Register[,] _registers; + private readonly IEqualityComparer[]> _registersEqualityComparer; + private readonly Register[] _registers; private readonly TData _output; private readonly VariableBindings _varBindings; private readonly string _id; @@ -19,9 +19,9 @@ public class FstResult : IEquatable> private readonly int _order; internal FstResult( - IEqualityComparer[,]> registersEqualityComparer, + IEqualityComparer[]> registersEqualityComparer, string id, - Register[,] registers, + Register[] registers, TData output, VariableBindings varBindings, 
int priority, @@ -48,7 +48,7 @@ public string ID get { return _id; } } - public Register[,] Registers + public Register[] Registers { get { return _registers; } } diff --git a/src/SIL.Machine/FiniteState/ITraversalMethod.cs b/src/SIL.Machine/FiniteState/ITraversalMethod.cs index d11dfab46..071171982 100644 --- a/src/SIL.Machine/FiniteState/ITraversalMethod.cs +++ b/src/SIL.Machine/FiniteState/ITraversalMethod.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using SIL.Machine.Annotations; +using SIL.Machine.FeatureModel; namespace SIL.Machine.FiniteState { @@ -7,9 +8,10 @@ internal interface ITraversalMethod where TData : IAnnotatedData { IList> Annotations { get; } - IEnumerable> Traverse( + void Reset(TData data, VariableBindings varBindings, bool startAnchor, bool endAnchor, bool useDefaults); + List> Traverse( ref int annIndex, - Register[,] initRegisters, + Register[] initRegisters, IList initCmds, ISet initAnns ); diff --git a/src/SIL.Machine/FiniteState/Input.cs b/src/SIL.Machine/FiniteState/Input.cs index 3fb4fc761..4f4304694 100644 --- a/src/SIL.Machine/FiniteState/Input.cs +++ b/src/SIL.Machine/FiniteState/Input.cs @@ -50,11 +50,46 @@ public bool Matches(FeatureStruct fs, bool unification, bool useDefaults, Variab { if (unification) { - return fs.IsUnifiable(_fs, useDefaults, varBindings) - && _negatedFSs.All(nfs => !fs.IsUnifiable(nfs, useDefaults)); + // Bit-packed fast path for the common phonological case (no defaults, no negation, + // both operands simple symbolic structs). Identical result, no varBindings clone, + // no dictionary walk. Falls back to the full engine otherwise. 
+ if (!useDefaults && _negatedFSs.Count == 0 && fs.TryFastUnifiable(_fs, out bool fastResult)) + return fastResult; + + if (!fs.IsUnifiable(_fs, useDefaults, varBindings)) + return false; + return NoneUnifiable(fs, useDefaults); } - return _fs.Subsumes(fs, useDefaults, varBindings) && _negatedFSs.All(nfs => !nfs.Subsumes(fs, useDefaults)); + return _fs.Subsumes(fs, useDefaults, varBindings) && NoneSubsumed(fs, useDefaults); + } + + // Explicit loops instead of `_negatedFSs.All(nfs => ...)`: the lambda's closure (capturing fs + // and useDefaults) and the boxed HashSet.Enumerator (via the IEnumerable extension-method + // path) were allocated on every call, even for the common case where _negatedFSs is empty. + // A plain `foreach` on the concrete HashSet reference uses its unboxed struct enumerator. + private bool NoneUnifiable(FeatureStruct fs, bool useDefaults) + { + if (_negatedFSs.Count == 0) + return true; + foreach (FeatureStruct nfs in _negatedFSs) + { + if (fs.IsUnifiable(nfs, useDefaults)) + return false; + } + return true; + } + + private bool NoneSubsumed(FeatureStruct fs, bool useDefaults) + { + if (_negatedFSs.Count == 0) + return true; + foreach (FeatureStruct nfs in _negatedFSs) + { + if (nfs.Subsumes(fs, useDefaults)) + return false; + } + return true; } public bool IsSatisfiable diff --git a/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalInstance.cs b/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalInstance.cs index 2f084aa9e..f87c72e73 100644 --- a/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalInstance.cs +++ b/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalInstance.cs @@ -1,4 +1,3 @@ -using System.Collections.Generic; using SIL.Machine.Annotations; namespace SIL.Machine.FiniteState @@ -6,24 +5,33 @@ namespace SIL.Machine.FiniteState internal class NondeterministicFsaTraversalInstance : TraversalInstance where TData : IAnnotatedData { - private readonly HashSet> _visited; + // RUSTIFY lever 1: a value-type 
bitset over state indices instead of a HashSet — no + // per-instance set allocation (the instance is created ~2,927x/word on Sena). + private VisitedStates _visited; public NondeterministicFsaTraversalInstance(int registerCount) - : base(registerCount, false) + : base(registerCount, false) { } + + public bool IsVisited(State state) { - _visited = new HashSet>(); + return _visited.Contains(state.Index); } - public ISet> Visited + public void MarkVisited(State state) { - get { return _visited; } + _visited.Add(state.Index); + } + + public void ClearVisited() + { + _visited.Clear(); } public override void CopyTo(TraversalInstance other) { base.CopyTo(other); var otherNfsa = (NondeterministicFsaTraversalInstance)other; - otherNfsa.Visited.UnionWith(_visited); + otherNfsa._visited.UnionWith(in _visited); } public override void Clear() diff --git a/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalMethod.cs b/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalMethod.cs index b5d3b3d5e..e2576e168 100644 --- a/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalMethod.cs +++ b/src/SIL.Machine/FiniteState/NondeterministicFsaTraversalMethod.cs @@ -1,5 +1,4 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.ObjectModel; @@ -10,19 +9,21 @@ internal class NondeterministicFsaTraversalMethod : TraversalMethodBase> where TData : IAnnotatedData { - public NondeterministicFsaTraversalMethod( - Fst fst, - TData data, - VariableBindings varBindings, - bool startAnchor, - bool endAnchor, - bool useDefaults - ) - : base(fst, data, varBindings, startAnchor, endAnchor, useDefaults) { } + // Hoisted out of Traverse: building this per call allocated a comparer object plus two bound + // delegates (KeyEquals/KeyGetHashCode are instance methods) on every Traverse call — thousands + // per word. 
The comparer only closes over `this` (via Fst), so one instance is reusable for the + // life of this traversal method. + private readonly IEqualityComparer _traversalKeyComparer; - public override IEnumerable> Traverse( + public NondeterministicFsaTraversalMethod(Fst fst) + : base(fst) + { + _traversalKeyComparer = AnonymousEqualityComparer.Create(KeyEquals, KeyGetHashCode); + } + + public override List> Traverse( ref int annIndex, - Register[,] initRegisters, + Register[] initRegisters, IList initCmds, ISet initAnns ) @@ -35,12 +36,10 @@ ISet initAnns ); var curResults = new List>(); - var traversed = new HashSet, int, Register[,]>>( - AnonymousEqualityComparer.Create, int, Register[,]>>( - KeyEquals, - KeyGetHashCode - ) - ); + // The dedup key is a value type (was Tuple<,,>): the HashSet stores it inline in its slot + // array, so there is no per-push heap object — `traversed.Add` is the hottest allocation in + // nondeterministic traversal. Byte-identical equality/hash (same fields, same comparers). 
+ var traversed = new HashSet(_traversalKeyComparer); while (instStack.Count != 0) { NondeterministicFsaTraversalInstance inst = instStack.Pop(); @@ -53,7 +52,7 @@ ISet initAnns bool isInstReusable = i == inst.State.Arcs.Count - 1; if (arc.Input.IsEpsilon) { - if (!inst.Visited.Contains(arc.Target)) + if (!inst.IsVisited(arc.Target)) { NondeterministicFsaTraversalInstance ti; if (isInstReusable) @@ -68,22 +67,15 @@ ISet initAnns ti.VariableBindings = varBindings; } - ti.Visited.Add(arc.Target); + ti.MarkVisited(arc.Target); NondeterministicFsaTraversalInstance newInst = EpsilonAdvance( ti, arc, curResults ); - Tuple, int, Register[,]> key = Tuple.Create( - newInst.State, - newInst.AnnotationIndex, - newInst.Registers - ); - if (!traversed.Contains(key)) - { + var key = new TraversalKey(newInst.State, newInst.AnnotationIndex, newInst.Registers); + if (traversed.Add(key)) instStack.Push(newInst); - traversed.Add(key); - } if (isInstReusable) releaseInstance = false; varBindings = null; @@ -108,17 +100,10 @@ NondeterministicFsaTraversalInstance newInst in Advance( ) ) { - newInst.Visited.Clear(); - Tuple, int, Register[,]> key = Tuple.Create( - newInst.State, - newInst.AnnotationIndex, - newInst.Registers - ); - if (!traversed.Contains(key)) - { + newInst.ClearVisited(); + var key = new TraversalKey(newInst.State, newInst.AnnotationIndex, newInst.Registers); + if (traversed.Add(key)) instStack.Push(newInst); - traversed.Add(key); - } } if (isInstReusable) releaseInstance = false; @@ -142,44 +127,52 @@ protected override NondeterministicFsaTraversalInstance CreateIn return new NondeterministicFsaTraversalInstance(Fst.RegisterCount); } - private bool KeyEquals( - Tuple, int, Register[,]> x, - Tuple, int, Register[,]> y - ) + // Value-type dedup key (was Tuple): stored inline in the `traversed` + // HashSet so a push no longer allocates a heap Tuple. Holds the instance's live Registers by + // reference exactly as the Tuple did (same reference + hash-at-Add semantics). 
+ private readonly struct TraversalKey { - return x.Item1.Equals(y.Item1) - && x.Item2.Equals(y.Item2) - && Fst.RegistersEqualityComparer.Equals(x.Item3, y.Item3); + public readonly State State; + public readonly int AnnotationIndex; + public readonly Register[] Registers; + + public TraversalKey(State state, int annotationIndex, Register[] registers) + { + State = state; + AnnotationIndex = annotationIndex; + Registers = registers; + } + } + + private bool KeyEquals(TraversalKey x, TraversalKey y) + { + return x.State.Equals(y.State) + && x.AnnotationIndex.Equals(y.AnnotationIndex) + && Fst.RegistersEqualityComparer.Equals(x.Registers, y.Registers); } - private int KeyGetHashCode(Tuple, int, Register[,]> m) + private int KeyGetHashCode(TraversalKey m) { int code = 23; - code = code * 31 + m.Item1.GetHashCode(); - code = code * 31 + m.Item2.GetHashCode(); - code = code * 31 + Fst.RegistersEqualityComparer.GetHashCode(m.Item3); + code = code * 31 + m.State.GetHashCode(); + code = code * 31 + m.AnnotationIndex.GetHashCode(); + code = code * 31 + Fst.RegistersEqualityComparer.GetHashCode(m.Registers); return code; } private Stack> InitializeStack( ref int annIndex, - Register[,] registers, + Register[] registers, IList cmds, ISet initAnns ) { var instStack = new Stack>(); - foreach ( - NondeterministicFsaTraversalInstance inst in Initialize( - ref annIndex, - registers, - cmds, - initAnns - ) - ) - { + List> insts = InitializeBuffer; + insts.Clear(); + Initialize(ref annIndex, registers, cmds, initAnns, insts); + foreach (NondeterministicFsaTraversalInstance inst in insts) instStack.Push(inst); - } return instStack; } diff --git a/src/SIL.Machine/FiniteState/NondeterministicFstTraversalInstance.cs b/src/SIL.Machine/FiniteState/NondeterministicFstTraversalInstance.cs index 3583c6ccf..8ce17b7a2 100644 --- a/src/SIL.Machine/FiniteState/NondeterministicFstTraversalInstance.cs +++ b/src/SIL.Machine/FiniteState/NondeterministicFstTraversalInstance.cs @@ -1,29 +1,38 @@ using 
System.Collections.Generic; -using System.Linq; using SIL.Extensions; using SIL.Machine.Annotations; -using SIL.Machine.DataStructures; namespace SIL.Machine.FiniteState { internal class NondeterministicFstTraversalInstance : TraversalInstance where TData : IAnnotatedData { - private readonly HashSet> _visited; + // RUSTIFY lever 1: value-type bitset over state indices instead of a HashSet (no + // per-instance set allocation). + private VisitedStates _visited; private readonly Dictionary, Annotation> _mappings; private readonly List> _outputs; public NondeterministicFstTraversalInstance(int registerCount) : base(registerCount, false) { - _visited = new HashSet>(); _mappings = new Dictionary, Annotation>(); _outputs = new List>(); } - public ISet> Visited + public bool IsVisited(State state) { - get { return _visited; } + return _visited.Contains(state.Index); + } + + public void MarkVisited(State state) + { + _visited.Add(state.Index); + } + + public void ClearVisited() + { + _visited.Clear(); } public IDictionary, Annotation> Mappings @@ -42,17 +51,15 @@ public override void CopyTo(TraversalInstance other) var otherNfst = (NondeterministicFstTraversalInstance)other; - otherNfst._visited.UnionWith(_visited); - Dictionary, Annotation> outputMappings = Output - .Annotations.SelectMany(a => a.GetNodesBreadthFirst()) - .Zip(Output.Annotations.SelectMany(a => a.GetNodesBreadthFirst())) - .ToDictionary(t => t.Item1, t => t.Item2); - otherNfst._mappings.AddRange( - _mappings.Select(kvp => new KeyValuePair, Annotation>( - kvp.Key, - outputMappings[kvp.Value] - )) - ); + otherNfst._visited.UnionWith(in _visited); + // The original built `outputMappings` by zipping this.Output's node sequence with itself + // — a deterministic (Queue-based BFS) enumeration paired element-for-element, i.e. the + // identity map — so `outputMappings[v] == v` and the whole block reduces to copying + // _mappings unchanged. 
Doing that directly avoids a Dictionary + two SelectMany(BFS, + // each allocating a Queue + iterator) + Zip + Select per instance copy (very hot in + // nondeterministic traversal). Byte-identical; otherNfst._mappings is empty here + // (GetCachedInstance -> Clear()). + otherNfst._mappings.AddRange(_mappings); otherNfst._outputs.AddRange(_outputs); } diff --git a/src/SIL.Machine/FiniteState/NondeterministicFstTraversalMethod.cs b/src/SIL.Machine/FiniteState/NondeterministicFstTraversalMethod.cs index e171f4410..a4e1da9be 100644 --- a/src/SIL.Machine/FiniteState/NondeterministicFstTraversalMethod.cs +++ b/src/SIL.Machine/FiniteState/NondeterministicFstTraversalMethod.cs @@ -13,19 +13,12 @@ internal class NondeterministicFstTraversalMethod : TraversalMethodBase> where TData : IAnnotatedData { - public NondeterministicFstTraversalMethod( - Fst fst, - TData data, - VariableBindings varBindings, - bool startAnchor, - bool endAnchor, - bool useDefaults - ) - : base(fst, data, varBindings, startAnchor, endAnchor, useDefaults) { } + public NondeterministicFstTraversalMethod(Fst fst) + : base(fst) { } - public override IEnumerable> Traverse( + public override List> Traverse( ref int annIndex, - Register[,] initRegisters, + Register[] initRegisters, IList initCmds, ISet initAnns ) @@ -38,12 +31,11 @@ ISet initAnns ); var curResults = new List>(); - var traversed = new HashSet< - Tuple, int, Register[,], Output[]> - >( - AnonymousEqualityComparer.Create< - Tuple, int, Register[,], Output[]> - >(KeyEquals, KeyGetHashCode) + // Value-type dedup key (was Tuple<,,,>): stored inline in the HashSet, so a push no longer + // allocates a heap Tuple. The per-push Outputs snapshot array remains (the key must capture + // the outputs at push time, since the instance's Outputs list keeps growing afterward). 
+ var traversed = new HashSet( + AnonymousEqualityComparer.Create(KeyEquals, KeyGetHashCode) ); while (instStack.Count != 0) { @@ -57,7 +49,7 @@ ISet initAnns bool isInstReusable = i == inst.State.Arcs.Count - 1; if (arc.Input.IsEpsilon) { - if (!inst.Visited.Contains(arc.Target)) + if (!inst.IsVisited(arc.Target)) { NondeterministicFstTraversalInstance ti; if (isInstReusable) @@ -79,24 +71,23 @@ ISet initAnns ti.Outputs.Add(arc.Outputs[0]); } - ti.Visited.Add(arc.Target); + ti.MarkVisited(arc.Target); NondeterministicFstTraversalInstance newInst = EpsilonAdvance( inst, arc, curResults ); - Tuple, int, Register[,], Output[]> key = - Tuple.Create( - newInst.State, - newInst.AnnotationIndex, - newInst.Registers, - newInst.Outputs.ToArray() - ); - if (!traversed.Contains(key)) - { + var key = new TraversalKey( + newInst.State, + newInst.AnnotationIndex, + newInst.Registers, + newInst.Outputs.ToArray() + ); + // Add returns false if already present; this single hash/lookup replaces + // the Contains-then-Add pair (the structural key hash over registers + + // outputs is expensive and this is the innermost traversal loop). + if (traversed.Add(key)) instStack.Push(newInst); - traversed.Add(key); - } if (isInstReusable) releaseInstance = false; varBindings = null; @@ -128,19 +119,16 @@ NondeterministicFstTraversalInstance newInst in Advance( ) ) { - newInst.Visited.Clear(); - Tuple, int, Register[,], Output[]> key = - Tuple.Create( - newInst.State, - newInst.AnnotationIndex, - newInst.Registers, - newInst.Outputs.ToArray() - ); - if (!traversed.Contains(key)) - { + newInst.ClearVisited(); + var key = new TraversalKey( + newInst.State, + newInst.AnnotationIndex, + newInst.Registers, + newInst.Outputs.ToArray() + ); + // Single hash/lookup (Add returns false if present) — see note above. 
+ if (traversed.Add(key)) instStack.Push(newInst); - traversed.Add(key); - } } if (isInstReusable) releaseInstance = false; @@ -164,51 +152,71 @@ protected override NondeterministicFstTraversalInstance CreateIn return new NondeterministicFstTraversalInstance(Fst.RegisterCount); } - private bool KeyEquals( - Tuple, int, Register[,], Output[]> x, - Tuple, int, Register[,], Output[]> y - ) + // Value-type dedup key (was Tuple): stored inline in the + // `traversed` HashSet so a push no longer allocates a heap Tuple. Holds the instance's live + // Registers by reference and a snapshot of its Outputs, exactly as the Tuple did. + private readonly struct TraversalKey { - return x.Item1.Equals(y.Item1) - && x.Item2.Equals(y.Item2) - && Fst.RegistersEqualityComparer.Equals(x.Item3, y.Item3) - && x.Item4.SequenceEqual(y.Item4); + public readonly State State; + public readonly int AnnotationIndex; + public readonly Register[] Registers; + public readonly Output[] Outputs; + + public TraversalKey( + State state, + int annotationIndex, + Register[] registers, + Output[] outputs + ) + { + State = state; + AnnotationIndex = annotationIndex; + Registers = registers; + Outputs = outputs; + } } - private int KeyGetHashCode(Tuple, int, Register[,], Output[]> m) + private bool KeyEquals(TraversalKey x, TraversalKey y) + { + return x.State.Equals(y.State) + && x.AnnotationIndex.Equals(y.AnnotationIndex) + && Fst.RegistersEqualityComparer.Equals(x.Registers, y.Registers) + && x.Outputs.SequenceEqual(y.Outputs); + } + + private int KeyGetHashCode(TraversalKey m) { int code = 23; - code = code * 31 + m.Item1.GetHashCode(); - code = code * 31 + m.Item2.GetHashCode(); - code = code * 31 + Fst.RegistersEqualityComparer.GetHashCode(m.Item3); - code = code * 31 + m.Item4.GetSequenceHashCode(); + code = code * 31 + m.State.GetHashCode(); + code = code * 31 + m.AnnotationIndex.GetHashCode(); + code = code * 31 + Fst.RegistersEqualityComparer.GetHashCode(m.Registers); + code = code * 31 + 
m.Outputs.GetSequenceHashCode(); return code; } private Stack> InitializeStack( ref int annIndex, - Register[,] registers, + Register[] registers, IList cmds, ISet initAnns ) { var instStack = new Stack>(); - foreach ( - NondeterministicFstTraversalInstance inst in Initialize( - ref annIndex, - registers, - cmds, - initAnns - ) - ) + List> insts = InitializeBuffer; + insts.Clear(); + Initialize(ref annIndex, registers, cmds, initAnns, insts); + foreach (NondeterministicFstTraversalInstance inst in insts) { inst.Output = ((ICloneable)Data).Clone(); - inst.Mappings.AddRange( - Data.Annotations.SelectMany(a => a.GetNodesBreadthFirst()) - .Zip( - inst.Output.Annotations.SelectMany(a => a.GetNodesBreadthFirst()), - (a1, a2) => new KeyValuePair, Annotation>(a1, a2) - ) + // Pair each source annotation with its clone via a lockstep preorder walk of the two + // isomorphic forests — same result as zipping the two BFS node sequences (dict order + // is irrelevant) but without the per-call Queue + SelectMany/Zip iterators + KVPs. + DataStructuresExtensions.PairedPreorderTraverse( + Data.Annotations, + inst.Output.Annotations, + inst.Mappings, + (mappings, a1, a2) => mappings[a1] = a2, + Direction.LeftToRight ); instStack.Push(inst); } diff --git a/src/SIL.Machine/FiniteState/RegistersEqualityComparer.cs b/src/SIL.Machine/FiniteState/RegistersEqualityComparer.cs index 5337cab37..36aab51d3 100644 --- a/src/SIL.Machine/FiniteState/RegistersEqualityComparer.cs +++ b/src/SIL.Machine/FiniteState/RegistersEqualityComparer.cs @@ -1,45 +1,60 @@ -using System.Collections.Generic; +using System.Collections.Generic; namespace SIL.Machine.FiniteState { - internal class RegistersEqualityComparer : IEqualityComparer[,]> + // Registers is a flat Register[] of length 2*registerCount (see RUSTIFY MD-array note + // on TraversalInstance/Fst.Transduce): index i's pair lives at [2*i] (start) / [2*i+1] (end). 
+ internal class RegistersEqualityComparer : IEqualityComparer[]> { private readonly IEqualityComparer _offsetEqualityComparer; + // Devirtualizes the common case: EqualityComparer.Default.Equals/GetHashCode are + // JIT-inlined for a value-type TOffset (HermitCrab's int), so skip the interface-dispatch + // field entirely when the caller passed the default comparer. + private readonly bool _isDefault; + public RegistersEqualityComparer(IEqualityComparer offsetEqualityComparer) { _offsetEqualityComparer = offsetEqualityComparer; + _isDefault = ReferenceEquals(offsetEqualityComparer, EqualityComparer.Default); } - public bool Equals(Register[,] x, Register[,] y) + public bool Equals(Register[] x, Register[] y) { - for (int i = 0; i < x.GetLength(0); i++) + for (int i = 0; i < x.Length; i++) { - for (int j = 0; j < 2; j++) - { - if (!x[i, j].ValueEquals(y[i, j], _offsetEqualityComparer)) - return false; - } + if (!RegisterEquals(x[i], y[i])) + return false; } return true; } - public int GetHashCode(Register[,] obj) + private bool RegisterEquals(Register x, Register y) + { + return _isDefault + ? x.ValueEquals(y, EqualityComparer.Default) + : x.ValueEquals(y, _offsetEqualityComparer); + } + + public int GetHashCode(Register[] obj) { int code = 23; - for (int i = 0; i < obj.GetLength(0); i++) + for (int i = 0; i < obj.Length; i++) { - for (int j = 0; j < 2; j++) + if (obj[i].HasOffset) + { + code = + code * 31 + + ( + _isDefault + ? 
EqualityComparer.Default.GetHashCode(obj[i].Offset) + : _offsetEqualityComparer.GetHashCode(obj[i].Offset) + ); + code = code * 31 + obj[i].IsStart.GetHashCode(); + } + else { - if (obj[i, j].HasOffset) - { - code = code * 31 + _offsetEqualityComparer.GetHashCode(obj[i, j].Offset); - code = code * 31 + obj[i, j].IsStart.GetHashCode(); - } - else - { - code = code * 31 + 0; - } + code = code * 31 + 0; } } return code; diff --git a/src/SIL.Machine/FiniteState/State.cs b/src/SIL.Machine/FiniteState/State.cs index f01aa3959..ce7676c0c 100644 --- a/src/SIL.Machine/FiniteState/State.cs +++ b/src/SIL.Machine/FiniteState/State.cs @@ -96,6 +96,18 @@ public override string ToString() return string.Format("State {0}", _index); } + // Without this override, GetHashCode() falls back to the CLR's default identity hash + // (RuntimeHelpers.GetHashCode's sync-block-index path) — a CPU profile showed that call + // dominating self-time on the hot nondeterministic-traversal dedup path (TraversalKey's + // hash folds in State.GetHashCode() once per pushed instance). _index is a stable, + // already-unique-per-Fst int assigned once at construction, so it is a valid, far cheaper + // hash; Equals() is intentionally left as reference equality (state objects are singletons + // within their Fst, never recreated), so the Equals/GetHashCode contract still holds. 
+ public override int GetHashCode() + { + return _index; + } + private void CheckFrozen() { if (IsFrozen) diff --git a/src/SIL.Machine/FiniteState/TraversalInstance.cs b/src/SIL.Machine/FiniteState/TraversalInstance.cs index 100f728d5..3e6b27e68 100644 --- a/src/SIL.Machine/FiniteState/TraversalInstance.cs +++ b/src/SIL.Machine/FiniteState/TraversalInstance.cs @@ -10,12 +10,17 @@ namespace SIL.Machine.FiniteState internal abstract class TraversalInstance where TData : IAnnotatedData { - private readonly Register[,] _registers; + // Flat (SZ, single-dimension zero-lower-bound) array instead of Register[,]: the + // CLR allocates rectangular (multi-dim) arrays through the general-purpose + // Array.CreateInstanceMDArray runtime helper, which a CPU profile showed dominating + // self-time on this hot path — SZ arrays get the JIT-inlined fast allocation path instead. + // Index i's (start, end) pair lives at [2*i] / [2*i+1]. + private readonly Register[] _registers; private readonly List _priorities; protected TraversalInstance(int registerCount, bool deterministic) { - _registers = new Register[registerCount, 2]; + _registers = new Register[registerCount * 2]; if (!deterministic) _priorities = new List(); } @@ -29,7 +34,7 @@ public IList Priorities get { return _priorities; } } - public Register[,] Registers + public Register[] Registers { get { return _registers; } } diff --git a/src/SIL.Machine/FiniteState/TraversalMethodBase.cs b/src/SIL.Machine/FiniteState/TraversalMethodBase.cs index c5934d991..fb7a969e7 100644 --- a/src/SIL.Machine/FiniteState/TraversalMethodBase.cs +++ b/src/SIL.Machine/FiniteState/TraversalMethodBase.cs @@ -13,54 +13,112 @@ internal abstract class TraversalMethodBase : ITraversalM where TInst : TraversalInstance { private readonly Fst _fst; - private readonly TData _data; - private readonly VariableBindings _varBindings; - private readonly bool _startAnchor; - private readonly bool _endAnchor; - private readonly bool _useDefaults; - private 
readonly List> _annotations; + private TData _data; + private VariableBindings _varBindings; + private bool _startAnchor; + private bool _endAnchor; + private bool _useDefaults; + + // Either this method's own scratch list (built by Reset) or a shared filtered view cached on + // a frozen AnnotationList (see Reset). When shared (_annotationsShared), it must never be + // mutated — traversal only reads it after Reset, so the only guarded site is Reset's Clear(). + private List> _annotations; + private bool _annotationsShared; + + // Instance free-list, kept across Reset() calls so a traversal method pooled for the + // duration of one word (see Fst.Transduce + Morpher per-word reset) reuses instances across + // the thousands of Transduce calls that word triggers. private readonly Queue _cachedInstances; - protected TraversalMethodBase( - Fst fst, - TData data, - VariableBindings varBindings, - bool startAnchor, - bool endAnchor, - bool useDefaults - ) + // Cached delegate for the per-annotation insertion sort in Reset(). Allocated once here + // rather than per Reset() call so the depth-first walk uses the allocation-free + // PreorderTraverse(action) form instead of GetNodesDepthFirst(), whose yield state machine + // was heap-allocated on every Transduce (Reset runs once per Transduce, thousands per word). + private readonly Action> _insertAnnotation; + + protected TraversalMethodBase(Fst fst) { _fst = fst; + // _annotations is created lazily in Reset: on the (common) cached-view hit path this + // method never needs a scratch list of its own. + _cachedInstances = new Queue(); + _insertAnnotation = InsertAnnotation; + } + + /// + /// Re-targets this (pooled) traversal method at a new input without reallocating it or its + /// instance free-list. Rebuilds the per-input annotation list; keeps . 
+ /// + public void Reset(TData data, VariableBindings varBindings, bool startAnchor, bool endAnchor, bool useDefaults) + { _data = data; _varBindings = varBindings; _startAnchor = startAnchor; _endAnchor = endAnchor; _useDefaults = useDefaults; - _annotations = new List>(); - // insertion sort - foreach (Annotation topAnn in _data.Annotations.GetNodes(_fst.Direction)) + + // The filtered+sorted list built below depends only on (annotation list, filter, + // direction) — NOT on which FST asks — and on the sena grammar ~89% of Transduce calls + // re-derive a view that was already built for the same frozen list (COW clones share the + // frozen source's projection, and rule filters are a handful of compiler-cached lambdas). + // Frozen lists are immutable, so a cached view is final; unfrozen lists never cache + // (their annotations' FeatureStructs can be edited in place, silently invalidating a + // cached view). + AnnotationList annList = _data.Annotations; + bool cacheable = annList.IsFrozen; + if (cacheable) { - foreach (Annotation ann in topAnn.GetNodesDepthFirst(_fst.Direction)) + List> cached = annList.GetFilteredView(_fst.Filter, _fst.Direction); + if (cached != null) { - if (!_fst.Filter(ann)) - continue; - - int i = _annotations.Count - 1; - while (i >= 0 && CompareAnnotations(_annotations[i], ann) > 0) - { - if (i + 1 == _annotations.Count) - _annotations.Add(_annotations[i]); - else - _annotations[i + 1] = _annotations[i]; - i--; - } - if (i + 1 == _annotations.Count) - _annotations.Add(ann); - else - _annotations[i + 1] = ann; + _annotations = cached; + _annotationsShared = true; + return; } } - _cachedInstances = new Queue(); + + if (_annotations == null || _annotationsShared) + { + _annotations = new List>(); + _annotationsShared = false; + } + else + { + _annotations.Clear(); + } + // insertion sort (PreorderTraverse with a cached delegate — same depth-first order as + // GetNodesDepthFirst but no per-call yield-iterator allocation; see 
_insertAnnotation). + foreach (Annotation topAnn in annList.GetNodes(_fst.Direction)) + topAnn.PreorderTraverse(_insertAnnotation, _fst.Direction); + + if (cacheable) + { + // Publish for the next Transduce against the same frozen list. This method keeps + // using the (now-shared) list read-only; mark it shared so a hypothetical re-Reset + // of this method starts a fresh scratch list instead of clearing the published one. + annList.AddFilteredView(_fst.Filter, _fst.Direction, _annotations); + _annotationsShared = true; + } + } + + private void InsertAnnotation(Annotation ann) + { + if (!_fst.Filter(ann)) + return; + + int i = _annotations.Count - 1; + while (i >= 0 && CompareAnnotations(_annotations[i], ann) > 0) + { + if (i + 1 == _annotations.Count) + _annotations.Add(_annotations[i]); + else + _annotations[i + 1] = _annotations[i]; + i--; + } + if (i + 1 == _annotations.Count) + _annotations.Add(ann); + else + _annotations[i + 1] = ann; } private int CompareAnnotations(Annotation x, Annotation y) @@ -87,33 +145,56 @@ public IList> Annotations get { return _annotations; } } - public abstract IEnumerable> Traverse( + public abstract List> Traverse( ref int annIndex, - Register[,] initRegisters, + Register[] initRegisters, IList initCmds, ISet initAnns ); + private static void ApplyCommand( + Register[] registers, + TagMapCommand cmd, + Register start, + Register end + ) + { + if (cmd.Src == TagMapCommand.CurrentPosition) + { + registers[cmd.Dest * 2] = start; + registers[cmd.Dest * 2 + 1] = end; + } + else + { + registers[cmd.Dest * 2] = registers[cmd.Src * 2]; + registers[cmd.Dest * 2 + 1] = registers[cmd.Src * 2 + 1]; + } + } + protected static void ExecuteCommands( - Register[,] registers, + Register[] registers, IEnumerable cmds, Register start, Register end ) { foreach (TagMapCommand cmd in cmds) - { - if (cmd.Src == TagMapCommand.CurrentPosition) - { - registers[cmd.Dest, 0] = start; - registers[cmd.Dest, 1] = end; - } - else - { - registers[cmd.Dest, 0] = 
registers[cmd.Src, 0]; - registers[cmd.Dest, 1] = registers[cmd.Src, 1]; - } - } + ApplyCommand(registers, cmd, start, end); + } + + // Concrete-List overload: the hot callers (arc.Commands, state.Finishers) pass a List, so an + // index for-loop avoids boxing the List.Enumerator struct that the IEnumerable foreach incurs + // on every arc-advance. Overload resolution routes List args here; the cold IList init path keeps + // the IEnumerable overload above. + protected static void ExecuteCommands( + Register[] registers, + List cmds, + Register start, + Register end + ) + { + for (int i = 0; i < cmds.Count; i++) + ApplyCommand(registers, cmds[i], start, end); } protected bool CheckInputMatch(Arc arc, int annIndex, VariableBindings varBindings) @@ -129,7 +210,7 @@ protected bool CheckInputMatch(Arc arc, int annIndex, VariableBi private void CheckAccepting( int annIndex, - Register[,] registers, + Register[] registers, TData output, VariableBindings varBindings, State state, @@ -141,7 +222,7 @@ IList priorities { Annotation ann = annIndex < _annotations.Count ? _annotations[annIndex] : _data.Annotations.GetEnd(_fst.Direction); - var matchRegisters = (Register[,])registers.Clone(); + var matchRegisters = (Register[])registers.Clone(); ExecuteCommands(matchRegisters, state.Finishers, new Register(), new Register()); if (state.AcceptInfos.Count > 0) { @@ -190,14 +271,17 @@ IList priorities } } - protected IEnumerable Initialize( + // De-iterator (RUSTIFY lever 1): fills the caller-provided buffer instead of allocating a fresh + // List per call (plus a nested List per recursive optional-skip). The buffer is reused per + // Transduce by the traversal method (see InitializeStack); recursion appends to the same buffer. 
+ protected void Initialize( ref int annIndex, - Register[,] registers, + Register[] registers, IList cmds, - ISet initAnns + ISet initAnns, + List output ) { - var insts = new List(); TOffset offset = _annotations[annIndex].Range.GetStart(_fst.Direction); if (_startAnchor) @@ -212,11 +296,7 @@ ISet initAnns { int nextIndex = GetNextNonoverlappingAnnotationIndex(i); if (nextIndex != _annotations.Count) - { - insts.AddRange( - Initialize(ref nextIndex, (Register[,])registers.Clone(), cmds, initAnns) - ); - } + Initialize(ref nextIndex, (Register[])registers.Clone(), cmds, initAnns, output); } } } @@ -237,20 +317,46 @@ ISet initAnns Array.Copy(registers, inst.Registers, registers.Length); if (!_fst.IgnoreVariables) inst.VariableBindings = _varBindings != null ? _varBindings.Clone() : new VariableBindings(); - insts.Add(inst); + output.Add(inst); initAnns.Add(annIndex); } } + } - return insts; + // RUSTIFY lever 1 (de-iterator): Advance was a `yield`-based iterator, so every call (one per + // matched arc, recursively for optional-skip forks — millions/word) allocated an iterator state + // machine. It now fills a reusable per-method buffer instead. The traversal method is created + // fresh per Transduce (dies in Gen0), so the buffer carries no cross-word retention (the Phase-1b + // regression), and Advance is not re-entrant within one method (a re-entrant Transduce gets its + // own method instance + buffer). Byte-identical: same results in the same order. + // One reusable result buffer per traversal method (per-Transduce → no cross-word retention; can't + // be a thread-static — CheckAccepting's Acceptable predicate can re-enter Transduce). Shared by + // Initialize and Advance: Initialize fills it once at the start of Traverse and the caller fully + // consumes it building the work stack before the main loop's first Advance reuses it, so they + // never overlap. 
+ private readonly List _buffer = new List(); + + protected List InitializeBuffer => _buffer; + + protected List Advance( + TInst inst, + VariableBindings varBindings, + Arc arc, + ICollection> curResults + ) + { + _buffer.Clear(); + AdvanceInto(inst, varBindings, arc, curResults, false, _buffer); + return _buffer; } - protected IEnumerable Advance( + private void AdvanceInto( TInst inst, VariableBindings varBindings, Arc arc, ICollection> curResults, - bool optional = false + bool optional, + List output ) { inst.Priorities?.Add(arc.Priority); @@ -271,8 +377,10 @@ protected IEnumerable Advance( if (nextIndex < _annotations.Count) { - var anns = new List(); bool cloneOutputs = false; + // The same-offset window is a contiguous index range [nextIndex, annsEnd); track its + // end bound instead of materializing a List per Advance call (hot path). + int annsEnd = nextIndex; for ( int i = nextIndex; i < _annotations.Count && _annotations[i].Range.GetStart(_fst.Direction).Equals(nextOffset); @@ -283,13 +391,12 @@ protected IEnumerable Advance( { TInst ti = CopyInstance(inst); ti.AnnotationIndex = i; - foreach (TInst ni in Advance(ti, varBindings, arc, curResults, true)) - { - yield return ni; + int before = output.Count; + AdvanceInto(ti, varBindings, arc, curResults, true, output); + if (output.Count > before) cloneOutputs = true; - } } - anns.Add(i); + annsEnd = i + 1; } ExecuteCommands( @@ -314,13 +421,13 @@ protected IEnumerable Advance( inst.State = arc.Target; bool first = true; - foreach (int curIndex in anns) + for (int curIndex = nextIndex; curIndex < annsEnd; curIndex++) { TInst ni = first ? inst : CopyInstance(inst); ni.AnnotationIndex = curIndex; if (varBindings != null) inst.VariableBindings = cloneOutputs ? 
varBindings.Clone() : varBindings; - yield return ni; + output.Add(ni); cloneOutputs = true; first = false; } @@ -346,7 +453,7 @@ protected IEnumerable Advance( inst.State = arc.Target; inst.AnnotationIndex = nextIndex; inst.VariableBindings = varBindings; - yield return inst; + output.Add(inst); } } @@ -384,7 +491,7 @@ ICollection> curResults protected void CheckAcceptingStartState( ISet anns, - Register[,] registers, + Register[] registers, ICollection> curResults ) { diff --git a/src/SIL.Machine/FiniteState/VisitedStates.cs b/src/SIL.Machine/FiniteState/VisitedStates.cs new file mode 100644 index 000000000..72a220ad1 --- /dev/null +++ b/src/SIL.Machine/FiniteState/VisitedStates.cs @@ -0,0 +1,58 @@ +using System; + +namespace SIL.Machine.FiniteState +{ + /// + /// A value-type set of FST state indices used by the nondeterministic traversal to avoid epsilon + /// loops. States have a dense (0..N-1), so membership is a + /// bitset: states 0–63 live in an inline ulong field (zero heap allocation — the common case, + /// HC rule FSTs have only a handful of states) and an overflow ulong[] is allocated lazily only + /// for FSTs with 64+ states. RUSTIFY lever 1: replaces the per-instance HashSet<State> + /// (~1.17M allocated per word on Sena) so creating a traversal instance no longer allocates a set. + /// + internal struct VisitedStates + { + private ulong _bits0; // states 0..63 + private ulong[] _overflow; // states 64.., word i covers states [64*(i+1) .. 
64*(i+1)+63] + + public bool Contains(int index) + { + if (index < 64) + return (_bits0 & (1UL << index)) != 0; + int w = index / 64 - 1; + return _overflow != null && w < _overflow.Length && (_overflow[w] & (1UL << (index & 63))) != 0; + } + + public void Add(int index) + { + if (index < 64) + { + _bits0 |= 1UL << index; + return; + } + int w = index / 64 - 1; + if (_overflow == null || w >= _overflow.Length) + Array.Resize(ref _overflow, w + 1); + _overflow[w] |= 1UL << (index & 63); + } + + public void Clear() + { + _bits0 = 0; + if (_overflow != null) + Array.Clear(_overflow, 0, _overflow.Length); + } + + public void UnionWith(in VisitedStates other) + { + _bits0 |= other._bits0; + if (other._overflow != null) + { + if (_overflow == null || _overflow.Length < other._overflow.Length) + Array.Resize(ref _overflow, other._overflow.Length); + for (int i = 0; i < other._overflow.Length; i++) + _overflow[i] |= other._overflow[i]; + } + } + } +} diff --git a/src/SIL.Machine/Matching/Matcher.cs b/src/SIL.Machine/Matching/Matcher.cs index 9d73cfe4e..3c0174a14 100644 --- a/src/SIL.Machine/Matching/Matcher.cs +++ b/src/SIL.Machine/Matching/Matcher.cs @@ -19,6 +19,12 @@ public class Matcher private Fst _fsa; private readonly IEqualityComparer> _matchComparer; + // Memoizes match.ID -> split parts. IDs come from the (fixed, small) set of accepting-state + // labels, so re-splitting the same string on every result is wasted allocation. The matcher is + // shared read-only across parallel parses, so the cache must be concurrent. 
+ private readonly System.Collections.Concurrent.ConcurrentDictionary _idSplitCache = + new System.Collections.Concurrent.ConcurrentDictionary(); + public Matcher(Pattern pattern) : this(pattern, new MatcherSettings()) { } @@ -201,7 +207,9 @@ private Match CreatePatternMatch(TData input, FstResult() + : _idSplitCache.GetOrAdd(match.ID, id => id.Split('*')), match.VariableBindings, match.NextAnnotation ); diff --git a/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs b/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs index 9a68ea244..b698989f1 100644 --- a/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs +++ b/src/SIL.Machine/Rules/ParallelCombinationRuleCascade.cs @@ -29,8 +29,16 @@ IEqualityComparer comparer ) : base(rules, multiApp, comparer) { } + /// + /// Caps the parallelism used by . Default -1 (unbounded, the .NET + /// default). Set to the morpher's MaxDegreeOfParallelism so the cap is actually honored + /// rather than the parallel path running at the default scheduler degree. + /// + public int MaxDegreeOfParallelism { get; set; } = -1; + public override IEnumerable Apply(TData input) { + var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }; var output = new ConcurrentStack(); var from = new ConcurrentStack>>(); from.Push(Tuple.Create(input, !MultipleApplication ? 
new HashSet() : null)); @@ -40,6 +48,7 @@ public override IEnumerable Apply(TData input) to.Clear(); Parallel.ForEach( from, + parallelOptions, work => { for (int i = 0; i < Rules.Count; i++) diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/AffixTemplateTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/AffixTemplateTests.cs index 54786c82f..bf564246f 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/AffixTemplateTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/AffixTemplateTests.cs @@ -1,5 +1,4 @@ using NUnit.Framework; -using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.Machine.Morphology.HermitCrab.MorphologicalRules; @@ -60,8 +59,8 @@ public void RealizationalRule() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(alvStop).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(alvStop).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new InsertSegments(Table3, "ɯd") }, } @@ -69,14 +68,14 @@ public void RealizationalRule() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "t") }, } ); edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "d") }, } ); @@ -97,8 +96,8 @@ public void RealizationalRule() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(labiodental).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(labiodental).Value, }, Rhs = { new CopyFromInput("1"), new ModifyFromInput("2", 
voiced), new InsertSegments(Table3, "z") }, } @@ -106,7 +105,7 @@ public void RealizationalRule() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(strident).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(strident).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "ɯz") }, } ); @@ -115,8 +114,8 @@ public void RealizationalRule() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(voicelessCons).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(voicelessCons).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new InsertSegments(Table3, "s") }, } @@ -124,7 +123,7 @@ public void RealizationalRule() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "z") }, } ); @@ -143,7 +142,7 @@ public void RealizationalRule() evidential.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); @@ -245,8 +244,8 @@ public void NonFinalTemplate() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(alvStop).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(alvStop).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new InsertSegments(Table3, "ɯd") }, } @@ -254,14 +253,14 @@ public void NonFinalTemplate() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, Rhs = 
{ new CopyFromInput("1"), new InsertSegments(Table3, "t") }, } ); edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "d") }, } ); @@ -284,7 +283,7 @@ public void NonFinalTemplate() nominalizer.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); @@ -303,8 +302,8 @@ public void NonFinalTemplate() crule.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("head"), new InsertSegments(Table3, "+"), new CopyFromInput("nonHead") }, } ); @@ -319,7 +318,7 @@ public void NonFinalTemplate() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); @@ -363,7 +362,7 @@ public void AffixTemplateAppliedAfterMorphologicalRule() nominalizer.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); @@ -379,7 +378,7 @@ public void AffixTemplateAppliedAfterMorphologicalRule() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new 
CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); @@ -415,7 +414,7 @@ public void SameRuleUsedInMultipleTemplates() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "d") }, } ); @@ -447,7 +446,7 @@ public void SameRuleUsedInMultipleTemplates() nominalizer.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs index 2d1afc619..065d47fef 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/HermitCrabTestBase.cs @@ -1,6 +1,5 @@ using System.Text; using NUnit.Framework; -using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.ObjectModel; @@ -682,11 +681,7 @@ public void FixtureSetUp() entry .Allomorphs[0] .Environments.Add( - new AllomorphEnvironment( - ConstraintType.Require, - null, - Pattern.New().Annotation(vowel).Value - ) + new AllomorphEnvironment(ConstraintType.Require, null, Pattern.New().Annotation(vowel).Value) ); entry = AddEntry( @@ -710,26 +705,18 @@ public void FixtureSetUp() new AllomorphEnvironment( ConstraintType.Require, null, - Pattern.New().Annotation(unroundedVowel).Value + Pattern.New().Annotation(unroundedVowel).Value ) ); entry .Allomorphs[1] .Environments.Add( - new AllomorphEnvironment( - ConstraintType.Require, - null, - Pattern.New().Annotation(vowel).Value - ) + new AllomorphEnvironment(ConstraintType.Require, null, Pattern.New().Annotation(vowel).Value) ); entry .Allomorphs[2] 
.Environments.Add( - new AllomorphEnvironment( - ConstraintType.Require, - null, - Pattern.New().Annotation(vowel).Value - ) + new AllomorphEnvironment(ConstraintType.Require, null, Pattern.New().Annotation(vowel).Value) ); entry = AddEntry( diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/LexEntryTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/LexEntryTests.cs index 191a6b5ed..e789d1c0d 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/LexEntryTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/LexEntryTests.cs @@ -24,7 +24,7 @@ public void DisjunctiveAllomorphs() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -62,14 +62,14 @@ public void FreeFluctuation() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+t") }, } ); edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+"), new InsertSimpleContext(d) }, } ); @@ -101,7 +101,7 @@ public void StemNames() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -121,7 +121,7 @@ public void StemNames() tSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new 
InsertSegments(Table3, "+t") }, } ); @@ -141,7 +141,7 @@ public void StemNames() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+s") }, } ); @@ -180,7 +180,7 @@ public void BoundRootAllomorph() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -196,7 +196,7 @@ public void AllomorphEnvironments() var vowel = FeatureStruct.New(Language.PhonologicalFeatureSystem).Symbol("voc+").Value; LexEntry headEntry = Entries["32"]; - Pattern envPattern = Pattern.New().Annotation(vowel).Value; + Pattern envPattern = Pattern.New().Annotation(vowel).Value; var env = new AllomorphEnvironment(ConstraintType.Require, null, envPattern); headEntry.PrimaryAllomorph.Environments.Add(env); @@ -276,7 +276,7 @@ public void PartialEntry() nominalizer.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); @@ -297,7 +297,7 @@ public void PartialEntry() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs index eb8944ad0..8245d17a1 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs @@ -25,7 +25,7 @@ public void 
AnalyzeWord_CanAnalyze_ReturnsCorrectAnalysis() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, } ); @@ -54,7 +54,7 @@ public void AnalyzeWord_CanAnalyzeLinear_ReturnsCorrectAnalysis() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, } ); @@ -71,7 +71,7 @@ public void AnalyzeWord_CanAnalyzeLinear_ReturnsCorrectAnalysis() tSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+t") }, } ); @@ -82,10 +82,10 @@ public void AnalyzeWord_CanAnalyzeLinear_ReturnsCorrectAnalysis() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(Character(Table1, "t")).Value, + Lhs = Pattern.New().Annotation(Character(Table1, "t")).Value, }; rule1.Subrules.Add( - new RewriteSubrule { Rhs = Pattern.New().Annotation(Character(Table1, "d")).Value } + new RewriteSubrule { Rhs = Pattern.New().Annotation(Character(Table1, "d")).Value } ); Morphophonemic.PhonologicalRules.Add(rule1); @@ -113,7 +113,7 @@ public void AnalyzeWord_CannotAnalyze_ReturnsEmptyEnumerable() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, } ); @@ -143,7 +143,7 @@ public void AnalyzeWord_CannotAnalyzeDueToAllomorphCooccurenceFailure_ReturnsEmp edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { 
Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, } ); @@ -202,7 +202,7 @@ public void AnalyzeWord_CannotAnalyzeDueToMorphemeCooccurenceFailure_ReturnsEmpt edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, } ); @@ -253,7 +253,7 @@ public void AnalyzeWord_CanGuess_ReturnsCorrectAnalysis() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, } ); @@ -288,7 +288,7 @@ public void GenerateWords_CanGenerate_ReturnsCorrectWord() siPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "si+"), new CopyFromInput("1") }, } ); @@ -304,7 +304,7 @@ public void GenerateWords_CanGenerate_ReturnsCorrectWord() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -333,7 +333,7 @@ public void GenerateWords_CannotGenerate_ReturnsEmptyEnumerable() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -454,4 +454,103 @@ IList GetNodes(string pattern) Shape shape = new Segments(Table2, pattern, true).Shape; return 
shape.GetNodes(shape.Range).ToList(); } + + [Test] + public void AnalyzeWord_SingleThreaded_MatchesParallel() + { + // Build a small Unordered grammar (the order FieldWorks uses, which exercises the + // parallel analysis cascade and parallel affix-template unapplication). + var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value; + var edSuffix = new AffixProcessRule + { + Id = "PAST", + Name = "ed_suffix", + Gloss = "PAST", + RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value, + }; + edSuffix.Allomorphs.Add( + new AffixProcessAllomorph + { + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, + } + ); + Morphophonemic.MorphologicalRules.Add(edSuffix); + + var parallel = new Morpher(TraceManager, Language); // default: Environment.ProcessorCount + var singleThreaded = new Morpher(TraceManager, Language, maxDegreeOfParallelism: 1); + + Assert.That(singleThreaded.MaxDegreeOfParallelism, Is.EqualTo(1)); + + // The single-threaded cascade (MaxDegreeOfParallelism == 1) must produce the same analyses + // as the parallel cascade. + IEnumerable singleResult = singleThreaded.AnalyzeWord("sagd").ToList(); + IEnumerable parallelResult = parallel.AnalyzeWord("sagd").ToList(); + Assert.That( + singleResult, + Is.EquivalentTo(parallelResult), + "single-threaded analysis must match the parallel analysis" + ); + } + + [Test] + public void AnalyzeWord_ConcurrentRepeatedParsing_IsDeterministic() + { + // Concurrency safety net for the copy-on-write refactors (Plans A & B): many threads + // parse against one shared frozen grammar whose FeatureStructs become shared into + // per-parse clones. A COW race would show up as a nondeterministic analysis. Unordered + // order exercises the parallel cascade + affix-template paths. 
+ var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value; + var edSuffix = new AffixProcessRule + { + Id = "PAST", + Name = "ed_suffix", + Gloss = "PAST", + RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value, + }; + edSuffix.Allomorphs.Add( + new AffixProcessAllomorph + { + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }, + } + ); + Morphophonemic.MorphologicalRules.Add(edSuffix); + + var morpher = new Morpher(TraceManager, Language); + var words = new[] { "sagd", "sag", "tag", "tagd", "gag", "xyzzy" }; + Dictionary baseline = words.ToDictionary(w => w, w => AnalysisSignature(morpher, w)); + + for (int iter = 0; iter < 50; iter++) + { + var results = new System.Collections.Concurrent.ConcurrentDictionary(); + System.Threading.Tasks.Parallel.ForEach( + Enumerable.Range(0, 250), + i => + { + string w = words[i % words.Length]; + results[w] = AnalysisSignature(morpher, w); + } + ); + foreach (string w in words) + { + Assert.That( + results[w], + Is.EqualTo(baseline[w]), + $"nondeterministic analysis for '{w}' on iteration {iter}" + ); + } + } + } + + private static string AnalysisSignature(Morpher morpher, string word) + { + return string.Join( + "|", + morpher + .AnalyzeWord(word) + .Select(a => string.Join("+", a.Morphemes.Select(m => m.Id)) + ":" + a.RootMorphemeIndex) + .OrderBy(s => s, System.StringComparer.Ordinal) + ); + } } diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs index 329074617..2d399183b 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/AffixProcessRuleTests.cs @@ -1,6 +1,5 @@ using NUnit.Framework; using SIL.Extensions; -using 
SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.Machine.Morphology.HermitCrab.PhonologicalRules; @@ -24,7 +23,7 @@ public void MorphosyntacticRules() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); @@ -89,7 +88,7 @@ public void MorphosyntacticRules() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "d") }, } ); @@ -121,7 +120,7 @@ public void PercolationRules() rule1.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "z") }, } ); @@ -331,21 +330,21 @@ public void SuffixRules() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(strident).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(strident).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "ɯz") }, } ); sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "z") }, } ); @@ -367,8 +366,8 @@ public void SuffixRules() { Lhs = { - 
Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(alvStop).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(alvStop).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new InsertSegments(Table3, "+ɯd") }, } @@ -376,14 +375,14 @@ public void SuffixRules() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+t") }, } ); edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+"), new InsertSimpleContext(d) }, } ); @@ -392,13 +391,13 @@ public void SuffixRules() var prule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(Character(Table3, "t")).Value, + Lhs = Pattern.New().Annotation(Character(Table3, "t")).Value, }; prule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(unasp).Value, - LeftEnvironment = Pattern.New().Annotation(cons).Value, + Rhs = Pattern.New().Annotation(unasp).Value, + LeftEnvironment = Pattern.New().Annotation(cons).Value, } ); Allophonic.PhonologicalRules.Add(prule1); @@ -445,7 +444,7 @@ public void SuffixRules() { Lhs = { - Pattern + Pattern .New("1") .Annotation(any) .OneOrMore.Annotation( @@ -536,21 +535,21 @@ public void PrefixRules() sPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(strident).Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(strident).Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "zi"), new CopyFromInput("1") }, } ); sPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { 
Pattern.New("1").Annotation(voicelessCons).Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(voicelessCons).Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "s"), new CopyFromInput("1") }, } ); sPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "z"), new CopyFromInput("1") }, } ); @@ -570,21 +569,21 @@ public void PrefixRules() edPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(alvStop).Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(alvStop).Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "di+"), new CopyFromInput("1") }, } ); edPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(voicelessCons).Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(voicelessCons).Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "t+"), new CopyFromInput("1") }, } ); edPrefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "d+"), new CopyFromInput("1") }, } ); @@ -593,9 +592,9 @@ public void PrefixRules() var aspiration = new RewriteRule { Name = "aspiration", - Lhs = Pattern.New().Annotation(voicelessStop).Value, + Lhs = Pattern.New().Annotation(voicelessStop).Value, }; - aspiration.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); + aspiration.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); Allophonic.PhonologicalRules.Add(aspiration); var morpher = new Morpher(TraceManager, Language); @@ -620,7 +619,7 @@ public void PrefixRules() { Lhs = { - Pattern + Pattern .New("1") .Annotation( FeatureStruct @@ 
-659,8 +658,8 @@ public void PrefixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Value, - Pattern + Pattern.New("1").Annotation(cons).Value, + Pattern .New("2") .Annotation( FeatureStruct @@ -670,7 +669,7 @@ public void PrefixRules() .Value ) .Value, - Pattern.New("3").Annotation(any).OneOrMore.Value, + Pattern.New("3").Annotation(any).OneOrMore.Value, }, Rhs = { @@ -728,9 +727,9 @@ public void InfixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Value, - Pattern.New("2").Annotation(cons).Value, - Pattern.New("3").Annotation(cons).Value, + Pattern.New("1").Annotation(cons).Value, + Pattern.New("2").Annotation(cons).Value, + Pattern.New("3").Annotation(cons).Value, }, Rhs = { @@ -760,9 +759,9 @@ public void InfixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Value, - Pattern.New("2").Annotation(cons).Value, - Pattern.New("3").Annotation(cons).Value, + Pattern.New("1").Annotation(cons).Value, + Pattern.New("2").Annotation(cons).Value, + Pattern.New("3").Annotation(cons).Value, }, Rhs = { @@ -792,8 +791,8 @@ public void InfixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Annotation(cons).Value, - Pattern.New("2").Annotation(cons).Value, + Pattern.New("1").Annotation(cons).Annotation(cons).Value, + Pattern.New("2").Annotation(cons).Value, }, Rhs = { @@ -822,9 +821,9 @@ public void InfixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Value, - Pattern.New("2").Annotation(cons).Value, - Pattern.New("3").Annotation(cons).Value, + Pattern.New("1").Annotation(cons).Value, + Pattern.New("2").Annotation(cons).Value, + Pattern.New("3").Annotation(cons).Value, }, Rhs = { @@ -841,9 +840,9 @@ public void InfixRules() var aspiration = new RewriteRule { Name = "aspiration", - Lhs = Pattern.New().Annotation(voicelessStop).Value, + Lhs = Pattern.New().Annotation(voicelessStop).Value, }; - aspiration.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); + aspiration.Subrules.Add(new RewriteSubrule { Rhs = 
Pattern.New().Annotation(unasp).Value }); Allophonic.PhonologicalRules.Add(aspiration); var morpher = new Morpher(TraceManager, Language); @@ -902,8 +901,8 @@ public void SimulfixRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(p).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(p).Value, }, Rhs = { new CopyFromInput("1"), new ModifyFromInput("2", voiced) }, } @@ -919,8 +918,8 @@ public void SimulfixRules() { Lhs = { - Pattern.New("1").Annotation(p).Value, - Pattern.New("2").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(p).Value, + Pattern.New("2").Annotation(any).OneOrMore.Value, }, Rhs = { new ModifyFromInput("1", voiced), new CopyFromInput("2") }, } @@ -936,9 +935,9 @@ public void SimulfixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Optional.Value, - Pattern.New("2").Annotation(vowel).Value, - Pattern.New("3").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(cons).Optional.Value, + Pattern.New("2").Annotation(vowel).Value, + Pattern.New("3").Annotation(any).OneOrMore.Value, }, Rhs = { new CopyFromInput("1"), new ModifyFromInput("2", nonround), new CopyFromInput("3") }, } @@ -953,9 +952,9 @@ public void SimulfixRules() { Lhs = { - Pattern.New("1").Annotation(cons).Optional.Value, - Pattern.New("2").Annotation(vowel).Range(1, 2).Value, - Pattern.New("3").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(cons).Optional.Value, + Pattern.New("2").Annotation(vowel).Range(1, 2).Value, + Pattern.New("3").Annotation(any).OneOrMore.Value, }, Rhs = { new CopyFromInput("1"), new ModifyFromInput("2", nonround), new CopyFromInput("3") }, } @@ -1007,8 +1006,8 @@ public void ReduplicationRules() { Lhs = { - Pattern.New("1").Annotation(cons).Annotation(vowel).Value, - Pattern.New("2").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(cons).Annotation(vowel).Value, + Pattern.New("2").Annotation(any).OneOrMore.Value, }, Rhs = 
{ new CopyFromInput("1"), new CopyFromInput("1"), new CopyFromInput("2") }, } @@ -1021,14 +1020,14 @@ public void ReduplicationRules() var voicing = new RewriteRule { Name = "voicing", - Lhs = Pattern.New().Annotation(Character(Table1, "s")).Value, + Lhs = Pattern.New().Annotation(Character(Table1, "s")).Value, }; voicing.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(voiced).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(voiced).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); Allophonic.PhonologicalRules.Add(voicing); @@ -1039,13 +1038,13 @@ public void ReduplicationRules() var affrication = new RewriteRule { Name = "affrication", - Lhs = Pattern.New().Annotation(Character(Table1, "s")).Value, + Lhs = Pattern.New().Annotation(Character(Table1, "s")).Value, }; affrication.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(affricate).Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + Rhs = Pattern.New().Annotation(affricate).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, } ); Allophonic.PhonologicalRules.Add(affrication); @@ -1059,8 +1058,8 @@ public void ReduplicationRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(vowel).Annotation(cons).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(vowel).Annotation(cons).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new CopyFromInput("2") }, } @@ -1077,8 +1076,8 @@ public void ReduplicationRules() { Lhs = { - Pattern.New("1").Annotation(any).ZeroOrMore.Value, - Pattern.New("2").Annotation(cons).Annotation(vowel).Annotation(cons).Value, + Pattern.New("1").Annotation(any).ZeroOrMore.Value, + 
Pattern.New("2").Annotation(cons).Annotation(vowel).Annotation(cons).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new CopyFromInput("2") }, } @@ -1094,8 +1093,8 @@ public void ReduplicationRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(vowel).Annotation(cons).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(vowel).Annotation(cons).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new CopyFromInput("2") }, } @@ -1104,13 +1103,13 @@ public void ReduplicationRules() var gDelete = new RewriteRule { Name = "g_delete", - Lhs = Pattern.New().Annotation(Character(Table1, "g")).Value, + Lhs = Pattern.New().Annotation(Character(Table1, "g")).Value, }; gDelete.Subrules.Add( new RewriteSubrule { - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); Allophonic.PhonologicalRules.Add(gDelete); @@ -1122,7 +1121,7 @@ public void ReduplicationRules() gDelete.Subrules.Add( new RewriteSubrule { - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); @@ -1138,9 +1137,9 @@ public void ReduplicationRules() { Lhs = { - Pattern.New("1").Annotation(cons).Value, - Pattern.New("2").Annotation(vowel).Annotation(vowel).Value, - Pattern.New("3").Annotation(cons).Value, + Pattern.New("1").Annotation(cons).Value, + Pattern.New("2").Annotation(vowel).Annotation(vowel).Value, + Pattern.New("3").Annotation(cons).Value, }, Rhs = { @@ -1189,8 +1188,8 @@ public void TruncateRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(Character(Table3, "g")).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + 
Pattern.New("2").Annotation(Character(Table3, "g")).Value, }, Rhs = { new CopyFromInput("1") }, } @@ -1206,8 +1205,8 @@ public void TruncateRules() { Lhs = { - Pattern.New("1").Annotation(Character(Table3, "s")).Value, - Pattern.New("2").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(Character(Table3, "s")).Value, + Pattern.New("2").Annotation(any).OneOrMore.Value, }, Rhs = { new CopyFromInput("2") }, } @@ -1222,8 +1221,8 @@ public void TruncateRules() { Lhs = { - Pattern.New("1").Annotation(fricative).Value, - Pattern.New("2").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(fricative).Value, + Pattern.New("2").Annotation(any).OneOrMore.Value, }, Rhs = { new CopyFromInput("2") }, } @@ -1238,8 +1237,8 @@ public void TruncateRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(velarStop).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(velarStop).Value, }, Rhs = { new CopyFromInput("1") }, } @@ -1254,8 +1253,8 @@ public void TruncateRules() { Lhs = { - Pattern.New("1").Annotation(Character(Table3, "s")).Optional.Value, - Pattern.New("2").Annotation(any).OneOrMore.Value, + Pattern.New("1").Annotation(Character(Table3, "s")).Optional.Value, + Pattern.New("2").Annotation(any).OneOrMore.Value, }, Rhs = { new InsertSegments(Table3, "g"), new CopyFromInput("2") }, } @@ -1285,19 +1284,19 @@ public void RequiredEnvironments() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, Environments = { new AllomorphEnvironment( ConstraintType.Require, null, - Pattern.New().Annotation(Character(Table3, "a")).Value + Pattern.New().Annotation(Character(Table3, "a")).Value ), new AllomorphEnvironment( ConstraintType.Require, null, - Pattern.New().Annotation(Character(Table3, 
"ɯ")).Value + Pattern.New().Annotation(Character(Table3, "ɯ")).Value ), }, } @@ -1305,7 +1304,7 @@ public void RequiredEnvironments() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "z") }, } ); @@ -1325,7 +1324,7 @@ public void RequiredEnvironments() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -1357,7 +1356,7 @@ public void RequiredSyntacticFeatureStruct() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, RequiredSyntacticFeatureStruct = FeatureStruct .New(Language.SyntacticFeatureSystem) @@ -1369,7 +1368,7 @@ public void RequiredSyntacticFeatureStruct() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "z") }, } ); @@ -1389,7 +1388,7 @@ public void RequiredSyntacticFeatureStruct() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -1421,14 +1420,14 @@ public void FreeFluctuation() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new 
InsertSegments(Table3, "s") }, } ); sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "z") }, } ); @@ -1452,7 +1451,7 @@ public void CircumfixRules() circumfix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "ta"), new CopyFromInput("1"), new InsertSegments(Table3, "od") }, } ); @@ -1462,7 +1461,7 @@ public void CircumfixRules() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); @@ -1504,8 +1503,8 @@ public void BoundaryRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern .New("2") .Annotation( FeatureStruct @@ -1554,10 +1553,10 @@ public void WordSynthesisWithBoundaryAtBeginning() { Lhs = { - Pattern.New("1").Annotation(any).ZeroOrMore.Value, - Pattern.New("2").Annotation(cons).Value, - Pattern.New("3").Annotation(vowel).Value, - Pattern.New("4").Annotation(cons).Value, + Pattern.New("1").Annotation(any).ZeroOrMore.Value, + Pattern.New("2").Annotation(cons).Value, + Pattern.New("3").Annotation(vowel).Value, + Pattern.New("4").Annotation(cons).Value, }, Rhs = { @@ -1586,7 +1585,7 @@ public void WordSynthesisWithBoundaryAtBeginning() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+ɯd") }, } ); @@ -1623,8 +1622,8 @@ public void PartialRule() { Lhs = { - 
Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(alvStop).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(alvStop).Value, }, Rhs = { new CopyFromInput("1"), new CopyFromInput("2"), new InsertSegments(Table3, "ɯd") }, } @@ -1632,14 +1631,14 @@ public void PartialRule() edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(voicelessCons).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "t") }, } ); edSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "d") }, } ); @@ -1663,7 +1662,7 @@ public void PartialRule() sSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); @@ -1680,7 +1679,7 @@ public void PartialRule() nominalizer.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); @@ -1696,7 +1695,7 @@ public void PartialRule() uSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "u") }, } ); @@ -1711,7 +1710,7 @@ public void PartialRule() pSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { 
Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "p") }, } ); @@ -1760,14 +1759,14 @@ public void DisjunctiveAllomorphs() esSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(vowel).Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Annotation(vowel).Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } ); esSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "ɯs") }, } ); @@ -1782,14 +1781,14 @@ public void DisjunctiveAllomorphs() guSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "gun") }, Environments = { new AllomorphEnvironment( ConstraintType.Require, null, - Pattern.New().Annotation(vowel).Value + Pattern.New().Annotation(vowel).Value ), }, } @@ -1797,7 +1796,7 @@ public void DisjunctiveAllomorphs() guSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "gu") }, } ); @@ -1830,7 +1829,7 @@ public void SubsumedAffix() uSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "u") }, } ); @@ -1847,8 +1846,8 @@ public void SubsumedAffix() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(vowel).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, 
+ Pattern.New("2").Annotation(vowel).Value, }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "s") }, } @@ -1866,7 +1865,7 @@ public void SubsumedAffix() nominalizer.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "v") }, } ); @@ -1883,8 +1882,8 @@ public void SubsumedAffix() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(vowel).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(vowel).Value, }, Rhs = { new CopyFromInput("1") }, } @@ -1927,8 +1926,8 @@ public void ModifyFromInputRules() { Lhs = { - Pattern.New("1").Annotation(any).OneOrMore.Value, - Pattern.New("2").Annotation(vowel).Value, + Pattern.New("1").Annotation(any).OneOrMore.Value, + Pattern.New("2").Annotation(vowel).Value, }, Rhs = { @@ -1994,9 +1993,9 @@ public void NonContiguousRules() { Lhs = { - Pattern.New("1").Annotation(cons).Value, - Pattern.New("2").Annotation(cons).Value, - Pattern.New("3").Annotation(cons).Value, + Pattern.New("1").Annotation(cons).Value, + Pattern.New("2").Annotation(cons).Value, + Pattern.New("3").Annotation(cons).Value, }, Rhs = { @@ -2015,13 +2014,13 @@ public void NonContiguousRules() { Name = "rule1", ApplicationMode = RewriteApplicationMode.Iterative, - Lhs = Pattern.New().Annotation(lowVowel).Value, + Lhs = Pattern.New().Annotation(lowVowel).Value, }; rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(i).Value, - RightEnvironment = Pattern.New().Annotation(voicedCons).Value, + Rhs = Pattern.New().Annotation(i).Value, + RightEnvironment = Pattern.New().Annotation(voicedCons).Value, } ); Allophonic.PhonologicalRules.Add(rule1); diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/CompoundingRuleTests.cs 
b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/CompoundingRuleTests.cs index 93192e758..8f5523508 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/CompoundingRuleTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorphologicalRules/CompoundingRuleTests.cs @@ -1,5 +1,4 @@ using NUnit.Framework; -using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; @@ -16,8 +15,8 @@ public void SimpleRules() rule1.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("head"), new InsertSegments(Table3, "+"), new CopyFromInput("nonHead") }, } ); @@ -33,8 +32,8 @@ public void SimpleRules() rule1.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("nonHead"), new InsertSegments(Table3, "+"), new CopyFromInput("head") }, } ); @@ -61,7 +60,7 @@ public void SimpleRules() prefix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new InsertSegments(Table3, "di+"), new CopyFromInput("1") }, } ); @@ -78,8 +77,8 @@ public void SimpleRules() rule1.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { 
Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("head"), new InsertSegments(Table3, "+"), new CopyFromInput("nonHead") }, } ); @@ -96,8 +95,8 @@ public void SimpleRules() rule2.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("nonHead"), new InsertSegments(Table3, "+"), new CopyFromInput("head") }, } ); @@ -124,8 +123,8 @@ public void MorphosyntacticRules() rule1.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("head"), new InsertSegments(Table3, "+"), new CopyFromInput("nonHead") }, } ); @@ -181,8 +180,8 @@ public void ProdRestrictRule() rule1.Subrules.Add( new CompoundingSubrule { - HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, - NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, + HeadLhs = { Pattern.New("head").Annotation(any).OneOrMore.Value }, + NonHeadLhs = { Pattern.New("nonHead").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("head"), new InsertSegments(Table3, "+"), new CopyFromInput("nonHead") }, } ); diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/MetathesisRuleTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/MetathesisRuleTests.cs index 54d0e7451..ea7c8954f 100644 --- 
a/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/MetathesisRuleTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/MetathesisRuleTests.cs @@ -1,5 +1,4 @@ using NUnit.Framework; -using SIL.Machine.Annotations; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; using SIL.Machine.Morphology.HermitCrab.MorphologicalRules; @@ -14,7 +13,7 @@ public void SimpleRule() var rule1 = new MetathesisRule { Name = "rule1", - Pattern = Pattern + Pattern = Pattern .New() .Group("1", group => group.Annotation(Character(Table3, "i"))) .Group("2", group => group.Annotation(Character(Table3, "u"))) @@ -36,7 +35,7 @@ public void ComplexRule() var rule1 = new MetathesisRule { Name = "rule1", - Pattern = Pattern + Pattern = Pattern .New() .Group("1", group => group.Annotation(Character(Table3, "i"))) .Group("middle", group => group.Annotation(Character(Table3, "+"))) @@ -53,7 +52,7 @@ public void ComplexRule() uSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+u") }, } ); @@ -70,7 +69,7 @@ public void SimpleRuleNotUnapplied() var prule = new MetathesisRule { Name = "rule1", - Pattern = Pattern + Pattern = Pattern .New() .Group("1", group => group.Annotation(Character(Table3, "i"))) .Group("2", group => group.Annotation(Character(Table3, "u"))) @@ -85,7 +84,7 @@ public void SimpleRuleNotUnapplied() iSuffix.Allomorphs.Add( new AffixProcessAllomorph { - Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, + Lhs = { Pattern.New("1").Annotation(any).OneOrMore.Value }, Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "i") }, } ); diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/RewriteRuleTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/RewriteRuleTests.cs index f7c56ecaa..68913bba7 100644 
--- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/RewriteRuleTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/PhonologicalRules/RewriteRuleTests.cs @@ -1,5 +1,4 @@ using NUnit.Framework; -using SIL.Machine.Annotations; using SIL.Machine.DataStructures; using SIL.Machine.FeatureModel; using SIL.Machine.Matching; @@ -25,35 +24,35 @@ public void SimpleRules() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(Character(Table1, "t")).Value, + Lhs = Pattern.New().Annotation(Character(Table1, "t")).Value, }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern.New().Annotation(nonCons).Value, + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern.New().Annotation(nonCons).Value, } ); var rule2 = new RewriteRule { Name = "rule2", - Lhs = Pattern.New().Annotation(Character(Table3, "p")).Value, + Lhs = Pattern.New().Annotation(Character(Table3, "p")).Value, }; Allophonic.PhonologicalRules.Add(rule2); Morphophonemic.PhonologicalRules.Add(rule2); rule2.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, + Rhs = Pattern.New().Annotation(asp).Value, // the following should be a NOOP because it accepts the empty string. 
- LeftEnvironment = Pattern + LeftEnvironment = Pattern .New() .Annotation(nonCons) .Optional.Annotation(nonCons) .Optional.Value, - RightEnvironment = Pattern.New().Annotation(nonCons).Value, + RightEnvironment = Pattern.New().Annotation(nonCons).Value, } ); @@ -105,17 +104,13 @@ public void LongDistanceRules() .Symbol("voc+") .Value; - var rule3 = new RewriteRule - { - Name = "rule3", - Lhs = Pattern.New().Annotation(highVowel).Value, - }; + var rule3 = new RewriteRule { Name = "rule3", Lhs = Pattern.New().Annotation(highVowel).Value }; Allophonic.PhonologicalRules.Add(rule3); rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(rndVowel) .Annotation(cons) @@ -132,8 +127,8 @@ public void LongDistanceRules() rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - RightEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + RightEnvironment = Pattern .New() .Annotation(cons) .Annotation(lowVowel) @@ -150,8 +145,8 @@ public void LongDistanceRules() rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(highVowel) .Annotation(cons) @@ -188,12 +183,12 @@ public void AnchorRules() .Symbol("voc+") .Value; - var rule3 = new RewriteRule { Name = "rule3", Lhs = Pattern.New().Annotation(cons).Value }; + var rule3 = new RewriteRule { Name = "rule3", Lhs = Pattern.New().Annotation(cons).Value }; rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(vlUnasp).Value, - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + Rhs = Pattern.New().Annotation(vlUnasp).Value, + RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); 
Allophonic.PhonologicalRules.Add(rule3); @@ -205,8 +200,8 @@ public void AnchorRules() rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(vlUnasp).Value, - RightEnvironment = Pattern + Rhs = Pattern.New().Annotation(vlUnasp).Value, + RightEnvironment = Pattern .New() .Annotation(vowel) .Annotation(cons) @@ -222,8 +217,8 @@ public void AnchorRules() rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(vlUnasp).Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + Rhs = Pattern.New().Annotation(vlUnasp).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, } ); @@ -234,8 +229,8 @@ public void AnchorRules() rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(vlUnasp).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(vlUnasp).Value, + LeftEnvironment = Pattern .New() .Annotation(HCFeatureSystem.LeftSideAnchor) .Annotation(cons) @@ -292,17 +287,13 @@ public void QuantifierRules() .Symbol("round+") .Value; - var rule3 = new RewriteRule - { - Name = "rule3", - Lhs = Pattern.New().Annotation(highVowel).Value, - }; + var rule3 = new RewriteRule { Name = "rule3", Lhs = Pattern.New().Annotation(highVowel).Value }; Allophonic.PhonologicalRules.Add(rule3); rule3.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - RightEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + RightEnvironment = Pattern .New() .Group(g => g.Annotation(cons).Annotation(lowVowel)) .LazyRange(1, 2) @@ -312,17 +303,13 @@ public void QuantifierRules() } ); - var rule4 = new RewriteRule - { - Name = "rule4", - Lhs = Pattern.New().Annotation(highVowel).Value, - }; + var rule4 = new RewriteRule { Name = "rule4", Lhs = Pattern.New().Annotation(highVowel).Value }; Allophonic.PhonologicalRules.Add(rule4); rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - 
LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(rndVowel) .Group(g => g.Annotation(cons).Annotation(lowVowel)) @@ -340,17 +327,13 @@ public void QuantifierRules() Allophonic.PhonologicalRules.Clear(); - var rule1 = new RewriteRule - { - Name = "rule1", - Lhs = Pattern.New().Annotation(highVowel).Value, - }; + var rule1 = new RewriteRule { Name = "rule1", Lhs = Pattern.New().Annotation(highVowel).Value }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(backRndVowel) .Annotation(highVowel) @@ -401,24 +384,24 @@ public void MultipleSegmentRules() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, + Lhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Annotation(backRnd).Value, - LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value, + Rhs = Pattern.New().Annotation(backRnd).Annotation(backRnd).Value, + LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value, } ); var morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("buuubuuu"), "27"); - var rule2 = new RewriteRule { Name = "rule2", Lhs = Pattern.New().Annotation(t).Value }; + var rule2 = new RewriteRule { Name = "rule2", Lhs = Pattern.New().Annotation(t).Value }; Allophonic.PhonologicalRules.Add(rule2); rule2.Subrules.Add( - new RewriteSubrule { RightEnvironment = Pattern.New().Annotation(backRndVowel).Value } + new RewriteSubrule { RightEnvironment = Pattern.New().Annotation(backRndVowel).Value } ); morpher = new Morpher(TraceManager, Language); @@ -447,11 +430,11 @@ public void 
MultipleDeletionRules() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, + Lhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( - new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value } + new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value } ); var morpher = new Morpher(TraceManager, Language); @@ -490,14 +473,14 @@ public void MergeRules() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, + Lhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(t).Value, - LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value, + Rhs = Pattern.New().Annotation(t).Value, + LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value, } ); @@ -537,17 +520,10 @@ public void MultipleMergeRules() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern - .New() - .Annotation(backRndVowel) - .Annotation(highVowel) - .Annotation(highVowel) - .Value, + Lhs = Pattern.New().Annotation(backRndVowel).Annotation(highVowel).Annotation(highVowel).Value, }; Allophonic.PhonologicalRules.Add(rule1); - rule1.Subrules.Add( - new RewriteSubrule { Rhs = Pattern.New().Annotation(t).Annotation(t).Value } - ); + rule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(t).Annotation(t).Value }); var morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bttbtt"), "27"); @@ -572,17 +548,10 @@ public void ExpandRules() .Symbol("round+") .Value; - var rule1 = new RewriteRule - { - Name = "rule1", - Lhs = Pattern.New().Annotation(backRndVowel).Value, - }; + var rule1 = new RewriteRule { Name = "rule1", Lhs = 
Pattern.New().Annotation(backRndVowel).Value }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( - new RewriteSubrule - { - Rhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value, - } + new RewriteSubrule { Rhs = Pattern.New().Annotation(highVowel).Annotation(highVowel).Value } ); var morpher = new Morpher(TraceManager, Language); @@ -679,17 +648,13 @@ public void BoundaryRules() .Symbol("asp+") .Value; - var rule1 = new RewriteRule - { - Name = "rule1", - Lhs = Pattern.New().Annotation(highVowel).Value, - }; + var rule1 = new RewriteRule { Name = "rule1", Lhs = Pattern.New().Annotation(highVowel).Value }; Morphophonemic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(backRndVowel) .Annotation(Character(Table3, "+")) @@ -704,8 +669,8 @@ public void BoundaryRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(unbackUnrnd).Value, - RightEnvironment = Pattern + Rhs = Pattern.New().Annotation(unbackUnrnd).Value, + RightEnvironment = Pattern .New() .Annotation(Character(Table3, "+")) .Annotation(unbackUnrndVowel) @@ -720,8 +685,8 @@ public void BoundaryRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value, + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern.New().Annotation(backRndVowel).Value, } ); @@ -732,33 +697,26 @@ public void BoundaryRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(unbackUnrnd).Value, - RightEnvironment = Pattern.New().Annotation(unbackUnrndVowel).Value, + Rhs = Pattern.New().Annotation(unbackUnrnd).Value, + RightEnvironment = Pattern.New().Annotation(unbackUnrndVowel).Value, } ); morpher = new Morpher(TraceManager, Language); 
AssertMorphsEqual(morpher.ParseWord("biib"), "30", "31"); - rule1.Lhs = Pattern.New().Annotation(Character(Table3, "i")).Value; + rule1.Lhs = Pattern.New().Annotation(Character(Table3, "i")).Value; rule1.Subrules.Clear(); rule1.Subrules.Add( - new RewriteSubrule - { - RightEnvironment = Pattern.New().Annotation(Character(Table3, "b")).Value, - } + new RewriteSubrule { RightEnvironment = Pattern.New().Annotation(Character(Table3, "b")).Value } ); - var rule2 = new RewriteRule - { - Name = "rule2", - Lhs = Pattern.New().Annotation(backVowel).Value, - }; + var rule2 = new RewriteRule { Name = "rule2", Lhs = Pattern.New().Annotation(backVowel).Value }; rule2.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(Character(Table3, "a")).Value, - RightEnvironment = Pattern + Rhs = Pattern.New().Annotation(Character(Table3, "a")).Value, + RightEnvironment = Pattern .New() .Group(group => group.Annotation(Character(Table3, "+")).Annotation(Character(Table3, "b"))) .Value, @@ -769,22 +727,19 @@ public void BoundaryRules() morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bab"), "30"); - rule1.Lhs = Pattern.New().Annotation(Character(Table3, "u")).Value; + rule1.Lhs = Pattern.New().Annotation(Character(Table3, "u")).Value; rule1.Subrules.Clear(); rule1.Subrules.Add( - new RewriteSubrule - { - LeftEnvironment = Pattern.New().Annotation(Character(Table3, "b")).Value, - } + new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(Character(Table3, "b")).Value } ); - rule2.Lhs = Pattern.New().Annotation(unrndVowel).Value; + rule2.Lhs = Pattern.New().Annotation(unrndVowel).Value; rule2.Subrules.Clear(); rule2.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(lowBack).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(lowBack).Value, + LeftEnvironment = Pattern .New() .Annotation(Character(Table3, "b")) .Annotation(Character(Table3, "+")) @@ -797,7 +752,7 @@ public void BoundaryRules() 
Morphophonemic.PhonologicalRules.Remove(rule2); - rule1.Lhs = Pattern + rule1.Lhs = Pattern .New() .Annotation(bilabialCons) .Annotation(Character(Table3, "+")) @@ -806,41 +761,41 @@ public void BoundaryRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern + Rhs = Pattern .New() .Annotation(unvdUnasp) .Annotation(Character(Table3, "+")) .Annotation(unvdUnasp) .Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("appa"), "39"); - rule1.Lhs = Pattern.New().Annotation(bilabialCons).Annotation(bilabialCons).Value; + rule1.Lhs = Pattern.New().Annotation(bilabialCons).Annotation(bilabialCons).Value; rule1.Subrules.Clear(); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(unvdUnasp).Annotation(unvdUnasp).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(unvdUnasp).Annotation(unvdUnasp).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("appa"), "40"); - rule1.Lhs = Pattern.New().Annotation(cons).Value; + rule1.Lhs = Pattern.New().Annotation(cons).Value; rule1.Subrules.Clear(); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern.New().Annotation(Character(Table3, "+")).Value, + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern.New().Annotation(Character(Table3, "+")).Value, } ); @@ -854,13 +809,13 @@ public void BoundaryRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern + Rhs = Pattern .New() 
.Annotation(Character(Table1, "t")) .Annotation(Character(Table1, "a")) .Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, - RightEnvironment = Pattern + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + RightEnvironment = Pattern .New() .Annotation(cons) .Annotation(vowel) @@ -909,15 +864,15 @@ public void CommonFeatureRules() var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(Character(Table1, "p")).Value, + Lhs = Pattern.New().Annotation(Character(Table1, "p")).Value, }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(vdLabFric).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(vdLabFric).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); @@ -928,9 +883,9 @@ public void CommonFeatureRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(Character(Table1, "v")).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(Character(Table1, "v")).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); @@ -1005,16 +960,12 @@ public void AlphaVariableRules() .Symbol("nasal-") .Value; - var rule1 = new RewriteRule - { - Name = "rule1", - Lhs = Pattern.New().Annotation(highVowel).Value, - }; + var rule1 = new RewriteRule { Name = "rule1", Lhs = Pattern.New().Annotation(highVowel).Value }; Morphophonemic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern + Rhs = Pattern .New() .Annotation( FeatureStruct @@ -1027,7 +978,7 @@ public void AlphaVariableRules() .Value ) .Value, - 
LeftEnvironment = Pattern + LeftEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1046,12 +997,12 @@ public void AlphaVariableRules() var morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bububu"), "42", "43"); - rule1.Lhs = Pattern.New().Annotation(nasalCons).Value; + rule1.Lhs = Pattern.New().Annotation(nasalCons).Value; rule1.Subrules.Clear(); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern + Rhs = Pattern .New() .Annotation( FeatureStruct @@ -1062,7 +1013,7 @@ public void AlphaVariableRules() .Value ) .Value, - RightEnvironment = Pattern + RightEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1081,7 +1032,7 @@ public void AlphaVariableRules() Morphophonemic.PhonologicalRules.Clear(); Allophonic.PhonologicalRules.Add(rule1); - rule1.Lhs = Pattern + rule1.Lhs = Pattern .New() .Annotation( FeatureStruct @@ -1095,8 +1046,8 @@ public void AlphaVariableRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1109,18 +1060,18 @@ public void AlphaVariableRules() .Value, } ); - rule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); + rule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("pipʰ"), "41"); - rule1.Lhs = Pattern.New().Value; + rule1.Lhs = Pattern.New().Value; rule1.Subrules.Clear(); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(Character(Table1, "f")).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(Character(Table1, "f")).Value, + LeftEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1134,7 +1085,7 @@ public void AlphaVariableRules() .Value ) .Value, - RightEnvironment = Pattern + RightEnvironment = Pattern .New() 
.Annotation( FeatureStruct @@ -1161,7 +1112,7 @@ public void AlphaVariableRules() rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern + Rhs = Pattern .New() .Annotation( FeatureStruct @@ -1171,7 +1122,7 @@ public void AlphaVariableRules() .Value ) .Value, - LeftEnvironment = Pattern + LeftEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1181,7 +1132,7 @@ public void AlphaVariableRules() .Value ) .Value, - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); @@ -1242,8 +1193,8 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, - LeftEnvironment = Pattern.New().Annotation(highVowel).Value, + Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, + LeftEnvironment = Pattern.New().Annotation(highVowel).Value, } ); @@ -1254,8 +1205,8 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(Character(Table1, "i")).Value, - RightEnvironment = Pattern.New().Annotation(highVowel).Value, + Rhs = Pattern.New().Annotation(Character(Table1, "i")).Value, + RightEnvironment = Pattern.New().Annotation(highVowel).Value, } ); @@ -1267,9 +1218,9 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, - RightEnvironment = Pattern.New().Annotation(cons).Value, + Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + RightEnvironment = Pattern.New().Annotation(cons).Value, } ); @@ -1280,9 +1231,9 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, - LeftEnvironment = 
Pattern.New().Annotation(cons).Value, - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, + LeftEnvironment = Pattern.New().Annotation(cons).Value, + RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); @@ -1293,9 +1244,9 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, - LeftEnvironment = Pattern.New().Annotation(cons).Value, - RightEnvironment = Pattern.New().Annotation(highBackRndVowel).Value, + Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, + LeftEnvironment = Pattern.New().Annotation(cons).Value, + RightEnvironment = Pattern.New().Annotation(highBackRndVowel).Value, } ); @@ -1307,7 +1258,7 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern + Rhs = Pattern .New() .Annotation( FeatureStruct @@ -1319,7 +1270,7 @@ public void EpenthesisRules() .Value ) .Value, - LeftEnvironment = Pattern + LeftEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1341,12 +1292,8 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern - .New() - .Annotation(highFrontUnrndVowel) - .Annotation(highFrontUnrndVowel) - .Value, - LeftEnvironment = Pattern.New().Annotation(highVowel).Value, + Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Annotation(highFrontUnrndVowel).Value, + LeftEnvironment = Pattern.New().Annotation(highVowel).Value, } ); @@ -1359,8 +1306,8 @@ public void EpenthesisRules() rule4.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + Rhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, } ); @@ -1369,13 +1316,13 @@ public void EpenthesisRules() 
Allophonic.PhonologicalRules.Clear(); - var rule1 = new RewriteRule { Name = "rule1", Lhs = Pattern.New().Annotation(vowel).Value }; + var rule1 = new RewriteRule { Name = "rule1", Lhs = Pattern.New().Annotation(vowel).Value }; Allophonic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(highBackRnd).Value, - LeftEnvironment = Pattern.New().Annotation(highBackRndVowel).Value, + Rhs = Pattern.New().Annotation(highBackRnd).Value, + LeftEnvironment = Pattern.New().Annotation(highBackRndVowel).Value, } ); @@ -1384,9 +1331,9 @@ public void EpenthesisRules() rule2.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(Character(Table1, "t")).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(Character(Table1, "t")).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); @@ -1454,11 +1401,11 @@ public void DeletionRules() var rule4 = new RewriteRule { Name = "rule4", - Lhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, + Lhs = Pattern.New().Annotation(highFrontUnrndVowel).Value, }; Allophonic.PhonologicalRules.Add(rule4); rule4.Subrules.Add( - new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(highVowel).Value } + new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(highVowel).Value } ); var morpher = new Morpher(TraceManager, Language); @@ -1468,25 +1415,19 @@ public void DeletionRules() AssertMorphsEqual(morpher.ParseWord("bubu"), "24", "25", "26", "27", "19"); rule4.Subrules.Clear(); - rule4.Subrules.Add( - new RewriteSubrule { RightEnvironment = Pattern.New().Annotation(cons).Value } - ); + rule4.Subrules.Add(new RewriteSubrule { RightEnvironment = Pattern.New().Annotation(cons).Value }); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bubu"), "25", "19"); 
- rule4.Lhs = Pattern - .New() - .Annotation(highFrontUnrndVowel) - .Annotation(highFrontUnrndVowel) - .Value; + rule4.Lhs = Pattern.New().Annotation(highFrontUnrndVowel).Annotation(highFrontUnrndVowel).Value; morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bubu"), "29", "19"); rule4.Subrules.Clear(); rule4.Subrules.Add( - new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(highBackRndVowel).Value } + new RewriteSubrule { LeftEnvironment = Pattern.New().Annotation(highBackRndVowel).Value } ); morpher = new Morpher(TraceManager, Language); @@ -1495,20 +1436,20 @@ public void DeletionRules() Allophonic.PhonologicalRules.Clear(); Morphophonemic.PhonologicalRules.Add(rule4); - rule4.Lhs = Pattern.New().Annotation(Character(Table3, "b")).Value; + rule4.Lhs = Pattern.New().Annotation(Character(Table3, "b")).Value; rule4.Subrules.Clear(); rule4.Subrules.Add( new RewriteSubrule { - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, - RightEnvironment = Pattern.New().Annotation(Character(Table3, "+")).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + RightEnvironment = Pattern.New().Annotation(Character(Table3, "+")).Value, } ); var rule5 = new RewriteRule { Name = "rule5", - Lhs = Pattern + Lhs = Pattern .New() .Annotation(Character(Table3, "u")) .Annotation(Character(Table3, "b")) @@ -1519,22 +1460,22 @@ public void DeletionRules() rule5.Subrules.Add( new RewriteSubrule { - LeftEnvironment = Pattern.New().Annotation(Character(Table3, "+")).Value, - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + LeftEnvironment = Pattern.New().Annotation(Character(Table3, "+")).Value, + RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); var rule1 = new RewriteRule { Name = "rule1", - Lhs = Pattern.New().Annotation(Character(Table3, "t")).Value, + Lhs = Pattern.New().Annotation(Character(Table3, 
"t")).Value, }; Morphophonemic.PhonologicalRules.Add(rule1); rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern.New().Annotation(nonCons).Value, + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern.New().Annotation(nonCons).Value, } ); @@ -1546,15 +1487,15 @@ public void DeletionRules() Allophonic.PhonologicalRules.Add(rule5); Allophonic.PhonologicalRules.Add(rule1); - rule4.Subrules[0].LeftEnvironment = Pattern.New().Value; + rule4.Subrules[0].LeftEnvironment = Pattern.New().Value; - rule5.Lhs = Pattern + rule5.Lhs = Pattern .New() .Annotation(Character(Table3, "u")) .Annotation(Character(Table3, "b")) .Annotation(Character(Table3, "i")) .Value; - rule5.Subrules[0].RightEnvironment = Pattern.New().Value; + rule5.Subrules[0].RightEnvironment = Pattern.New().Value; morpher = new Morpher(TraceManager, Language); Assert.That(morpher.ParseWord("b"), Is.Empty); @@ -1563,7 +1504,7 @@ public void DeletionRules() Allophonic.PhonologicalRules.Add(rule4); Morphophonemic.PhonologicalRules.Add(rule5); - rule4.Lhs = Pattern + rule4.Lhs = Pattern .New() .Annotation( FeatureStruct @@ -1583,7 +1524,7 @@ public void DeletionRules() rule4.Subrules.Add( new RewriteSubrule { - RightEnvironment = Pattern + RightEnvironment = Pattern .New() .Annotation( FeatureStruct @@ -1602,14 +1543,14 @@ public void DeletionRules() } ); - rule5.Lhs = Pattern.New().Annotation(cons).Value; + rule5.Lhs = Pattern.New().Annotation(cons).Value; rule5.Subrules.Clear(); rule5.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(voiced).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, - RightEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(voiced).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, + RightEnvironment = Pattern.New().Annotation(vowel).Value, } ); @@ -1741,31 +1682,27 @@ public void DisjunctiveRules() .Symbol("cont-") .Value; - var 
disrule1 = new RewriteRule - { - Name = "disrule1", - Lhs = Pattern.New().Annotation(stop).Value, - }; + var disrule1 = new RewriteRule { Name = "disrule1", Lhs = Pattern.New().Annotation(stop).Value }; Allophonic.PhonologicalRules.Add(disrule1); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, } ); - disrule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); + disrule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); var morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("pʰip"), "41"); - disrule1.Lhs = Pattern.New().Annotation(highVowel).Value; + disrule1.Lhs = Pattern.New().Annotation(highVowel).Value; disrule1.Subrules.Clear(); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(backRndVowel) .Group(g => g.Annotation(cons).Annotation(highFrontVowel)) @@ -1776,8 +1713,8 @@ public void DisjunctiveRules() disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(frontRnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(frontRnd).Value, + LeftEnvironment = Pattern .New() .Annotation(frontRndVowel) .Group(g => g.Annotation(cons).Annotation(highFrontVowel)) @@ -1788,8 +1725,8 @@ public void DisjunctiveRules() disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backUnrnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(backUnrnd).Value, + LeftEnvironment = Pattern .New() .Annotation(backUnrndVowel) .Group(g => g.Annotation(cons).Annotation(highFrontVowel)) @@ -1800,8 +1737,8 @@ 
public void DisjunctiveRules() disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(frontUnrnd).Value, - LeftEnvironment = Pattern + Rhs = Pattern.New().Annotation(frontUnrnd).Value, + LeftEnvironment = Pattern .New() .Annotation(frontUnrndVowel) .Group(g => g.Annotation(cons).Annotation(highFrontVowel)) @@ -1813,56 +1750,56 @@ public void DisjunctiveRules() morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bububu"), "42", "43"); - disrule1.Lhs = Pattern.New().Annotation(stop).Value; + disrule1.Lhs = Pattern.New().Annotation(stop).Value; disrule1.Subrules.Clear(); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern.New().Annotation(HCFeatureSystem.LeftSideAnchor).Value, } ); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(unasp).Value, - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + Rhs = Pattern.New().Annotation(unasp).Value, + RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("pʰip"), "41"); - disrule1.Lhs = Pattern.New().Annotation(p).Value; + disrule1.Lhs = Pattern.New().Annotation(p).Value; disrule1.Subrules.Clear(); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(vd).Value, - LeftEnvironment = Pattern.New().Annotation(vowel).Value, + Rhs = Pattern.New().Annotation(vd).Value, + LeftEnvironment = Pattern.New().Annotation(vowel).Value, } ); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - RightEnvironment = Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, + Rhs = Pattern.New().Annotation(asp).Value, + RightEnvironment = 
Pattern.New().Annotation(HCFeatureSystem.RightSideAnchor).Value, } ); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("bubu"), "46", "19"); - disrule1.Lhs = Pattern.New().Annotation(voicelessStop).Value; + disrule1.Lhs = Pattern.New().Annotation(voicelessStop).Value; disrule1.Subrules.Clear(); disrule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(asp).Value, - LeftEnvironment = Pattern.New().Annotation(voicelessStop).Value, + Rhs = Pattern.New().Annotation(asp).Value, + LeftEnvironment = Pattern.New().Annotation(voicelessStop).Value, } ); - disrule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); + disrule1.Subrules.Add(new RewriteSubrule { Rhs = Pattern.New().Annotation(unasp).Value }); morpher = new Morpher(TraceManager, Language); AssertMorphsEqual(morpher.ParseWord("ktʰb"), "49"); @@ -1904,13 +1841,13 @@ public void MultipleApplicationRules() { Name = "rule1", ApplicationMode = RewriteApplicationMode.Simultaneous, - Lhs = Pattern.New().Annotation(highVowel).Value, + Lhs = Pattern.New().Annotation(highVowel).Value, }; rule1.Subrules.Add( new RewriteSubrule { - Rhs = Pattern.New().Annotation(backRnd).Value, - LeftEnvironment = Pattern.New().Annotation(i).Annotation(cons).Value, + Rhs = Pattern.New().Annotation(backRnd).Value, + LeftEnvironment = Pattern.New().Annotation(i).Annotation(cons).Value, } ); Allophonic.PhonologicalRules.Add(rule1); diff --git a/tests/SIL.Machine.Tests/Annotations/AnnotationTests.cs b/tests/SIL.Machine.Tests/Annotations/AnnotationTests.cs index db74eee40..304266a7a 100644 --- a/tests/SIL.Machine.Tests/Annotations/AnnotationTests.cs +++ b/tests/SIL.Machine.Tests/Annotations/AnnotationTests.cs @@ -266,4 +266,375 @@ public void FindDepthFirst() Assert.That(annList.FindDepthFirst(50, Direction.RightToLeft, out result), Is.True); Assert.That(result, Is.EqualTo(annList.Last.Prev)); } + + // Copy-on-write safety net for the Shape/ShapeNode refactor (Plan 
B): cloning a frozen + // Shape and mutating a cloned node's FeatureStruct must not change the source shape. + private static Shape BuildShape(FeatureSystem featSys) + { + var shape = new Shape(end => new ShapeNode(FeatureStruct.New().Value)); + shape.Add(FeatureStruct.New(featSys).Symbol("a1").Value); + shape.Add(FeatureStruct.New(featSys).Symbol("a2").Value); + shape.Add(FeatureStruct.New(featSys).Symbol("a3").Value); + shape.Freeze(); + return shape; + } + + [Test] + public void CloneShape_MutateClonedNodeFeatureStruct_LeavesSourceShapeUnchanged() + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + new SymbolicFeature("b", new FeatureSymbol("b1"), new FeatureSymbol("b2")), + }; + featSys.Freeze(); + + Shape source = BuildShape(featSys); + Shape expected = BuildShape(featSys); + Shape clone = source.Clone(); + + // CopyTo fidelity: same node count and value-equal to the source. + Assert.That(clone.Count, Is.EqualTo(source.Count)); + Assert.That(clone.ValueEquals(source), Is.True); + + // Mutate the first cloned node's feature struct (the in-place pattern HermitCrab uses). + clone.First.Annotation.FeatureStruct.AddValue( + featSys.GetFeature("b"), + new SymbolicFeatureValue(featSys.GetSymbol("b1")) + ); + + // The source shape must be byte-for-byte unchanged. 
+ Assert.That(source.ValueEquals(expected), Is.True, "frozen source shape changed by a clone-node mutation"); + Assert.That(source.First.Annotation.FeatureStruct.ContainsFeature(featSys.GetFeature("b")), Is.False); + Assert.That(clone.First.Annotation.FeatureStruct.ContainsFeature(featSys.GetFeature("b")), Is.True); + } + + [Test] + public void LargeShape_GrowsBackingArrays_PreservesLinkAndCloneIntegrity() + { + var featSys = new FeatureSystem { new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2")) }; + featSys.Freeze(); + + // Append well past the initial backing capacity (4) through several doublings, so the + // parallel _nodes/_next/_prev/_frozen arrays are Array.Resize'd multiple times. + var shape = new Shape(end => new ShapeNode(FeatureStruct.New().Value)); + var added = new List(); + for (int i = 0; i < 50; i++) + added.Add(shape.Add(FeatureStruct.New(featSys).Symbol(i % 2 == 0 ? "a1" : "a2").Value)); + + Assert.That(shape.Count, Is.EqualTo(50)); + + // Forward links (First..Last content) preserve insertion order + node identity across growth. + var forward = new List(); + for (ShapeNode n = shape.First; n != shape.End; n = n.Next) + forward.Add(n); + Assert.That(forward, Is.EqualTo(added)); + + // Backward links consistent (Last..First reversed equals insertion order). + var backward = new List(); + for (ShapeNode n = shape.Last; n != shape.Begin; n = n.Prev) + backward.Add(n); + backward.Reverse(); + Assert.That(backward, Is.EqualTo(added)); + + // GetNodes over the content range yields the same nodes. + Assert.That(shape.GetNodes(shape.First, shape.Last).ToList(), Is.EqualTo(added)); + + // Each handle round-trips through the dense index (NodeAt(OffsetOf(n)) == n). + foreach (ShapeNode n in added) + Assert.That(shape.NodeAt(shape.OffsetOf(n)), Is.EqualTo(n)); + + // Mid-list insert then remove exercise the flat-backing mutators + slot bookkeeping. 
+ var inserted = new ShapeNode(FeatureStruct.New(featSys).Symbol("a1").Value); + shape.AddAfter(added[24], inserted); + Assert.That(shape.Remove(added[9]), Is.True); + Assert.That(shape.Count, Is.EqualTo(50)); // 50 + 1 inserted - 1 removed + + var afterMutation = new List(); + for (ShapeNode n = shape.First; n != shape.End; n = n.Next) + afterMutation.Add(n); + Assert.That(afterMutation, Does.Not.Contain(added[9])); + Assert.That(afterMutation, Does.Contain(inserted)); + int idx24 = afterMutation.IndexOf(added[24]); + Assert.That(afterMutation[idx24 + 1], Is.EqualTo(inserted), "inserted node must follow its anchor"); + + // A clone of the large (unfrozen) shape is value-equal and independent. + Shape clone = shape.Clone(); + Assert.That(clone.Count, Is.EqualTo(shape.Count)); + Assert.That(clone.ValueEquals(shape), Is.True); + } + + // RUSTIFY Stage 2 thesis check: the FST flip from TOffset = ShapeNode to TOffset = int maps each + // annotation [startNode, endNode] to the half-open int range [startNode.Tag, endNode.Tag + 1]. + // The whole flip's correctness rests on that mapping preserving the range relationships the FST + // traversal depends on (ordering via CompareTo, Overlaps, Contains) — for SPARSE tags (an + // appended, unfrozen shape: rewrite rules mutate + match unfrozen) AND dense tags (frozen). This + // validates that thesis empirically before any code is built on it. 
+ private static System.Collections.Generic.List> BuildSpannedShape( + FeatureSystem featSys, + bool freeze + ) + { + var shape = new Shape(end => new ShapeNode(FeatureStruct.New().Value)); + shape.Add(FeatureStruct.New(featSys).Symbol("a1").Value); + ShapeNode n1 = shape.Add(FeatureStruct.New(featSys).Symbol("a2").Value); + ShapeNode n2 = shape.Add(FeatureStruct.New(featSys).Symbol("a3").Value); + shape.Add(FeatureStruct.New(featSys).Symbol("a1").Value); + // a spanning (morph-like) annotation over the two middle nodes — exercises start != end + shape.Annotations.Add(Range.Create(n1, n2), FeatureStruct.New(featSys).Symbol("a2").Value); + if (freeze) + shape.Freeze(); + + // every annotation (leaves + the span + its children), excluding the Begin/End anchors whose + // int.MinValue/int.MaxValue tags are handled separately in the real projection + var anns = new System.Collections.Generic.List>(); + foreach (Annotation top in shape.Annotations) + { + foreach (Annotation a in top.GetNodesDepthFirst()) + { + if (a.Range.Start.Tag != int.MinValue && a.Range.End.Tag != int.MaxValue) + anns.Add(a); + } + } + return anns; + } + + private static Shape BuildSpannedShapeObject(FeatureSystem featSys, bool freeze) + { + var shape = new Shape(end => new ShapeNode(FeatureStruct.New().Value)); + shape.Add(FeatureStruct.New(featSys).Symbol("a1").Value); + ShapeNode n1 = shape.Add(FeatureStruct.New(featSys).Symbol("a2").Value); + ShapeNode n2 = shape.Add(FeatureStruct.New(featSys).Symbol("a3").Value); + shape.Add(FeatureStruct.New(featSys).Symbol("a1").Value); + shape.Annotations.Add(Range.Create(n1, n2), FeatureStruct.New(featSys).Symbol("a2").Value); + if (freeze) + shape.Freeze(); + return shape; + } + + private static void AssertProjectionMatches(Shape shape, Annotation src, Annotation proj) + { + // offset = dense node position; a node [s,e] -> half-open [off(s), off(e)+1) + Range expected = Range.Create(shape.OffsetOf(src.Range.Start), shape.OffsetOf(src.Range.End) + 1); + 
Assert.That(proj.Range, Is.EqualTo(expected), "projected range"); + Assert.That(proj.Optional, Is.EqualTo(src.Optional), "projected optional"); + // FeatureStruct is shared by reference so in-place edits stay visible to the int view + Assert.That(proj.FeatureStruct, Is.SameAs(src.FeatureStruct), "projected FeatureStruct identity"); + Assert.That(proj.Children.Count, Is.EqualTo(src.IsLeaf ? 0 : src.Children.Count), "projected child count"); + if (!src.IsLeaf) + { + Annotation[] sc = src.Children.ToArray(); + Annotation[] pc = proj.Children.ToArray(); + for (int k = 0; k < sc.Length; k++) + AssertProjectionMatches(shape, sc[k], pc[k]); + } + } + + [TestCase(false)] + [TestCase(true)] + public void IntAnnotationProjection_MirrorsShapeNodeAnnotations(bool freeze) + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + Shape shape = BuildSpannedShapeObject(featSys, freeze); + + AnnotationList proj = shape.IntAnnotations; + Assert.That(proj.Count, Is.EqualTo(shape.Annotations.Count), "top-level count"); + + // top-level annotations correspond in order (the int range mapping preserves ordering) + Annotation[] srcTop = shape.Annotations.ToArray(); + Annotation[] projTop = proj.ToArray(); + for (int k = 0; k < srcTop.Length; k++) + AssertProjectionMatches(shape, srcTop[k], projTop[k]); + + // NodeAt/OffsetOf round-trip every node (dense offset), including margins + foreach (ShapeNode node in shape) + Assert.That(shape.NodeAt(shape.OffsetOf(node)), Is.SameAs(node), "NodeAt(OffsetOf(node)) round-trip"); + Assert.That(shape.NodeAt(shape.OffsetOf(shape.Begin)), Is.SameAs(shape.Begin)); + Assert.That(shape.NodeAt(shape.OffsetOf(shape.End)), Is.SameAs(shape.End)); + + // the projection is cached against the annotation Version + Assert.That(shape.IntAnnotations, Is.SameAs(proj), "projection cached when unchanged"); + if (!freeze) + { + 
shape.Add(FeatureStruct.New(featSys).Symbol("a1").Value); + Assert.That(shape.IntAnnotations, Is.Not.SameAs(proj), "projection rebuilt after a mutation"); + } + } + + [TestCase(false)] + [TestCase(true)] + public void IntOffsetRangeMapping_PreservesShapeNodeRangeRelationships(bool freeze) + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + System.Collections.Generic.List> anns = BuildSpannedShape(featSys, freeze); + Assert.That(anns.Count, Is.GreaterThanOrEqualTo(4)); + + // sanity: appended (unfrozen) tags really are sparse, not 0..N-1 + if (!freeze) + { + var tags = anns.Select(a => a.Range.Start.Tag).Distinct().OrderBy(t => t).ToArray(); + Assert.That(tags.Length > 1 && tags[1] - tags[0] > 1, Is.True, "expected sparse appended tags"); + } + + static Range ToInt(Annotation a) => Range.Create(a.Range.Start.Tag, a.Range.End.Tag + 1); + + foreach (Annotation x in anns) + { + foreach (Annotation y in anns) + { + Range xs = x.Range, + ys = y.Range; + Range xi = ToInt(x), + yi = ToInt(y); + + Assert.That( + System.Math.Sign(xi.CompareTo(yi)), + Is.EqualTo(System.Math.Sign(xs.CompareTo(ys))), + $"CompareTo sign diverged: shape={xs}.CompareTo({ys}) int={xi}.CompareTo({yi})" + ); + Assert.That( + xi.Overlaps(yi), + Is.EqualTo(xs.Overlaps(ys)), + $"Overlaps diverged: shape={xs}/{ys} int={xi}/{yi}" + ); + Assert.That( + xi.Contains(yi), + Is.EqualTo(xs.Contains(ys)), + $"Contains diverged: shape={xs}/{ys} int={xi}/{yi}" + ); + } + } + } + + [TestCase(false)] + [TestCase(true)] + public void IntRange_StartsAtBoundaryAnchorInEachDirection(bool freeze) + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + Shape shape = BuildSpannedShapeObject(featSys, freeze); + + // A directional match begins at IntRange.GetStart(dir); that offset must 
resolve to the + // boundary anchor itself (Begin for LtR, End for RtL). The End anchor's dense node range is + // half-open [off(End), off(End)+1), so its RtL start coordinate is off(End)+1 — IntRange must + // carry the +1, or a RtL match would begin at the last content node and skip any edit adjacent + // to End (e.g. inserting a deleted segment after the final vowel during analysis). + Assert.That( + shape.IntRange.GetStart(Direction.LeftToRight), + Is.EqualTo(shape.MatchStartOffset(shape.Begin, Direction.LeftToRight)), + "a LtR match must start at the Begin anchor" + ); + Assert.That( + shape.IntRange.GetStart(Direction.RightToLeft), + Is.EqualTo(shape.MatchStartOffset(shape.End, Direction.RightToLeft)), + "a RtL match must start at the End anchor" + ); + } + + [Test] + public void Optional_FlipInvalidatesIntProjection() + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + // unfrozen: Optional is only ever flipped on a mutable shape (during analysis/unapplication) + Shape shape = BuildSpannedShapeObject(featSys, freeze: false); + + AnnotationList proj = shape.IntAnnotations; + ShapeNode node = shape.First; + Assert.That(node.Annotation.Optional, Is.False); + + // Flipping Optional is a non-structural change. The int projection copies Optional by value and + // caches against the annotation Version, so the flip must invalidate the cache — otherwise the + // matcher keeps seeing the stale flag and never forks the optional-skip instances. 
+ node.Annotation.Optional = true; + + AnnotationList proj2 = shape.IntAnnotations; + Assert.That(proj2, Is.Not.SameAs(proj), "projection rebuilt after an Optional flip"); + int off = shape.OffsetOf(node); + Annotation projNode = proj2.Single(a => a.Range.Start == off); + Assert.That(projNode.Optional, Is.True, "rebuilt projection reflects the flipped Optional flag"); + } + + [Test] + public void CopyOnWriteClone_NeverInflated_ServesProjectionFromSource() + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + // RUSTIFY Stage 3 (III): a clone of a FROZEN shape is copy-on-write — it copies nothing and serves + // the int-offset projection from its frozen source, so a traverse-only clone never materializes. + Shape src = BuildSpannedShapeObject(featSys, freeze: true); + AnnotationList srcProj = src.IntAnnotations; + + Shape clone = src.Clone(); + Assert.That(clone.Count, Is.EqualTo(src.Count), "COW clone reports the source content count"); + Assert.That(clone.IntAnnotations, Is.SameAs(srcProj), "COW clone serves the source's projection"); + Assert.That(clone.IntRange, Is.EqualTo(src.IntRange), "COW clone serves the source's int range"); + } + + [Test] + public void CopyOnWriteClone_MutationInflatesAndDoesNotCorruptSource() + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + Shape src = BuildSpannedShapeObject(featSys, freeze: true); + int srcCount = src.Count; + AnnotationList srcProj = src.IntAnnotations; + + Shape clone = src.Clone(); + // Touch a handle, then mutate — this must inflate the clone's own node graph, leaving the shared + // frozen source untouched (the corruption case the gating exists to prevent). 
+ ShapeNode first = clone.First; + clone.AddAfter(first, FeatureStruct.New(featSys).Symbol("a1").Value); + + Assert.That(clone.Count, Is.EqualTo(srcCount + 1), "the clone was mutated"); + Assert.That(src.Count, Is.EqualTo(srcCount), "the frozen source count is unchanged"); + Assert.That(src.IntAnnotations, Is.SameAs(srcProj), "the frozen source projection is unchanged"); + } + + [Test] + public void CopyOnWriteClone_FrozenBySharing_HashStableAcrossInflation() + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + }; + featSys.Freeze(); + + Shape src = BuildSpannedShapeObject(featSys, freeze: true); + int srcHash = src.GetFrozenHashCode(); + + Shape clone = src.Clone(); + clone.Freeze(); // no-op: adopts the source's frozen state + hash without materializing nodes + Assert.That(clone.GetFrozenHashCode(), Is.EqualTo(srcHash), "frozen-by-sharing hash equals the source"); + + // Forcing inflation (handle access) re-materializes + re-freezes; the hash must be unchanged. + ShapeNode _ = clone.First; + Assert.That(clone.GetFrozenHashCode(), Is.EqualTo(srcHash), "hash stable across COW inflation"); + } } diff --git a/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs b/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs index c733110ee..2f5a6430d 100644 --- a/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs +++ b/tests/SIL.Machine.Tests/FeatureModel/FeatureStructTests.cs @@ -1017,6 +1017,37 @@ public void BitArray() SkipAndCheck(featPos, 17, "v1"); } + [Test] + public void UlongSymbolicFeatureValueFlags_SixtyFourSymbols_MaskCoversWholeUlong() + { + // Regression for the 64-symbol boundary: SymbolicFeatureValue.CreateFlags routes a feature + // with up to 64 symbols to the ulong implementation, whose mask was computed as + // `(1UL << 64) - 1`. 
A ulong shift count is masked to its low 6 bits, so that is `1UL << 0` + // minus 1 == 0, breaking every mask-dependent op. The existing BitArray() test only checks + // positive first/last values (Set/Get), so it never exercised the mask at this boundary. + var symbols = new System.Collections.Generic.List(); + for (int i = 0; i < 64; i++) + symbols.Add(new FeatureSymbol("s" + i)); + var feature = new SymbolicFeature("f64", symbols); + + var all = new UlongSymbolicFeatureValueFlags(feature); + all.Set(feature.PossibleSymbols); + // A value holding every allowed symbol is fully instantiated / unconstrained. + Assert.That(all.HasAllSet(), Is.True, "64-symbol full set must satisfy HasAllSet (mask must be all-ones)"); + + // Negating the full set must yield the empty set (not everything, and not itself). + ISymbolicFeatureValueFlags none = all.Not(); + Assert.That(none.HasAnySet(), Is.False, "Not(full set) must be empty at 64 symbols"); + + // Negating a single-symbol value must select exactly the other 63. + var single = new UlongSymbolicFeatureValueFlags(feature); + single.Set(new[] { symbols[0] }); + ISymbolicFeatureValueFlags rest = single.Not(); + Assert.That(rest.Get(symbols[0]), Is.False); + Assert.That(rest.Get(symbols[63]), Is.True); + Assert.That(rest.HasAllSet(), Is.False); + } + private static void SkipAndCheck(SymbolicFeature featPos, int iSkip, string sFirst) { var symbols = featPos.PossibleSymbols.Skip(iSkip); @@ -1032,4 +1063,212 @@ private static void CheckFirstAndLastValues(FeatureSystem featSys, string sFirst FeatureStruct fs2 = FeatureStruct.NewMutable(featSys).Symbol("ncp").Value; Assert.That(fs2.ToString(), Is.EqualTo("[POS:ncp]")); } + + // --------------------------------------------------------------------------------- + // Copy-on-write characterization tests (safety net for the COW FeatureStruct refactor). 
+ // Invariant under test: cloning a FROZEN feature struct and mutating the clone must never + // alter the (potentially shared) frozen source — INCLUDING nested children. A naive/shallow + // copy-on-write would let the internal recursive mutators (which have no per-level frozen + // check) write into a shared frozen child and silently corrupt the source. "No exception" + // is therefore insufficient: every test asserts the SOURCE is byte-for-byte unchanged. + // --------------------------------------------------------------------------------- + + private static FeatureSystem CowFeatSys() + { + var featSys = new FeatureSystem + { + new ComplexFeature("cx1"), + new ComplexFeature("cx2"), + new SymbolicFeature("a", new FeatureSymbol("a1"), new FeatureSymbol("a2"), new FeatureSymbol("a3")), + new SymbolicFeature("b", new FeatureSymbol("b1"), new FeatureSymbol("b2")), + new SymbolicFeature("c", new FeatureSymbol("c1"), new FeatureSymbol("c2")), + }; + featSys.Freeze(); + return featSys; + } + + // frozen [cx1:[a:a1 b:b1]] + private static FeatureStruct BuildNestedFrozen(FeatureSystem featSys) + { + return FeatureStruct.New(featSys).Feature("cx1").EqualTo(cx1 => cx1.Symbol("a1").Symbol("b1")).Value; + } + + // frozen [a:a1 b:b1] + private static FeatureStruct BuildFlatFrozen(FeatureSystem featSys) + { + return FeatureStruct.New(featSys).Symbol("a1").Symbol("b1").Value; + } + + private static void AssertSourceUnchanged(FeatureStruct source, FeatureStruct expected) + { + Assert.That(source.ValueEquals(expected), Is.True, "frozen source value changed by a clone mutation"); + Assert.That( + source.ToString(), + Is.EqualTo(expected.ToString()), + "frozen source string changed by a clone mutation" + ); + } + + [Test] + public void Clone_FrozenNested_PriorityUnionOnClone_LeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildNestedFrozen(featSys); + FeatureStruct other = FeatureStruct.New(featSys).Feature("cx1").EqualTo(cx1 => 
cx1.Symbol("a2")).Value; + + FeatureStruct clone = source.Clone(); + clone.PriorityUnion(other); // recurses into and mutates the (shared) cx1 child + + AssertSourceUnchanged(source, BuildNestedFrozen(featSys)); + Assert.That(clone.ValueEquals(source), Is.False, "clone was not actually mutated"); + } + + [Test] + public void Clone_FrozenNested_UnionOnClone_LeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildNestedFrozen(featSys); + FeatureStruct other = FeatureStruct.New(featSys).Feature("cx1").EqualTo(cx1 => cx1.Symbol("a1")).Value; + + FeatureStruct clone = source.Clone(); + clone.Union(other); + + AssertSourceUnchanged(source, BuildNestedFrozen(featSys)); + } + + [Test] + public void Clone_FrozenNested_SubtractOnClone_LeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildNestedFrozen(featSys); + FeatureStruct other = FeatureStruct.New(featSys).Feature("cx1").EqualTo(cx1 => cx1.Symbol("a1")).Value; + + FeatureStruct clone = source.Clone(); + clone.Subtract(other); + + AssertSourceUnchanged(source, BuildNestedFrozen(featSys)); + } + + [Test] + public void Clone_FrozenFlat_AddValueOnClone_LeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildFlatFrozen(featSys); + + FeatureStruct clone = source.Clone(); + clone.AddValue(featSys.GetFeature("c"), new SymbolicFeatureValue(featSys.GetSymbol("c1"))); + + AssertSourceUnchanged(source, BuildFlatFrozen(featSys)); + Assert.That(source.ContainsFeature(featSys.GetFeature("c")), Is.False); + Assert.That(clone.ContainsFeature(featSys.GetFeature("c")), Is.True); + } + + [Test] + public void Clone_FrozenFlat_RemoveValueOnClone_LeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildFlatFrozen(featSys); + + FeatureStruct clone = source.Clone(); + clone.RemoveValue(featSys.GetFeature("b")); + + AssertSourceUnchanged(source, BuildFlatFrozen(featSys)); + 
Assert.That(source.ContainsFeature(featSys.GetFeature("b")), Is.True); + Assert.That(clone.ContainsFeature(featSys.GetFeature("b")), Is.False); + } + + [Test] + public void Clone_FrozenFlat_ClearOnClone_LeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildFlatFrozen(featSys); + + FeatureStruct clone = source.Clone(); + clone.Clear(); + + AssertSourceUnchanged(source, BuildFlatFrozen(featSys)); + Assert.That(source.IsEmpty, Is.False); + Assert.That(clone.IsEmpty, Is.True); + } + + [Test] + public void Clone_OfFrozen_IsMutable() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildFlatFrozen(featSys); + + FeatureStruct clone = source.Clone(); + + // a fresh clone is NOT frozen: it has no valid frozen hash but it can be mutated + Assert.Throws(() => clone.GetFrozenHashCode()); + Assert.DoesNotThrow(() => + clone.AddValue(featSys.GetFeature("c"), new SymbolicFeatureValue(featSys.GetSymbol("c1"))) + ); + // and the frozen source still rejects mutation + Assert.Throws(() => + source.AddValue(featSys.GetFeature("c"), new SymbolicFeatureValue(featSys.GetSymbol("c1"))) + ); + } + + [Test] + public void Clone_OfFrozen_NeverMutated_EqualsSourceBothDirections() + { + FeatureSystem featSys = CowFeatSys(); + FeatureStruct source = BuildNestedFrozen(featSys); + + FeatureStruct clone = source.Clone(); + + Assert.That(source.ValueEquals(clone), Is.True); + Assert.That(clone.ValueEquals(source), Is.True); + Assert.That(FreezableEqualityComparer.Default.Equals(source, clone), Is.True); + } + + [Test] + public void Clone_FrozenReentrant_MutateClone_PreservesSharingAndLeavesSourceUnchanged() + { + FeatureSystem featSys = CowFeatSys(); + // [cx1:[a:a1](1) cx2->1] — cx1 and cx2 are the SAME structure (re-entrant) + Func build = () => + FeatureStruct + .New(featSys) + .Feature("cx1") + .EqualTo(1, cx1 => cx1.Symbol("a1")) + .Feature("cx2") + .ReferringTo(1) + .Value; + FeatureStruct source = build(); + + FeatureStruct clone 
= source.Clone(); + clone.AddValue(featSys.GetFeature("b"), new SymbolicFeatureValue(featSys.GetSymbol("b1"))); + + AssertSourceUnchanged(source, build()); + // the clone must still share its cx1/cx2 substructure after the inflate + Assert.That( + ReferenceEquals(clone.GetValue("cx1"), clone.GetValue("cx2")), + Is.True, + "re-entrant substructure sharing was lost by clone" + ); + } + + [Test] + public void Clone_FrozenWithVariable_ReplaceVariablesOnClone_LeavesSourceVariableIntact() + { + var featSys = new FeatureSystem + { + new SymbolicFeature("a", new FeatureSymbol("a+", "+"), new FeatureSymbol("a-", "-")), + new SymbolicFeature("b", new FeatureSymbol("b+", "+"), new FeatureSymbol("b-", "-")), + }; + featSys.Freeze(); + Func build = () => + FeatureStruct.New(featSys).Feature("a").EqualToVariable("var1").Symbol("b-").Value; + FeatureStruct source = build(); + + var bindings = new VariableBindings(); + bindings["var1"] = new SymbolicFeatureValue(featSys.GetSymbol("a+")); + FeatureStruct clone = source.Clone(); + clone.ReplaceVariables(bindings); + + AssertSourceUnchanged(source, build()); + Assert.That(clone.ValueEquals(source), Is.False, "ReplaceVariables did not change the clone"); + } } diff --git a/tests/SIL.Machine.Tests/FiniteState/FstTests.cs b/tests/SIL.Machine.Tests/FiniteState/FstTests.cs index f3fc213b1..db480b741 100644 --- a/tests/SIL.Machine.Tests/FiniteState/FstTests.cs +++ b/tests/SIL.Machine.Tests/FiniteState/FstTests.cs @@ -240,4 +240,72 @@ public void Transduce() Assert.That(resultsArray.Length, Is.EqualTo(2)); Assert.That(resultsArray.Select(r => r.Output.String), Is.EquivalentTo(new[] { "cas+.p", "cas.p" })); } + + [Test] + public void TransduceNondeterministic_MatchesDeterminized() + { + // Exercises the nondeterministic FST traversal (NondeterministicFstTraversalMethod + + // VisitedStates) by transducing an FST directly, without Determinize() first. 
Oracle: + // the accepted outputs must match the determinized FST's (determinization preserves the + // relation), which is the same nas-assimilation transducer used by Transduce(). + var fst = new Fst(_operations) { UseUnification = false }; + fst.StartState = fst.CreateAcceptingState(); + fst.StartState.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas-", "nas?").Value, fst.StartState); + fst.StartState.Arcs.Add( + FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Symbol("cor+", "cor-").Value, + fst.StartState + ); + State s1 = fst.StartState.Arcs.Add( + FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, + FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value, + fst.CreateState() + ); + s1.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value, fst.StartState); + State s2 = fst.StartState.Arcs.Add( + FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, + FeatureStruct.New(PhoneticFeatSys).Symbol("cor+").Value, + fst.CreateAcceptingState() + ); + s2.Arcs.Add( + FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, + FeatureStruct.New(PhoneticFeatSys).Symbol("cor+").Value, + s2 + ); + s2.Arcs.Add( + FeatureStruct.New(PhoneticFeatSys).Symbol("nas-", "nas?").Symbol("cor+", "cor?").Value, + fst.StartState + ); + s2.Arcs.Add(FeatureStruct.New(PhoneticFeatSys).Symbol("nas+").Symbol("cor+").Value, fst.StartState); + s2.Arcs.Add( + FeatureStruct.New(PhoneticFeatSys).Symbol("cor?").Symbol("nas+").Value, + FeatureStruct.New(PhoneticFeatSys).Symbol("cor-").Value, + s1 + ); + + Assert.That(fst.IsDeterministic, Is.False, "the raw FST must take the nondeterministic traversal path"); + Fst dfst = fst.Determinize(); + + // This transducer has no epsilon-input arcs, so the raw nondeterministic traversal and the + // determinized FST accept exactly the same (input, output) relation. 
(Epsilon-input FSTs + // are always determinized before transducing in production, so raw-NFST transduce of those + // is out of scope here.) + foreach (string input in new[] { "caNp", "caN", "carp" }) + { + AnnotatedStringData ndData = CreateStringData(input); + IEnumerable> ndResults; + Assert.That( + fst.Transduce(ndData, ndData.Annotations.First, null, true, true, true, out ndResults), + Is.True, + $"nondeterministic transduce of '{input}' should succeed" + ); + AnnotatedStringData dData = CreateStringData(input); + IEnumerable> dResults; + Assert.That(dfst.Transduce(dData, dData.Annotations.First, null, true, true, true, out dResults), Is.True); + Assert.That( + ndResults.Select(r => r.Output.String).Distinct(), + Is.EquivalentTo(dResults.Select(r => r.Output.String).Distinct()), + $"nondeterministic and determinized transduce of '{input}' must accept the same outputs" + ); + } + } }