Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,13 @@ tests/SIL.Machine.Tests/Corpora/TestData/usfm/target/*
tests/SIL.Machine.Tests/Corpora/TestData/project/*
tests/SIL.Machine.Tests/Corpora/TestData/pretranslations.json
.idea

# Local-only HermitCrab benchmark fixtures (real Sena/Indonesian grammars and word lists, used
# for ad hoc perf/allocation testing) and FieldWorks project backups. These are large and/or not
# licensed for this repo, so they stay untracked; any [Explicit] benchmark that wants them falls
# back to the tracked samples/data/en-hc.xml when they are absent.
*.fwbackup
samples/data/sena-hc.xml
samples/data/sena-words.txt
samples/data/indonesian-hc.xml
samples/data/indonesian-words.txt
4 changes: 2 additions & 2 deletions src/SIL.Machine.Morphology.HermitCrab/AffixTemplate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@ public Stratum Stratum
}
}

public override IRule<Word, ShapeNode> CompileAnalysisRule(Morpher morpher)
public override IRule<Word, int> CompileAnalysisRule(Morpher morpher)
{
return new AnalysisAffixTemplateRule(morpher, this);
}

public override IRule<Word, ShapeNode> CompileSynthesisRule(Morpher morpher)
public override IRule<Word, int> CompileSynthesisRule(Morpher morpher)
{
return new SynthesisAffixTemplateRule(morpher, this);
}
Expand Down
46 changes: 28 additions & 18 deletions src/SIL.Machine.Morphology.HermitCrab/AllomorphEnvironment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,22 @@ namespace SIL.Machine.Morphology.HermitCrab
public class AllomorphEnvironment : IEquatable<AllomorphEnvironment>
{
private readonly ConstraintType _type;
private readonly Pattern<Word, ShapeNode> _leftEnv;
private readonly Matcher<Word, ShapeNode> _leftEnvMatcher;
private readonly Pattern<Word, ShapeNode> _rightEnv;
private readonly Matcher<Word, ShapeNode> _rightEnvMatcher;

public AllomorphEnvironment(
ConstraintType type,
Pattern<Word, ShapeNode> leftEnv,
Pattern<Word, ShapeNode> rightEnv
)
private readonly Pattern<Word, int> _leftEnv;
private readonly Matcher<Word, int> _leftEnvMatcher;
private readonly Pattern<Word, int> _rightEnv;
private readonly Matcher<Word, int> _rightEnvMatcher;

public AllomorphEnvironment(ConstraintType type, Pattern<Word, int> leftEnv, Pattern<Word, int> rightEnv)
{
_type = type;
if (leftEnv != null && !leftEnv.IsLeaf)
{
if (!leftEnv.IsFrozen)
throw new ArgumentException("The pattern is not frozen.", "leftEnv");
_leftEnv = leftEnv;
_leftEnvMatcher = new Matcher<Word, ShapeNode>(
_leftEnvMatcher = new Matcher<Word, int>(
leftEnv,
new MatcherSettings<ShapeNode>
new MatcherSettings<int>
{
AnchoredToStart = true,
Direction = Direction.RightToLeft,
Expand All @@ -47,9 +43,9 @@ Pattern<Word, ShapeNode> rightEnv
if (!rightEnv.IsFrozen)
throw new ArgumentException("The pattern is not frozen.", "rightEnv");
_rightEnv = rightEnv;
_rightEnvMatcher = new Matcher<Word, ShapeNode>(
_rightEnvMatcher = new Matcher<Word, int>(
rightEnv,
new MatcherSettings<ShapeNode>
new MatcherSettings<int>
{
AnchoredToStart = true,
Filter = ann =>
Expand All @@ -68,12 +64,12 @@ public ConstraintType Type

public string Name { get; set; }

public Pattern<Word, ShapeNode> LeftEnvironment
public Pattern<Word, int> LeftEnvironment
{
get { return _leftEnv; }
}

public Pattern<Word, ShapeNode> RightEnvironment
public Pattern<Word, int> RightEnvironment
{
get { return _rightEnv; }
}
Expand All @@ -87,10 +83,24 @@ public bool IsWordValid(Word word, Annotation<ShapeNode> morph)

private bool IsMatch(Word word, Annotation<ShapeNode> morph)
{
if (_leftEnvMatcher != null && !_leftEnvMatcher.IsMatch(word, morph.Range.Start.Prev))
// RUSTIFY Stage 2: the env matchers are Matcher<Word,int>; pass the bracketing node's
// direction-aware start offset (left env matches RtL, right env LtR — see the ctor).
if (
_leftEnvMatcher != null
&& !_leftEnvMatcher.IsMatch(
word,
word.Shape.MatchStartOffset(morph.Range.Start.Prev, Direction.RightToLeft)
)
)
return false;

if (_rightEnvMatcher != null && !_rightEnvMatcher.IsMatch(word, morph.Range.End.Next))
if (
_rightEnvMatcher != null
&& !_rightEnvMatcher.IsMatch(
word,
word.Shape.MatchStartOffset(morph.Range.End.Next, Direction.LeftToRight)
)
)
return false;

return true;
Expand Down
43 changes: 24 additions & 19 deletions src/SIL.Machine.Morphology.HermitCrab/AnalysisAffixTemplateRule.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,27 @@
using System.Collections.Generic;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using SIL.Machine.Annotations;
using SIL.Machine.FeatureModel;
using SIL.Machine.Rules;
using SIL.ObjectModel;
#if !SINGLE_THREADED
using System;
using System.Collections.Concurrent;
using System.Threading.Tasks;
#endif

namespace SIL.Machine.Morphology.HermitCrab
{
internal class AnalysisAffixTemplateRule : IRule<Word, ShapeNode>
internal class AnalysisAffixTemplateRule : IRule<Word, int>
{
private readonly Morpher _morpher;
private readonly AffixTemplate _template;
private readonly List<IRule<Word, ShapeNode>> _rules;
private readonly List<IRule<Word, int>> _rules;

public AnalysisAffixTemplateRule(Morpher morpher, AffixTemplate template)
{
_morpher = morpher;
_template = template;
_rules = new List<IRule<Word, ShapeNode>>(
template.Slots.Select(slot => new RuleBatch<Word, ShapeNode>(
_rules = new List<IRule<Word, int>>(
template.Slots.Select(slot => new RuleBatch<Word, int>(
slot.Rules.Select(mr => mr.CompileAnalysisRule(morpher)),
false,
FreezableEqualityComparer<Word>.Default
Expand All @@ -47,18 +45,24 @@ public IEnumerable<Word> Apply(Word input)
inWord.Freeze();

var output = new HashSet<Word>(FreezableEqualityComparer<Word>.Default);
#if SINGLE_THREADED
ApplySlots(inWord, _rules.Count - 1, output);
#else
ParallelApplySlots(inWord, output);
#endif
if (_morpher.MaxDegreeOfParallelism == 1)
ApplySlots(inWord, _rules.Count - 1, output);
else
ParallelApplySlots(inWord, output);

foreach (Word outWord in output)
outWord.SyntacticFeatureStruct.Add(fs);
{
// Clone-then-reassign, not an in-place mutation: outWord may already be frozen (it
// came out of the rule cascade above), and a frozen FeatureStruct must not be mutated
// in place — a future memoized/shared result instance would otherwise leak this edit
// into every branch that shares it.
FeatureStruct sfs = outWord.SyntacticFeatureStruct.Clone();
sfs.Add(fs);
outWord.SyntacticFeatureStruct = sfs;
}
return output;
}

#if SINGLE_THREADED
private void ApplySlots(Word inWord, int index, HashSet<Word> output)
{
for (int i = index; i >= 0; i--)
Expand All @@ -78,9 +82,10 @@ private void ApplySlots(Word inWord, int index, HashSet<Word> output)
_morpher.TraceManager.EndUnapplyTemplate(_template, inWord, true);
output.Add(inWord);
}
#else

private void ParallelApplySlots(Word inWord, HashSet<Word> output)
{
var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = _morpher.MaxDegreeOfParallelism };
var outStack = new ConcurrentStack<Word>();
var from = new ConcurrentStack<Tuple<Word, int>>();
from.Push(Tuple.Create(inWord, _rules.Count - 1));
Expand All @@ -90,6 +95,7 @@ private void ParallelApplySlots(Word inWord, HashSet<Word> output)
to.Clear();
Parallel.ForEach(
from,
parallelOptions,
work =>
{
bool add = true;
Expand Down Expand Up @@ -126,6 +132,5 @@ private void ParallelApplySlots(Word inWord, HashSet<Word> output)

output.UnionWith(outStack);
}
#endif
}
}
4 changes: 2 additions & 2 deletions src/SIL.Machine.Morphology.HermitCrab/AnalysisLanguageRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

namespace SIL.Machine.Morphology.HermitCrab
{
internal class AnalysisLanguageRule : IRule<Word, ShapeNode>
internal class AnalysisLanguageRule : IRule<Word, int>
{
private readonly Morpher _morpher;
private readonly List<Stratum> _strata;
private readonly List<IRule<Word, ShapeNode>> _rules;
private readonly List<IRule<Word, int>> _rules;

public AnalysisLanguageRule(Morpher morpher, Language language)
{
Expand Down
59 changes: 33 additions & 26 deletions src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,28 @@

namespace SIL.Machine.Morphology.HermitCrab
{
internal class AnalysisStratumRule : IRule<Word, ShapeNode>
internal class AnalysisStratumRule : IRule<Word, int>
{
private readonly IRule<Word, ShapeNode> _mrulesRule;
private readonly IRule<Word, ShapeNode> _prulesRule;
private readonly IRule<Word, ShapeNode> _templatesRule;
private readonly IRule<Word, int> _mrulesRule;
private readonly IRule<Word, int> _prulesRule;
private readonly IRule<Word, int> _templatesRule;
private readonly Stratum _stratum;
private readonly Morpher _morpher;

public AnalysisStratumRule(Morpher morpher, Stratum stratum)
{
_stratum = stratum;
_morpher = morpher;
_prulesRule = new LinearRuleCascade<Word, ShapeNode>(
_prulesRule = new LinearRuleCascade<Word, int>(
stratum.PhonologicalRules.Select(prule => CompilePhonologicalRule(prule, morpher)).Reverse()
);
_templatesRule = new RuleBatch<Word, ShapeNode>(
_templatesRule = new RuleBatch<Word, int>(
stratum.AffixTemplates.Select(template => CompileAffixTemplate(template, morpher)),
false,
FreezableEqualityComparer<Word>.Default
);
_mrulesRule = null;
IEnumerable<IRule<Word, ShapeNode>> mrules = stratum
IEnumerable<IRule<Word, int>> mrules = stratum
.MorphologicalRules.Select(mrule => CompileMorphologicalRule(mrule, morpher))
.Reverse();
switch (stratum.MorphologicalRuleOrder)
Expand All @@ -39,31 +39,38 @@ public AnalysisStratumRule(Morpher morpher, Stratum stratum)
// because morphological rules should be considered optional
// during unapplication (they are obligatory during application,
// but we don't know they have been applied during unapplication).
_mrulesRule = new PermutationRuleCascade<Word, ShapeNode>(
_mrulesRule = new PermutationRuleCascade<Word, int>(
mrules,
true,
FreezableEqualityComparer<Word>.Default
);
break;
case MorphologicalRuleOrder.Unordered:
#if SINGLE_THREADED
_mrulesRule = new CombinationRuleCascade<Word, ShapeNode>(
mrules,
true,
FreezableEqualityComparer<Word>.Default
);
#else
_mrulesRule = new ParallelCombinationRuleCascade<Word, ShapeNode>(
mrules,
true,
FreezableEqualityComparer<Word>.Default
);
#endif
// Single-threaded when the caller caps within-word parallelism (e.g. it
// parallelizes across words itself); parallel cascade otherwise.
_mrulesRule =
morpher.MaxDegreeOfParallelism == 1
? (IRule<Word, int>)
new CombinationRuleCascade<Word, int>(
mrules,
true,
FreezableEqualityComparer<Word>.Default
)
: new ParallelCombinationRuleCascade<Word, int>(
mrules,
true,
FreezableEqualityComparer<Word>.Default
)
{
// Honor the within-word parallelism cap rather than running at
// the default (effectively unbounded) scheduler degree.
MaxDegreeOfParallelism = morpher.MaxDegreeOfParallelism,
};
break;
}
}

private IRule<Word, ShapeNode> CompileAffixTemplate(AffixTemplate template, Morpher morpher)
private IRule<Word, int> CompileAffixTemplate(AffixTemplate template, Morpher morpher)
{
try
{
Expand All @@ -75,7 +82,7 @@ private IRule<Word, ShapeNode> CompileAffixTemplate(AffixTemplate template, Morp
}
}

private IRule<Word, ShapeNode> CompileMorphologicalRule(IMorphologicalRule mrule, Morpher morpher)
private IRule<Word, int> CompileMorphologicalRule(IMorphologicalRule mrule, Morpher morpher)
{
try
{
Expand All @@ -87,7 +94,7 @@ private IRule<Word, ShapeNode> CompileMorphologicalRule(IMorphologicalRule mrule
}
}

private IRule<Word, ShapeNode> CompilePhonologicalRule(IPhonologicalRule prule, Morpher morpher)
private IRule<Word, int> CompilePhonologicalRule(IPhonologicalRule prule, Morpher morpher)
{
try
{
Expand Down Expand Up @@ -149,7 +156,7 @@ public IEnumerable<Word> Apply(Word input)

private IEnumerable<Word> ApplyMorphologicalRules(Word input)
{
foreach (Word mruleOutWord in _mrulesRule.Apply(input).Distinct(FreezableEqualityComparer<Word>.Default))
foreach (Word mruleOutWord in _mrulesRule.Apply(input))
{
switch (_stratum.MorphologicalRuleOrder)
{
Expand All @@ -168,7 +175,7 @@ private IEnumerable<Word> ApplyMorphologicalRules(Word input)

private IEnumerable<Word> ApplyTemplates(Word input)
{
foreach (Word tempOutWord in _templatesRule.Apply(input).Distinct(FreezableEqualityComparer<Word>.Default))
foreach (Word tempOutWord in _templatesRule.Apply(input))
{
switch (_stratum.MorphologicalRuleOrder)
{
Expand Down
4 changes: 2 additions & 2 deletions src/SIL.Machine.Morphology.HermitCrab/HCRuleBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ protected HCRuleBase()

public string Name { get; set; }

public abstract IRule<Word, ShapeNode> CompileAnalysisRule(Morpher morpher);
public abstract IRule<Word, int> CompileAnalysisRule(Morpher morpher);

public abstract IRule<Word, ShapeNode> CompileSynthesisRule(Morpher morpher);
public abstract IRule<Word, int> CompileSynthesisRule(Morpher morpher);

public IDictionary Properties
{
Expand Down
Loading
Loading