-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcrk.hfscript
executable file
·61 lines (33 loc) · 2.85 KB
/
crk.hfscript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/sh
# Script for creating a descriptive analyzer and a normative generator for crk
#
# Reads source files below "$GTLANG_crk/src" and writes the following files
# into the current working directory:
#   crk.lexc                 concatenated LEXC source
#   crk-morph.hfst           compiled LEXC transducer
#   crk-phon.hfst            compiled phonology (from the XFSCRIPT source)
#   crk-orth.hfst            inverted spelling-relaxation transducer
#   crk-err-filter.hfst      filter rejecting paths containing "+Err/Orth"
#   crk-gen-norm.hfst(ol)    normative generator
#   crk-anl-norm.hfst(ol)    normative analyser
#   crk-anl-desc.hfst(ol)    descriptive analyser
#
# Requires the HFST command-line tools (hfst-lexc, hfst-xfst, hfst-compose,
# hfst-invert, hfst-minimize, hfst-regexp2fst, hfst-fst2fst) on PATH and the
# environment variable GTLANG_crk to be set.

# Stop at the first failing step: every later step consumes the files produced
# by earlier ones, so continuing after a failure only yields stale or missing
# transducers. (Plain sh has no portable pipefail, so within a pipeline only
# the status of the last command is checked.)
set -e

# Without this guard an unset GTLANG_crk would silently turn every source path
# into "/src/...".
: "${GTLANG_crk:?GTLANG_crk must be set}"

morph_dir="$GTLANG_crk/src/morphology"

echo 'Concatenating LEXC files.'
# root.lexc goes first, followed by the affix and stem lexicons in the same
# order as before. The repeated slashes are kept exactly as in the original
# paths.
cat \
  "$morph_dir/root.lexc" \
  "$morph_dir/affixes///noun_affixes.lexc" \
  "$morph_dir/affixes///propernouns.lexc" \
  "$morph_dir/affixes///verb_affixes.lexc" \
  "$morph_dir/stems///abbreviations.lexc" \
  "$morph_dir/stems///crk-propernouns.lexc" \
  "$morph_dir/stems///noun_stems.lexc" \
  "$morph_dir/stems///numerals.lexc" \
  "$morph_dir/stems///particles.lexc" \
  "$morph_dir/stems///pronouns.lexc" \
  "$morph_dir/stems///verb_stems.lexc" \
  > crk.lexc

echo 'Compiling LEXC code.'
hfst-lexc crk.lexc -o crk-morph.hfst

# Commented out old TWOLC-specific code
# echo 'Compiling TWOLC code.' ;
# hfst-twolc -i $GTLANG_crk/src/phonology/crk-phon.twolc -o crk-phon.hfst
# echo 'Composing and intersecting LEXC and TWOL transducers.' ;
# hfst-compose-intersect crk-morph.hfst crk-phon.hfst | hfst-minimize - -o crk-gen-norm.hfst

echo 'Compiling phonology in XFSCRIPT code.'
# NOTE(review): the doubled "src/src" differs from the "src/<dir>" pattern used
# by every other path in this script -- confirm against the repository layout.
hfst-xfst -e "source $GTLANG_crk/src/src/fst/phonology.xfscript" -E "save stack crk-phon.hfst"

echo 'Composing together the morphological (LEXC) and phonological (XFSCRIPT) transducers.'
# The result must be named crk-gen-norm.hfst: the inversion step and the
# +Err/Orth filtering step below both read it under that name.
hfst-compose -F -1 crk-morph.hfst -2 crk-phon.hfst -o crk-gen-norm.hfst

echo 'Inverting normative generator transducer into a normative analyser transducer.'
hfst-invert crk-gen-norm.hfst -o crk-anl-norm.hfst

echo 'Compiling regular expression implementing spelling-relaxation.'
hfst-regexp2fst -S -i "$GTLANG_crk/src/orthography/spellrelax.regex" \
  | hfst-invert -o crk-orth.hfst

echo 'Composing spelling relaxation transducer with normative analyser transducer to create descriptive analyser.'
# This runs before the +Err/Orth filter below, so the descriptive analyser is
# built from the unfiltered normative analyser.
hfst-compose -F -1 crk-orth.hfst -2 crk-anl-norm.hfst \
  | hfst-minimize - -o crk-anl-desc.hfst
# hfst-invert crk-desc-anl.hfst -o crk-desc-gen.hfst

echo 'Compiling filter to remove non-normative forms with +Err/Orth tag.'
echo '~[ $[ "+Err/Orth" ] ] ;' | hfst-regexp2fst -S -o crk-err-filter.hfst

echo 'Removing lexicalised non-normative forms from normative analyzer and generator.'
# Filter the generator, write the inverted result over the normative analyser,
# then invert that back so the generator is replaced by its filtered version.
hfst-compose -F -1 crk-err-filter.hfst -2 crk-gen-norm.hfst \
  | hfst-invert -o crk-anl-norm.hfst
hfst-invert -i crk-anl-norm.hfst -o crk-gen-norm.hfst

echo 'Creating optimized-lookup versions of selected transducers.'
for fst in crk-anl-desc crk-anl-norm crk-gen-norm; do
  hfst-fst2fst -O -i "$fst.hfst" -o "$fst.hfstol"
done

# Creating an orthographically omnivorous (descriptive) analyser
# hfst-compose -1 $GTLANG_crk/src/orthography/Cans-to-Latn.compose.hfst -2 $GTLANG_crk/src/orthography/macron-to-circumfl.compose.hfst -o - | hfst-compose -F -1 - -2 crk-anl-desc.hfst | hfst-minimize | hfst-fst2fst -O - -o crk-anl-omni.hfstol