Skip to content

Commit 702b471

Browse files
committed
Updated Old Czech tests.
1 parent e539295 commit 702b471

3 files changed

Lines changed: 30 additions & 5 deletions

File tree

udapi/block/ud/cs/addmwt.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,18 @@
5555
'shape': 'subtree',
5656
}
5757

58+
# Old Czech 'toliť' (special case with 3 subtokens; the general -ť case is handled dynamically below).
59+
MWTS['toliť'] = {
60+
'form': 'to li ť',
61+
'lemma': 'ten li ť',
62+
'upos': 'DET SCONJ PART',
63+
'xpos': '* J,------------- TT-------------',
64+
'feats': '* _ _',
65+
'deprel': '* mark discourse',
66+
'main': 0,
67+
'shape': 'siblings'
68+
}
69+
5870

5971

6072
class AddMwt(udapi.block.ud.addmwt.AddMwt):

udapi/block/ud/cs/markfeatsbugs.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ def process_node(self, node):
6666
'Animacy': ['Anim', 'Inan'],
6767
'Number': ['Sing', 'Dual', 'Plur'],
6868
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
69-
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
69+
'NameType': ['Giv', 'Sur', 'Geo', 'Nat', 'Com', 'Pro', 'Oth'],
7070
'Foreign': ['Yes'],
7171
'Abbr': ['Yes']})
7272
else:
7373
self.check_allowed_features(node, {
7474
'Gender': ['Masc', 'Fem', 'Neut'],
7575
'Number': ['Sing', 'Dual', 'Plur'],
7676
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
77-
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
77+
'NameType': ['Giv', 'Sur', 'Geo', 'Nat', 'Com', 'Pro', 'Oth'],
7878
'Foreign': ['Yes'],
7979
'Abbr': ['Yes']})
8080
# ADJECTIVES ###########################################################
@@ -385,7 +385,9 @@ def process_node(self, node):
385385
})
386386
# Relative possessive determiners 'jehož' and 'jejichž' behave similarly
387387
# to the personal possessive determiners but they do not have Person.
388-
elif re.match(r'^(jeho|jejich|j[ií]ch)ž(e|to)?$', node.form.lower()):
388+
# Normally determiners do not change j->n after prepositions but we
389+
# have an example in Old Czech (štěpové zlatí, na nichžto větviech...)
390+
elif re.match(r'^(jeho|jejich|[jn][ií]ch)ž(e|to)?$', node.form.lower()):
389391
self.check_required_features(node, ['PronType', 'Poss', 'Number[psor]'])
390392
self.check_allowed_features(node, {
391393
'PronType': ['Rel'],
@@ -549,13 +551,14 @@ def process_node(self, node):
549551
})
550552
else:
551553
if node.feats['NumType'] == 'Sets':
552-
# 'jedny', 'dvoje', 'troje', 'čtvery'
554+
# 'jedny', 'dvoje', 'oboje', 'troje', 'čtvery'
553555
# Number should perhaps be only Plur because the counted noun will be Plur.
554556
# Gender is not annotated in PDT but there are different forms ('jedni' vs. 'jedny',
555557
# and in Old Czech also 'dvoji' vs. 'dvoje'), so we should allow Gender (and Animacy).
556558
self.check_required_features(node, ['NumType', 'NumForm', 'Number', 'Case'])
557559
self.check_allowed_features(node, {
558560
'NumType': ['Sets'],
561+
'PronType': ['Tot'], # for 'oboje'
559562
'NumForm': ['Word'],
560563
'Gender': ['Masc', 'Fem', 'Neut'],
561564
'Animacy': ['Anim', 'Inan'],

udapi/block/ud/fixadvmodbyupos.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ def process_node(self, node):
6969
node.deprel = 'nmod'
7070
elif node.upos == 'ADJ':
7171
node.deprel = 'amod'
72+
elif node.upos == 'NUM':
73+
node.deprel = 'nummod'
7274
elif node.upos == 'ADV':
7375
node.deprel = 'advmod'
7476
elif node.upos == 'AUX':
@@ -77,13 +79,21 @@ def process_node(self, node):
7779
node.deprel = 'dep'
7880
elif node.upos == 'SCONJ':
7981
node.deprel = 'mark'
82+
elif node.upos == 'CCONJ':
83+
node.deprel = 'cc'
8084
elif node.upos == 'X':
8185
node.deprel = 'dep'
8286
elif node.udeprel == 'nummod':
83-
if node.upos == 'PRON':
87+
if node.upos == 'ADJ':
88+
node.deprel = 'amod'
89+
elif node.upos == 'PRON':
8490
node.deprel = 'nmod'
8591
elif node.upos == 'DET':
8692
node.deprel = 'det'
93+
elif node.upos == 'ADP':
94+
node.deprel = 'case'
8795
elif node.udeprel == 'punct':
8896
if node.upos != 'PUNCT':
8997
node.deprel = 'dep'
98+
elif node.udeprel == 'obl' and node.parent.upos in ['NOUN', 'PROPN', 'PRON'] and node.parent.udeprel in ['nsubj', 'obj', 'iobj', 'obl', 'vocative', 'dislocated', 'expl', 'nmod']:
99+
node.deprel = 'nmod'

0 commit comments

Comments
 (0)