Skip to content

Commit b6533a0

Browse files
committed
Old Czech tests.
1 parent 702b471 commit b6533a0

3 files changed

Lines changed: 32 additions & 14 deletions

File tree

udapi/block/ud/cs/addmwt.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
'abychme': {'form': 'aby bychme', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin'},
1818
'kdybychme': {'form': 'když bychme', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin'},
1919
'abyste': {'form': 'aby byste', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Plur|Person=2|VerbForm=Fin'},
20+
'abyšte': {'form': 'aby byšte', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Plur|Person=2|VerbForm=Fin'},
2021
'kdybyste': {'form': 'když byste', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Plur|Person=2|VerbForm=Fin'},
22+
'kdybyšte': {'form': 'když byšte', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Plur|Person=2|VerbForm=Fin'},
2123
# Old Czech 'abyšta' is a dual-number form. It can be 2nd or 3rd person; the only example in the data so far is 3rd person.
2224
'abyšta': {'form': 'aby byšta', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Dual|Person=3|VerbForm=Fin'},
2325
'kdybyšta': {'form': 'když byšta', 'feats': '_ Aspect=Imp|Mood=Cnd|Number=Dual|Person=3|VerbForm=Fin'},
@@ -150,7 +152,7 @@ def multiword_analysis(self, node):
150152
# could be masculine or neuter. We pick Gender=Masc and Animacy=Anim
151153
# by default, unless the original token was annotated as Animacy=Inan
152154
# or Gender=Neut.
153-
m = re.match(r"^(na|nade|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
155+
m = re.match(r"^(na|nade|o|po|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
154156
if m:
155157
node.misc['AddMwt'] = ''
156158
# Remove vocalization from 'přěde' (přěd něj) but keep it in 'skrze'

udapi/block/ud/cs/markfeatsbugs.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ def process_node(self, node):
9191
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
9292
'NameType': ['Giv', 'Sur', 'Nat'], # for possessive adjectives derived from personal names
9393
'Emph': ['Yes'],
94-
'Foreign': ['Yes']})
94+
'Foreign': ['Yes'],
95+
'Abbr': ['Yes']})
9596
else:
9697
self.check_required_features(node, ['Poss', 'Gender[psor]', 'Gender', 'Number', 'Case'])
9798
self.check_allowed_features(node, {
@@ -102,7 +103,8 @@ def process_node(self, node):
102103
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
103104
'NameType': ['Giv', 'Sur', 'Nat'], # for possessive adjectives derived from personal names
104105
'Emph': ['Yes'],
105-
'Foreign': ['Yes']})
106+
'Foreign': ['Yes'],
107+
'Abbr': ['Yes']})
106108
elif node.feats['NumType'] == 'Ord' or node.feats['NumType'] == 'Mult': # ordinal numerals are a subtype of adjectives; same for some multiplicative numerals (dvojí, trojí)
107109
if node.feats['Gender'] == 'Masc':
108110
self.check_required_features(node, ['NumType', 'Gender', 'Animacy', 'Number', 'Case'])
@@ -114,7 +116,8 @@ def process_node(self, node):
114116
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
115117
'Variant': ['Short'], # sedmer (Mult Short) duch tvój; pól čtverta (Ord Short) komára
116118
'Emph': ['Yes'],
117-
'Foreign': ['Yes']})
119+
'Foreign': ['Yes'],
120+
'Abbr': ['Yes']})
118121
else:
119122
self.check_required_features(node, ['NumType', 'Gender', 'Number', 'Case'])
120123
self.check_allowed_features(node, {
@@ -124,7 +127,8 @@ def process_node(self, node):
124127
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
125128
'Variant': ['Short'],
126129
'Emph': ['Yes'],
127-
'Foreign': ['Yes']})
130+
'Foreign': ['Yes'],
131+
'Abbr': ['Yes']})
128132
elif node.feats['VerbForm'] == 'Part': # participles (except l-participles) are a subtype of adjectives
129133
self.check_required_features(node, ['VerbForm', 'Voice'])
130134
if node.feats['Voice'] == 'Act': # active participles have tense, passives don't but they have degree
@@ -143,7 +147,8 @@ def process_node(self, node):
143147
'Polarity': ['Pos', 'Neg'],
144148
'Variant': ['Short'],
145149
'Emph': ['Yes'],
146-
'Foreign': ['Yes']})
150+
'Foreign': ['Yes'],
151+
'Abbr': ['Yes']})
147152
else:
148153
# Aspect is not required in general because it is omitted for participles of biaspectual verbs (e.g. 'analyzující').
149154
self.check_required_features(node, ['VerbForm', 'Voice', 'Tense', 'Gender', 'Number', 'Case', 'Polarity'])
@@ -158,7 +163,8 @@ def process_node(self, node):
158163
'Polarity': ['Pos', 'Neg'],
159164
'Variant': ['Short'],
160165
'Emph': ['Yes'],
161-
'Foreign': ['Yes']})
166+
'Foreign': ['Yes'],
167+
'Abbr': ['Yes']})
162168
else:
163169
if node.feats['Gender'] == 'Masc':
164170
# Aspect is not required in general because it is omitted for participles of biaspectual verbs (e.g. 'analyzovaný').
@@ -175,7 +181,8 @@ def process_node(self, node):
175181
'Degree': ['Pos', 'Cmp', 'Sup'],
176182
'Variant': ['Short'],
177183
'Emph': ['Yes'],
178-
'Foreign': ['Yes']})
184+
'Foreign': ['Yes'],
185+
'Abbr': ['Yes']})
179186
else:
180187
# Aspect is not required in general because it is omitted for participles of biaspectual verbs (e.g. 'analyzovaný').
181188
self.check_required_features(node, ['VerbForm', 'Voice', 'Gender', 'Number', 'Case', 'Polarity', 'Degree'])
@@ -190,7 +197,8 @@ def process_node(self, node):
190197
'Degree': ['Pos', 'Cmp', 'Sup'],
191198
'Variant': ['Short'],
192199
'Emph': ['Yes'],
193-
'Foreign': ['Yes']})
200+
'Foreign': ['Yes'],
201+
'Abbr': ['Yes']})
194202
else: # regular adjectives, including short forms
195203
if node.feats['Gender'] == 'Masc':
196204
self.check_required_features(node, ['Gender', 'Animacy', 'Number', 'Case', 'Degree', 'Polarity'])
@@ -203,7 +211,8 @@ def process_node(self, node):
203211
'Polarity': ['Pos', 'Neg'],
204212
'Variant': ['Short'],
205213
'Emph': ['Yes'],
206-
'Foreign': ['Yes']})
214+
'Foreign': ['Yes'],
215+
'Abbr': ['Yes']})
207216
else:
208217
self.check_required_features(node, ['Gender', 'Number', 'Case', 'Degree', 'Polarity'])
209218
self.check_allowed_features(node, {
@@ -214,7 +223,8 @@ def process_node(self, node):
214223
'Polarity': ['Pos', 'Neg'],
215224
'Variant': ['Short'],
216225
'Emph': ['Yes'],
217-
'Foreign': ['Yes']})
226+
'Foreign': ['Yes'],
227+
'Abbr': ['Yes']})
218228
# PRONOUNS #############################################################
219229
elif node.upos == 'PRON':
220230
self.check_required_features(node, ['PronType'])
@@ -845,12 +855,14 @@ def process_node(self, node):
845855
# SUBORDINATING CONJUNCTIONS ###########################################
846856
elif node.upos == 'SCONJ':
847857
self.check_allowed_features(node, {
848-
'Emph': ['Yes']
858+
'Emph': ['Yes'],
859+
'Abbr': ['Yes']
849860
})
850861
# COORDINATING CONJUNCTIONS ############################################
851862
elif node.upos == 'CCONJ':
852863
self.check_allowed_features(node, {
853-
'Emph': ['Yes']
864+
'Emph': ['Yes'],
865+
'Abbr': ['Yes']
854866
})
855867
# PARTICLES ############################################################
856868
elif node.upos == 'PART':

udapi/block/ud/fixadvmodbyupos.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ def process_node(self, node):
4444
if node.upos != 'AUX':
4545
node.deprel = 'dep'
4646
elif node.udeprel == 'case':
47-
if node.upos == 'DET':
47+
if node.upos == 'ADJ':
48+
node.deprel = 'amod'
49+
elif node.upos == 'DET':
4850
node.deprel = 'det'
4951
elif node.upos == 'PRON':
5052
node.deprel = 'nmod'
@@ -64,6 +66,8 @@ def process_node(self, node):
6466
node.deprel = 'det'
6567
elif node.upos == 'INTJ':
6668
node.deprel = 'discourse'
69+
elif node.upos == 'NOUN':
70+
node.deprel = 'dep'
6771
elif node.udeprel == 'det':
6872
if node.upos == 'NOUN':
6973
node.deprel = 'nmod'

0 commit comments

Comments
 (0)