Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion machine/corpora/n_parallel_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def _create_rows(
content_type = row.content_type
text_id = text_id or row.text_id
if self.corpora[i].is_scripture:
refs[i] = self._correct_versification([row.ref] if row.ref is None else default_refs, i)
refs[i] = self._correct_versification([row.ref] if row.ref is not None else default_refs, i)
else:
refs[i] = default_refs
flags[i] = row.flags
Expand Down
47 changes: 44 additions & 3 deletions machine/scripture/verse_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def from_string(cls, verse_str: str, versification: Optional[Versification] = No
return VerseRef(b_cv[0], c_v[0], c_v[1], versification)

@classmethod
def try_from_string(cls, verse_str: str, versification: Optional[Versification] = None) -> Optional[VerseRef]:
    """Parse ``verse_str`` into a verse reference, returning ``None`` on failure.

    This is the non-raising counterpart of ``from_string``: the arguments are
    forwarded unchanged and a ``ValueError`` from the parser is turned into a
    ``None`` result.

    Args:
        verse_str: Textual verse reference to parse.
        versification: Optional versification handed through to
            ``from_string``; defaults to ``None``.

    Returns:
        Whatever ``from_string`` returns, or ``None`` if it raised
        ``ValueError``.
    """
    try:
        # Forward the versification too; dropping it here would silently
        # parse against the default instead of the caller's versification.
        return cls.from_string(verse_str, versification)
    except ValueError:
        return None

Expand Down Expand Up @@ -402,6 +402,18 @@ def exact_equals(self, other: object) -> bool:
and self.versification == other.versification
)

def remove_segments(self) -> VerseRef:
    """Return a new reference equivalent to this one without verse segments.

    When this reference has no segment, a plain copy is returned. Otherwise
    the reference is rebuilt from the book, the chapter number and the
    comma-separated verse numbers of every verse in ``all_verses()``, parsed
    against this reference's versification.

    Returns:
        The rebuilt reference, or a simplified copy of this reference when
        the rebuilt string cannot be parsed.
    """
    if self.segment():
        verse_numbers = ",".join([str(part.verse_num) for part in self.all_verses()])
        stripped = VerseRef.try_from_string(
            f"{self.book} {self.chapter_num}:{verse_numbers}",
            self.versification,
        )
        if stripped is None:
            # Parsing failed: fall back to a copy reduced by simplify().
            # NOTE(review): simplify() presumably drops the verse string — confirm.
            stripped = self.copy()
            stripped.simplify()
        return stripped

    return self.copy()

def __eq__(self, other):
if not isinstance(other, VerseRef):
return NotImplemented
Expand Down Expand Up @@ -816,9 +828,11 @@ def first_included_verse(self, book_num: int, chapter_num: int) -> Optional[Vers
def is_excluded(self, bbbcccvvv: int) -> bool:
    """Report whether ``bbbcccvvv`` is a member of ``self.excluded_verses``.

    Args:
        bbbcccvvv: Integer verse identifier to look up.
            NOTE(review): presumably a packed book/chapter/verse number — confirm.

    Returns:
        ``True`` if the identifier is in ``excluded_verses``, else ``False``.
    """
    excluded = self.excluded_verses
    return bbbcccvvv in excluded

def change_versification(self, vref: VerseRef) -> bool:
def change_versification(self, vref: VerseRef, ignore_segments: bool = False) -> bool:
if vref.has_multiple:
return self._change_versification_with_ranges(vref)
if vref.segment() and not ignore_segments:
return self._change_versification_with_segments(vref)

if vref.versification == NULL_VERSIFICATION:
vref.versification = self
Expand Down Expand Up @@ -904,6 +918,33 @@ def _change_versification_with_ranges(self, vref: VerseRef) -> bool:

return all_same_chapter

def _change_versification_with_segments(self, orig_vref: VerseRef) -> bool:
    """Convert a segmented verse reference to this versification in place.

    The conversion is first tried on a copy with its segment left intact. If
    the converted copy no longer has a segment, that result is used as is.
    Otherwise the segment-free form of the original is converted, and when
    that yields different verse numbers the original segments are re-attached
    to the new numbers.

    Args:
        orig_vref: Reference to convert; it is overwritten via ``copy_from``.

    Returns:
        The value returned by the last ``change_versification`` call made
        here (named "all in one chapter" by the original code).
    """
    converted = orig_vref.copy()
    same_chapter = self.change_versification(converted, ignore_segments=True)

    if converted.segment():
        # Segment survived the conversion: convert the bare verse numbers
        # separately to see where they land in this versification.
        bare = orig_vref.remove_segments()
        same_chapter = self.change_versification(bare, ignore_segments=True)

        if bare != converted.remove_segments():
            # Pair each original verse (which carries the segment) with the
            # converted verse (which carries the right number).
            renumbered = [
                mapped.verse + segmented.segment()
                for segmented, mapped in zip(orig_vref.all_verses(), bare.all_verses())
            ]
            rebuilt = VerseRef.try_from_string(
                f"{bare.book} {bare.chapter_num}:{','.join(renumbered)}", self
            )
            if rebuilt is not None:
                orig_vref.copy_from(rebuilt)
                return same_chapter

    # Either the segment was consumed by the conversion, the numbers did not
    # change, or the rebuilt string failed to parse: keep the first result.
    orig_vref.copy_from(converted)
    return same_chapter


class VerseMappings:
def __init__(self) -> None:
Expand Down
108 changes: 108 additions & 0 deletions tests/corpora/test_parallel_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,6 +989,114 @@ def test_get_rows_verse_ref_out_of_order() -> None:
assert rows[3].target_segment == "target chapter one, verse four . target chapter one, verse five .".split()


def test_get_rows_different_versifications_with_verse_segments():
    """Rows align across versifications when either side has verse segments."""
    # Reference -> text, in corpus order (dicts preserve insertion order).
    source_text = {
        "NUM 17:1a": "source chapter seventeen, verse one a .",
        "NUM 17:1b": "source chapter seventeen, verse one b .",
        "NUM 17:2": "source chapter seventeen, verse two .",
        "NUM 17:3": "source chapter seventeen, verse three .",
        "NUM 17:4": "source chapter seventeen, verse four .",
    }
    target_text = {
        "NUM 16:36": "target chapter sixteen, verse thirty six .",
        "NUM 16:37": "target chapter sixteen, verse thirty seven .",
        "NUM 16:38": "target chapter sixteen, verse thirty eight .",
        "NUM 16:39a": "target chapter sixteen, verse thirty nine a .",
        "NUM 16:39b": "target chapter sixteen, verse thirty nine b .",
    }

    source_rows = [
        text_row("NUM", ScriptureRef.parse(ref, ORIGINAL_VERSIFICATION), text)
        for ref, text in source_text.items()
    ]
    source_corpus = DictionaryTextCorpus(MemoryText("NUM", source_rows))
    source_corpus.versification = ORIGINAL_VERSIFICATION

    target_rows = [
        text_row("NUM", ScriptureRef.parse(ref, ENGLISH_VERSIFICATION), text)
        for ref, text in target_text.items()
    ]
    target_corpus = DictionaryTextCorpus(MemoryText("NUM", target_rows))
    target_corpus.versification = ENGLISH_VERSIFICATION

    # English vs. Original
    # NUM 16:36-50 = NUM 17:1-15
    # NUM 17:1-13 = NUM 17:16-28
    parallel_corpus = StandardParallelTextCorpus(source_corpus, target_corpus)
    rows = list(parallel_corpus.get_rows())

    # Expected (source ref, target ref) pairing, one entry per output row.
    expected_pairs = [
        ("NUM 17:1a", "NUM 16:36"),
        ("NUM 17:1b", "NUM 16:36"),
        ("NUM 17:2", "NUM 16:37"),
        ("NUM 17:3", "NUM 16:38"),
        ("NUM 17:4", "NUM 16:39a"),
        ("NUM 17:4", "NUM 16:39b"),
    ]

    assert len(rows) == 6

    for row, (source_ref, target_ref) in zip(rows, expected_pairs):
        assert row.source_refs == [ScriptureRef.parse(source_ref, ORIGINAL_VERSIFICATION)]
        assert row.target_refs == [ScriptureRef.parse(target_ref, ENGLISH_VERSIFICATION)]
        assert row.source_segment == source_text[source_ref].split()
        assert row.target_segment == target_text[target_ref].split()


def test_to_pandas() -> None:
source_corpus = DictionaryTextCorpus(
MemoryText(
Expand Down
51 changes: 51 additions & 0 deletions tests/scripture/test_verse_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,54 @@ def test_unbridge() -> None:
assert VerseRef.from_string("EXO 6:9a,9b").unbridge() == VerseRef.from_string("EXO 6:9a")
assert VerseRef.from_string("EXO 6:4-10").unbridge() == VerseRef.from_string("EXO 6:4")
assert VerseRef.from_string("EXO 6:150monkeys").unbridge() == VerseRef.from_string("EXO 6:150monkeys")


def test_remove_segments() -> None:
    """remove_segments() strips segment letters and lists each verse explicitly."""
    # (input reference, expected reference after removing segments)
    cases = [
        ("MAT 1:1", "MAT 1:1"),
        ("MAT 1:1a", "MAT 1:1"),
        ("MAT 1:1a-2b,5a", "MAT 1:1,2,5"),
        ("MAT 1:1a-3b", "MAT 1:1,2,3"),
    ]
    for original, expected in cases:
        assert VerseRef.from_string(original).remove_segments() == VerseRef.from_string(expected)


def test_change_versification_with_segments() -> None:
    """change_versification() maps verse numbers while keeping verse segments."""

    # English vs. Original
    # NUM 16:36-50 = NUM 17:1-15
    # NUM 17:1-13 = NUM 17:16-28
    # ESG 1:1 = ESG 1:1a
    # ESG 1:2 = ESG 1:1b

    # (reference, versification it starts in, versification to convert to, expected string)
    cases = [
        ("NUM 17:1", ENGLISH_VERSIFICATION, ORIGINAL_VERSIFICATION, "NUM 17:16"),
        ("NUM 17:1a", ENGLISH_VERSIFICATION, ORIGINAL_VERSIFICATION, "NUM 17:16a"),
        ("NUM 17:1a-2b,5a", ENGLISH_VERSIFICATION, ORIGINAL_VERSIFICATION, "NUM 17:16a-17b,20a"),
        ("NUM 17:13a-15a", ORIGINAL_VERSIFICATION, ENGLISH_VERSIFICATION, "NUM 16:48a-50a"),
        ("NUM 17:1a", ENGLISH_VERSIFICATION, ENGLISH_VERSIFICATION, "NUM 17:1a"),
        ("ESG 1:1b", ORIGINAL_VERSIFICATION, ENGLISH_VERSIFICATION, "ESG 1:2"),
        ("ESG 1:2", ENGLISH_VERSIFICATION, ORIGINAL_VERSIFICATION, "ESG 1:1b"),
    ]

    for reference, start_versification, end_versification, expected in cases:
        verse_ref = VerseRef.from_string(reference, start_versification)
        verse_ref.change_versification(end_versification)
        assert verse_ref.versification == end_versification
        assert str(verse_ref) == expected
Loading