Skip to content

Commit 9b88f6a

Browse files
Add support for HTML handling
1 parent 0915a44 commit 9b88f6a

5 files changed

Lines changed: 39 additions & 9 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99
### Added
10+
* Add support for HTML tag handling in `translate_text()`.
1011
### Changed
1112
### Deprecated
1213
### Removed

deepl/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def add_common_arguments(subparser: argparse.ArgumentParser):
289289
tag_handling_group.add_argument(
290290
"--tag-handling",
291291
type=str,
292-
choices=["xml"],
292+
choices=["xml", "html"],
293293
default=None,
294294
help="activate processing of formatting tags, for example 'xml'",
295295
)

deepl/translator.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -639,17 +639,19 @@ def translate_text(
639639
:param glossary: (Optional) glossary or glossary ID to use for
640640
translation. Must match specified source_lang and target_lang.
641641
:param tag_handling: (Optional) Type of tags to parse before
642-
translation, only "xml" is currently available.
642+
translation, only "xml" and "html" are currently available.
643643
:param outline_detection: (Optional) Set to False to disable automatic
644644
tag detection.
645-
:param non_splitting_tags: (Optional) Tags that should not split a
645+
:param non_splitting_tags: (Optional) XML tags that should not split a
646646
sentence.
647-
:type non_splitting_tags: List of tags or comma-separated-list of tags.
648-
:param splitting_tags: (Optional) Tags that should split a sentence.
649-
:type splitting_tags: List of tags or comma-separated-list of tags.
650-
:param ignore_tags: (Optional) Tags containing text that should not be
651-
translated.
652-
:type ignore_tags: List of tags or comma-separated-list of tags.
647+
:type non_splitting_tags: List of XML tags or comma-separated list of
648+
tags.
649+
:param splitting_tags: (Optional) XML tags that should split a
650+
sentence.
651+
:type splitting_tags: List of XML tags or comma-separated list of tags.
652+
:param ignore_tags: (Optional) XML tags containing text that should not
653+
be translated.
654+
:type ignore_tags: List of XML tags or comma-separated list of tags.
653655
:return: List of TextResult objects containing results, unless input
654656
text was one string, then a single TextResult object is returned.
655657
"""

tests/test_cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,14 @@ def test_text_tags(runner):
147147
), f"output:\n{result.output}"
148148

149149

150+
def test_text_html_tag_handling(runner):
151+
result = runner.invoke(
152+
main_function,
153+
'-vv text --to DE --tag-handling html "<html><p>Test</p></html>"',
154+
)
155+
assert result.exit_code == 0, f"exit: {result.exit_code}\n {result.output}"
156+
157+
150158
def test_document(runner, tmpdir):
151159
tmpdir = pathlib.Path(tmpdir)
152160
output_dir = tmpdir / "output"

tests/test_translate_text.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,25 @@ def test_tag_handling_specify_tags(translator):
259259
assert re.compile("<title>.*Der Titel.*</title>").search(result.text)
260260

261261

262+
@needs_real_server
263+
def test_tag_handling_html(translator):
264+
text = """
265+
<!DOCTYPE html>
266+
<html>
267+
<body>
268+
<h1>My First Heading</h1>
269+
<p translate="no">My first paragraph.</p>
270+
</body>
271+
</html>
272+
"""
273+
274+
result = translator.translate_text(
275+
text, target_lang="DE", tag_handling="html"
276+
)
277+
assert "<h1>Meine erste Überschrift</h1>" in result.text
278+
assert '<p translate="no">My first paragraph.</p>' in result.text
279+
280+
262281
def test_invalid_url(server):
263282
translator = deepl.Translator(
264283
server.auth_key, server_url="https://example.com"

0 commit comments

Comments
 (0)