From fdfe43c81b2a9f7e5ec6917bec52eda8772c6255 Mon Sep 17 00:00:00 2001 From: Benoit TELLIER Date: Fri, 16 May 2025 14:07:26 +0200 Subject: [PATCH 01/18] POC RFC-6532 Support for i8n emails --- .../src/test/resources/eml/cve-2024-23184.eml | 45 +++++++++++++++++++ .../mailets/AddDeliveredToHeaderTest.java | 25 +++++++++++ 2 files changed, 70 insertions(+) create mode 100644 mailbox/opensearch/src/test/resources/eml/cve-2024-23184.eml diff --git a/mailbox/opensearch/src/test/resources/eml/cve-2024-23184.eml b/mailbox/opensearch/src/test/resources/eml/cve-2024-23184.eml new file mode 100644 index 00000000000..30b57dd7430 --- /dev/null +++ b/mailbox/opensearch/src/test/resources/eml/cve-2024-23184.eml @@ -0,0 +1,45 @@ +MIME-Version: 1.0 +Subject: Test +From: Benoit TELLIER +To: Benoit TELLIER +Date: Tue, 13 Feb 2024 23:01:18 +0000 +Message-ID: +Content-Type: multipart/mixed; + boundary="-=Part.17f.732e3d28e1c76db4.18da4b40791.62ef5e3fa995057d=-" + +---=Part.17f.732e3d28e1c76db4.18da4b40791.62ef5e3fa995057d=- +Content-Type: multipart/alternative; + boundary="-=Part.17e.48ac92d73c356567.18da4b40791.360a293e2f389efe=-" + +---=Part.17e.48ac92d73c356567.18da4b40791.360a293e2f389efe=- +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: quoted-printable + +Test + +---=Part.17e.48ac92d73c356567.18da4b40791.360a293e2f389efe=- +Content-Type: text/html; charset=UTF-8 +Content-Transfer-Encoding: quoted-printable + +
Test

+ +---=Part.17e.48ac92d73c356567.18da4b40791.360a293e2f389efe=--- + +---=Part.17f.732e3d28e1c76db4.18da4b40791.62ef5e3fa995057d=- +Content-Type: application/json; name="=?US-ASCII?Q?id=5Frsa.txt?=" +Content-Disposition: attachment +Content-Transfer-Encoding: base64 + +c3NoLXJzYSBBQUFBQjNOemFDMXljMkVBQUFBREFRQUJBQUFDQVFDa0dXMkp5c2lKR2hQZXdBOXRr +bVFFQm5EVjRaQ0llLy92ZFoyV0RybnZiNlZLQzdpWldjODFpU1ZkTFcxUkRBTll4c3ExN0dQanpV +OFlWdk9sRkFJSk1WTm9ESWhuQWtYOU9VUUJpd1hpOHlHZ3FLNGR0RmIxczJBRzNrQmxNUFFJOE5K +MkpLT2Z5MW51VnJubEtoVDlCVnpYMm5iSjNOak9PZlkxQlJEaDZZcVl1a2RuejBUT2k1Rkp1YUJT +NDZQemx3eWdIa0dzeXBLVHM2Y2FUNjBRdjl3eWFadm4yenN1RmNML3o2Mmd3aGZyZGFsakF1UGRX +cERlNG1IRVFmMXA2SXNRMDdPb0lwTmRHQ0tLZHRZQlVTcktzTXRpMllLUGZpSzB2WGU1L3owRWJE +VlRja1BrY3NwQ2cwYVZuZTB2eFVsRGt2U2pwV2tiQkZ0YTk5ekJjOVlJL0ROK28vRmtONlFTdXV5 +U29tNDZkamZpUjdqSzNMRmJKUkhaem9BblNvaTZvRlR0MW1LWjNzam44bnZWUG1PV3pJWHY0Tm1O +R1ExZHFrV1hXcUtyQjlIZUZiQnRPWVAzaEkxQ0kvaVhNbVR1SkdvcHVTUmlTNW1QZXlSQWV6VGtk +UG8vZ2NSVWNzbklhVW1EallUWHBFNzU3Yk5LWVNHbFJsS3FrbEhKc2JveEdTK0NaVzBJS2dZeTdG +cmZRZ1FGMTdvaUpWM1JJQ1VHcU9rM1I2VnZOYlhlL2VmZS9IT24xd0lZUS9qVGRzY0hCamRIM2FF +MmY4Y3dVS1IzNUtWNlJ1SE4vYVpiekxiVkJxUEMvUTcwd3NMQlloV29Da1dRMElUUmxGV2N3bnN3 +VTE5NnlGWkVHSmthOUNEaHZQdUVBV0NLWnFRT3gyMnRoYWVSQlE9PSBiZW53YUBob3Jpem9uCg== \ No newline at end of file diff --git a/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java b/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java index d5bb6c788aa..81b1982fce9 100644 --- a/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java +++ b/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java @@ -43,6 +43,7 @@ import org.junit.jupiter.api.io.TempDir; class AddDeliveredToHeaderTest { + public static final String RECIPIENT2 = "rené@" + DEFAULT_DOMAIN; @RegisterExtension public TestIMAPClient 
testIMAPClient = new TestIMAPClient(); @RegisterExtension @@ -58,6 +59,7 @@ void setup(@TempDir File temporaryFolder) throws Exception { DataProbe dataProbe = jamesServer.getProbe(DataProbeImpl.class); dataProbe.addDomain(DEFAULT_DOMAIN); dataProbe.addUser(RECIPIENT, PASSWORD); + dataProbe.addUser(RECIPIENT2, PASSWORD); dataProbe.addUser(FROM, PASSWORD); } @@ -79,4 +81,27 @@ void receivedMessagesShouldContainDeliveredToHeaders() throws Exception { assertThat(testIMAPClient.readFirstMessageHeaders()) .contains(AddDeliveredToHeader.DELIVERED_TO + ": " + RECIPIENT); } + + @Test + void receivedMessagesShouldContainDeliveredToHeadersI8N() throws Exception { + String message = "FROM: " + RECIPIENT2 + "\r\n" + + "subject: testé\r\n" + + "Content-Type: text/plain; charset=UTF-8\r\n" + + "Content-Encoding: 8bit\r\n" + + "\r\n" + + "contenté\r\n" + + ".\r\n"; + messageSender.connect(LOCALHOST_IP, jamesServer.getProbe(SmtpGuiceProbe.class).getSmtpPort()) + .authenticate(FROM, PASSWORD) + .sendMessageWithHeaders(FROM, RECIPIENT2, message); + + testIMAPClient.connect(LOCALHOST_IP, jamesServer.getProbe(ImapGuiceProbe.class).getImapPort()) + .login(RECIPIENT2, PASSWORD) + .select(TestIMAPClient.INBOX) + .awaitMessage(awaitAtMostOneMinute); + assertThat(testIMAPClient.readFirstMessageHeaders()) + .contains("René") + .contains("testé") + .contains("contenté"); + } } From e8a400009a4ec30f7a8537f33eb6111d4d13f6e1 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Thu, 22 May 2025 16:13:31 +0200 Subject: [PATCH 02/18] Add support for unicode addresses as defined in RFC6532. 
--- .../java/org/apache/james/core/Domain.java | 12 ++- .../org/apache/james/core/MailAddress.java | 28 ++++++- .../org/apache/james/core/DomainTest.java | 81 +++++++++++++++++++ .../apache/james/core/MailAddressTest.java | 34 +++++--- 4 files changed, 140 insertions(+), 15 deletions(-) create mode 100644 core/src/test/java/org/apache/james/core/DomainTest.java diff --git a/core/src/main/java/org/apache/james/core/Domain.java b/core/src/main/java/org/apache/james/core/Domain.java index 50ad81c4126..b4372e2b807 100644 --- a/core/src/main/java/org/apache/james/core/Domain.java +++ b/core/src/main/java/org/apache/james/core/Domain.java @@ -20,6 +20,7 @@ package org.apache.james.core; import java.io.Serializable; +import java.net.IDN; import java.util.Locale; import java.util.Objects; @@ -54,9 +55,16 @@ public static Domain of(String domain) { Preconditions.checkArgument(domain.length() <= MAXIMUM_DOMAIN_LENGTH, "Domain name length should not exceed %s characters", MAXIMUM_DOMAIN_LENGTH); - String domainWithoutBrackets = removeBrackets(domain); + String domainWithoutBrackets = IDN.toASCII(removeBrackets(domain), IDN.ALLOW_UNASSIGNED); Preconditions.checkArgument(PART_CHAR_MATCHER.matchesAllOf(domainWithoutBrackets), - "Domain parts ASCII chars must be a-z A-Z 0-9 - or _ in %s", domain); + "Domain parts ASCII chars must be a-z A-Z 0-9 - or _ in %s", domain); + + if (domainWithoutBrackets.startsWith("xn--") || + domainWithoutBrackets.contains(".xn--")) { + domainWithoutBrackets = IDN.toUnicode(domainWithoutBrackets); + Preconditions.checkArgument(!domainWithoutBrackets.startsWith("xn--") && + !domainWithoutBrackets.contains(".xn--")); + } int pos = 0; int nextDot = domainWithoutBrackets.indexOf('.'); diff --git a/core/src/main/java/org/apache/james/core/MailAddress.java b/core/src/main/java/org/apache/james/core/MailAddress.java index 41e88b2b90c..3be44aa8426 100644 --- a/core/src/main/java/org/apache/james/core/MailAddress.java +++ 
b/core/src/main/java/org/apache/james/core/MailAddress.java @@ -19,6 +19,7 @@ package org.apache.james.core; +import java.net.IDN; import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -418,7 +419,7 @@ public Optional toInternetAddress() { try { return Optional.of(new InternetAddress(toString())); } catch (AddressException ae) { - LOGGER.warn("A valid address '{}' as per James criterial fails to parse as a jakarta.mail InternetAdrress", asString()); + LOGGER.warn("A valid address '{}' as per James criteria fails to parse as a jakarta.mail InternetAdrress", asString()); return Optional.empty(); } } @@ -549,15 +550,15 @@ private int parseUnquotedLocalPart(StringBuilder lpSB, String address, int pos) //End of local-part break; } else { - // ::= any one of the 128 ASCII characters, but not any - // or + // ::= any printable ASCII character, or any non-ASCII + // unicode codepoint, but not or // ::= "<" | ">" | "(" | ")" | "[" | "]" | "\" | "." // | "," | ";" | ":" | "@" """ | the control // characters (ASCII codes 0 through 31 inclusive and // 127) // ::= the space character (ASCII code 32) char c = address.charAt(pos); - if (c <= 31 || c >= 127 || c == ' ') { + if (c <= 31 || c == 127 || c == ' ') { throw new AddressException("Invalid character in local-part (user account) at position " + (pos + 1) + " in '" + address + "'", address, pos + 1); } @@ -688,6 +689,7 @@ private int parseDomain(StringBuilder dSB, String address, int pos) throws Addre // in practice though, we should relax this as domain names can start // with digits as well as letters. So only check that doesn't start // or end with hyphen. 
+ boolean unicode = false; while (true) { if (pos >= address.length()) { break; @@ -700,6 +702,11 @@ private int parseDomain(StringBuilder dSB, String address, int pos) throws Addre resultSB.append(ch); pos++; continue; + } else if (ch >= 0x0080) { + resultSB.append(ch); + pos++; + unicode = true; + continue; } if (ch == '.') { break; @@ -707,6 +714,19 @@ private int parseDomain(StringBuilder dSB, String address, int pos) throws Addre throw new AddressException("Invalid character at " + pos + " in '" + address + "'", address, pos); } String result = resultSB.toString(); + if (unicode) { + try { + result = IDN.toASCII(result, IDN.ALLOW_UNASSIGNED); + } catch (IllegalArgumentException e) { + throw new AddressException("Domain invalid according to IDNA", address); + } + } + if (result.startsWith("xn--") || result.contains(".xn--")) { + result = IDN.toUnicode(result); + if (result.startsWith("xn--") || result.contains(".xn--")) { + throw new AddressException("Domain invalid according to IDNA", address); + } + } if (result.startsWith("-") || result.endsWith("-")) { throw new AddressException("Domain name cannot begin or end with a hyphen \"-\" at position " + (pos + 1) + " in '" + address + "'", address, pos + 1); diff --git a/core/src/test/java/org/apache/james/core/DomainTest.java b/core/src/test/java/org/apache/james/core/DomainTest.java new file mode 100644 index 00000000000..8a9e4a56214 --- /dev/null +++ b/core/src/test/java/org/apache/james/core/DomainTest.java @@ -0,0 +1,81 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. 
You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.core; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.stream.Stream; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + + +class DomainTest { + @Test + void testPlainDomain() { + Domain d1 = Domain.of("example.com"); + assertThat(d1.name().equals(d1.asString())); + Domain d2 = Domain.of("Example.com"); + assertThat(d2.name()).isNotEqualTo(d2.asString()); + assertThat(d1.asString()).isEqualTo(d2.asString()); + } + + @Test + void testIPv4Domain() { + Domain d1 = Domain.of("192.0.4.1"); + assertThat(d1.asString()).isEqualTo("192.0.4.1"); + } + + @Test + void testPunycodeIDN() { + Domain d1 = Domain.of("xn--gr-zia.example"); + assertThat(d1.asString()).isEqualTo("grå.example"); + } + + @Test + void testDevanagariDomain() { + Domain d1 = Domain.of("डाटामेल.भारत"); + assertThat(d1.asString()).isEqualTo(d1.name()); + } + + private static Stream malformedDomains() { + return Stream.of( + "😊☺️.example", // emoji not permitted by IDNA + "#.example", // really and truly not permitted + "\uFEFF.example", // U+FEFF is the byte order mark + "\u200C.example", // U+200C is a zero-width non-joiner + "\u200Eibm.example" // U+200E is left-to-right + ) + .map(Arguments::of); + } + + @ParameterizedTest + 
@MethodSource("malformedDomains") + void testMalformedDomains(String malformed) { + assertThatThrownBy(() -> Domain.of(malformed)) + .as("rejecting malformed domain " + malformed) + .isInstanceOf(IllegalArgumentException.class); + } +} + + diff --git a/core/src/test/java/org/apache/james/core/MailAddressTest.java b/core/src/test/java/org/apache/james/core/MailAddressTest.java index 86e5adece4b..e9874c9800f 100644 --- a/core/src/test/java/org/apache/james/core/MailAddressTest.java +++ b/core/src/test/java/org/apache/james/core/MailAddressTest.java @@ -22,12 +22,15 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; +import java.util.Properties; import java.util.stream.Stream; +import jakarta.mail.Session; import jakarta.mail.internet.AddressException; import jakarta.mail.internet.InternetAddress; import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -55,6 +58,13 @@ private static Stream goodAddresses() { "\\.server-dev@james.apache.org", "Abc@10.42.0.1", "Abc.123@example.com", + "Loïc.Accentué@voilà.fr8", + "pelé@exemple.com", + "δοκιμή@παράδειγμα.δοκιμή", + "我買@屋企.香港", + "二ノ宮@黒川.日本", + "медведь@с-балалайкой.рф", + //"संपर्क@डाटामेल.भारत", fails in Jakarta, reason still unknown "user+mailbox/department=shipping@example.com", "user+mailbox@example.com", "\"Abc@def\"@example.com", @@ -96,26 +106,30 @@ private static Stream badAddresses() { "server-dev@[127.0.1.1.1]", "server-dev@[127.0.1.-1]", "test@dom+ain.com", + "test@xn--.example", "\"a..b\"@domain.com", // jakarta.mail is unable to handle this so we better reject it "server-dev\\.@james.apache.org", // jakarta.mail is unable to handle this so we better reject it "a..b@domain.com", - // According to wikipedia these addresses are valid but as jakarta.mail is unable - // to work 
with them we shall rather reject them (note that this is not breaking retro-compatibility) - "Loïc.Accentué@voilà.fr8", - "pelé@exemple.com", - "δοκιμή@παράδειγμα.δοκιμή", - "我買@屋企.香港", - "二ノ宮@黒川.日本", - "медведь@с-балалайкой.рф", - "संपर्क@डाटामेल.भारत", + "sales@\u200Eibm.example", // U+200E is left-to-right + // According to wikipedia this address is valid but as jakarta.mail is unable + // to work with it we shall rather reject them (note that this is not breaking retro-compatibility) "mail.allow\\,d@james.apache.org") .map(Arguments::of); } + @BeforeEach + void setup() { + Properties props = new Properties(); + props.setProperty("mail.mime.allowutf8", "true"); + Session s = Session.getDefaultInstance(props); + assertThat(Boolean.parseBoolean(s.getProperties().getProperty("mail.mime.allowutf8", "false"))); + } + @ParameterizedTest @MethodSource("goodAddresses") void testGoodMailAddressString(String mailAddress) { assertThatCode(() -> new MailAddress(mailAddress)) + .as("parses " + mailAddress) .doesNotThrowAnyException(); } @@ -123,6 +137,7 @@ void testGoodMailAddressString(String mailAddress) { @MethodSource("goodAddresses") void toInternetAddressShouldNoop(String mailAddress) throws Exception { assertThat(new MailAddress(mailAddress).toInternetAddress()) + .as("tries to parse " + mailAddress + " using jakarta.mail") .isNotEmpty(); } @@ -130,6 +145,7 @@ void toInternetAddressShouldNoop(String mailAddress) throws Exception { @MethodSource("badAddresses") void testBadMailAddressString(String mailAddress) { Assertions.assertThatThrownBy(() -> new MailAddress(mailAddress)) + .as("fails to parse " + mailAddress) .isInstanceOf(AddressException.class); } From 221a95b913cc7ab5d856a4d6ce492903d636610b Mon Sep 17 00:00:00 2001 From: Benoit TELLIER Date: Fri, 23 May 2025 18:01:40 +0200 Subject: [PATCH 03/18] Use UTF-8 as the charset supported atop SMTP protocol --- .../org/apache/james/protocols/api/ProtocolSessionImpl.java | 5 ++--- 1 file changed, 2 insertions(+), 3 
deletions(-) diff --git a/protocols/api/src/main/java/org/apache/james/protocols/api/ProtocolSessionImpl.java b/protocols/api/src/main/java/org/apache/james/protocols/api/ProtocolSessionImpl.java index ee711356b03..e0339369312 100644 --- a/protocols/api/src/main/java/org/apache/james/protocols/api/ProtocolSessionImpl.java +++ b/protocols/api/src/main/java/org/apache/james/protocols/api/ProtocolSessionImpl.java @@ -19,10 +19,9 @@ package org.apache.james.protocols.api; -import static java.nio.charset.StandardCharsets.US_ASCII; - import java.net.InetSocketAddress; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; import java.util.Optional; @@ -216,7 +215,7 @@ public Optional getAttachment(AttachmentKey key, State state) { */ @Override public Charset getCharset() { - return US_ASCII; + return StandardCharsets.UTF_8; } /** From dec4fc8fec865b986eadb5dadf4ea846d511c894 Mon Sep 17 00:00:00 2001 From: Benoit TELLIER Date: Wed, 18 Jun 2025 19:53:03 +0200 Subject: [PATCH 04/18] Upgrade mime4j to 0.8.13-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5509d511fdc..720f9ff0d4d 100644 --- a/pom.xml +++ b/pom.xml @@ -622,7 +622,7 @@ org.apache.james ${james.groupId}.protocols 6.1.6 - 0.8.12 + 0.8.13-SNAPSHOT 4.0.0 10.14.2.0 2.23.1 From 3f8a36153d7c2792e84d88b013e968699ed12621 Mon Sep 17 00:00:00 2001 From: Benoit TELLIER Date: Wed, 18 Jun 2025 19:54:13 +0200 Subject: [PATCH 05/18] UTF-8 support for james-server-core --- .../core/InternetHeadersInputStream.java | 2 +- .../apache/james/server/core/MailHeaders.java | 4 +-- .../server/core/MimeMessageWrapperTest.java | 31 +++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/server/container/core/src/main/java/org/apache/james/server/core/InternetHeadersInputStream.java b/server/container/core/src/main/java/org/apache/james/server/core/InternetHeadersInputStream.java 
index 290bc914ebb..7759e89f136 100644 --- a/server/container/core/src/main/java/org/apache/james/server/core/InternetHeadersInputStream.java +++ b/server/container/core/src/main/java/org/apache/james/server/core/InternetHeadersInputStream.java @@ -69,7 +69,7 @@ private boolean readNextLine() { if (!headerLines.hasMoreElements()) { line += LINE_SEPERATOR; } - currLine = line.getBytes(StandardCharsets.US_ASCII); + currLine = line.getBytes(StandardCharsets.UTF_8); return true; } else { return false; diff --git a/server/container/core/src/main/java/org/apache/james/server/core/MailHeaders.java b/server/container/core/src/main/java/org/apache/james/server/core/MailHeaders.java index 71d70e60dcb..be77e707eeb 100644 --- a/server/container/core/src/main/java/org/apache/james/server/core/MailHeaders.java +++ b/server/container/core/src/main/java/org/apache/james/server/core/MailHeaders.java @@ -39,8 +39,8 @@ * */ public class MailHeaders extends InternetHeaders implements Serializable, Cloneable { - private static final long serialVersionUID = 238748126601L; + private static final boolean ALLOWUTF_8 = true; private boolean modified = false; private long size = -1; @@ -67,7 +67,7 @@ public MailHeaders() { */ public MailHeaders(InputStream in) throws MessagingException { super(); - load(in); + load(in, ALLOWUTF_8); } /** diff --git a/server/container/core/src/test/java/org/apache/james/server/core/MimeMessageWrapperTest.java b/server/container/core/src/test/java/org/apache/james/server/core/MimeMessageWrapperTest.java index bc96f14fe90..23ed471c13e 100644 --- a/server/container/core/src/test/java/org/apache/james/server/core/MimeMessageWrapperTest.java +++ b/server/container/core/src/test/java/org/apache/james/server/core/MimeMessageWrapperTest.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Enumeration; import java.util.Properties; @@ -87,6 +88,7 @@ 
public synchronized void loadMessage() throws MessagingException { TestableMimeMessageWrapper mw = null; TestableMimeMessageWrapper onlyHeader = null; final String content = "Subject: foo\r\nContent-Transfer-Encoding2: plain"; + final String contentUtf8 = "Subject: fée\r\nContent-Transfer-Encoding2: plain"; final String sep = "\r\n\r\n"; final String body = "bar\r\n"; @@ -276,6 +278,35 @@ public void testSize() throws MessagingException { assertThat(mw.getSize()).isEqualTo(body.length()); } + @Test + public void testSizeUtf8() throws Exception { + TestableMimeMessageWrapper message = getMessageFromSources(contentUtf8 + sep + body); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + message.writeTo(baos); + + assertThat(message.getMessageSize()) + .isEqualTo(baos.size()); + } + + @Test + public void testWriteToUtf8() throws Exception { + TestableMimeMessageWrapper message = getMessageFromSources(contentUtf8 + sep + body); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + message.writeTo(baos); + + assertThat(baos.toString(StandardCharsets.UTF_8)).isEqualTo(contentUtf8 + sep + body); + } + + @Test + public void testWriteToUtf8AfterHeaderModification() throws Exception { + TestableMimeMessageWrapper message = getMessageFromSources(contentUtf8 + sep + body); + message.addHeader("Another", "header"); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + message.writeTo(baos); + + assertThat(baos.toString(StandardCharsets.UTF_8)).contains("Subject: fée\r\n"); + } + @Test public void getSizeShouldReturnZeroWhenNoHeaderAndAddHeader() throws MessagingException { onlyHeader.addHeader("a", "b"); From 8a8c9934efdf4b4704f50ce7c1bcfb17cfb992b7 Mon Sep 17 00:00:00 2001 From: Benoit TELLIER Date: Wed, 18 Jun 2025 19:55:25 +0200 Subject: [PATCH 06/18] Integration tests for UTF-8 mail reception --- .../mailets/AddDeliveredToHeaderTest.java | 68 ++++++++++++++++--- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git 
a/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java b/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java index 81b1982fce9..20f7db4c792 100644 --- a/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java +++ b/server/mailet/integration-testing/src/test/java/org/apache/james/mailets/AddDeliveredToHeaderTest.java @@ -29,10 +29,21 @@ import java.io.File; +import org.apache.james.jmap.mailet.filter.JMAPFiltering; +import org.apache.james.mailets.configuration.MailetConfiguration; +import org.apache.james.mailets.configuration.ProcessorConfiguration; import org.apache.james.modules.protocols.ImapGuiceProbe; import org.apache.james.modules.protocols.SmtpGuiceProbe; import org.apache.james.probe.DataProbe; import org.apache.james.transport.mailets.AddDeliveredToHeader; +import org.apache.james.transport.mailets.LocalDelivery; +import org.apache.james.transport.mailets.RecipientRewriteTable; +import org.apache.james.transport.mailets.RemoteDelivery; +import org.apache.james.transport.mailets.ToProcessor; +import org.apache.james.transport.mailets.VacationMailet; +import org.apache.james.transport.matchers.All; +import org.apache.james.transport.matchers.RecipientIsLocal; +import org.apache.james.transport.matchers.SMTPAuthSuccessful; import org.apache.james.utils.DataProbeImpl; import org.apache.james.utils.SMTPMessageSender; import org.apache.james.utils.TestIMAPClient; @@ -43,7 +54,9 @@ import org.junit.jupiter.api.io.TempDir; class AddDeliveredToHeaderTest { - public static final String RECIPIENT2 = "rené@" + DEFAULT_DOMAIN; + private static final String POSTMASTER = "postmaster@" + DEFAULT_DOMAIN; + public static final String RECIPIENT2_UTF8 = "rené@" + DEFAULT_DOMAIN; + public static final String RECIPIENT2 = "rene@" + DEFAULT_DOMAIN; @RegisterExtension public TestIMAPClient testIMAPClient = new TestIMAPClient(); 
@RegisterExtension @@ -53,7 +66,11 @@ class AddDeliveredToHeaderTest { @BeforeEach void setup(@TempDir File temporaryFolder) throws Exception { - jamesServer = TemporaryJamesServer.builder().build(temporaryFolder); + jamesServer = TemporaryJamesServer.builder() + .withMailetContainer(TemporaryJamesServer.defaultMailetContainerConfiguration() + .postmaster(POSTMASTER) + .putProcessor(transport())) + .build(temporaryFolder); jamesServer.start(); DataProbe dataProbe = jamesServer.getProbe(DataProbeImpl.class); @@ -84,24 +101,59 @@ void receivedMessagesShouldContainDeliveredToHeaders() throws Exception { @Test void receivedMessagesShouldContainDeliveredToHeadersI8N() throws Exception { - String message = "FROM: " + RECIPIENT2 + "\r\n" + + jamesServer.getProbe(DataProbeImpl.class).addUserAliasMapping("rené", "james.org", RECIPIENT2); + String message = "FROM: " + RECIPIENT2_UTF8 + "\r\n" + "subject: testé\r\n" + "Content-Type: text/plain; charset=UTF-8\r\n" + "Content-Encoding: 8bit\r\n" + "\r\n" + - "contenté\r\n" + - ".\r\n"; + "contenté\r\n"; messageSender.connect(LOCALHOST_IP, jamesServer.getProbe(SmtpGuiceProbe.class).getSmtpPort()) .authenticate(FROM, PASSWORD) - .sendMessageWithHeaders(FROM, RECIPIENT2, message); + .sendMessageWithHeaders(FROM, RECIPIENT2_UTF8, message); + + Thread.sleep(1000); testIMAPClient.connect(LOCALHOST_IP, jamesServer.getProbe(ImapGuiceProbe.class).getImapPort()) .login(RECIPIENT2, PASSWORD) .select(TestIMAPClient.INBOX) .awaitMessage(awaitAtMostOneMinute); - assertThat(testIMAPClient.readFirstMessageHeaders()) - .contains("René") + + assertThat(testIMAPClient.readFirstMessage()) + .contains(RECIPIENT2_UTF8) .contains("testé") .contains("contenté"); } + + private ProcessorConfiguration.Builder transport() { + return ProcessorConfiguration.transport() + .enableJmx(false) + .addMailet(MailetConfiguration.builder() + .matcher(All.class) + .mailet(RecipientRewriteTable.class)) + .addMailet(MailetConfiguration.builder() + 
.matcher(RecipientIsLocal.class) + .mailet(VacationMailet.class)) + .addMailet(MailetConfiguration.builder() + .matcher(RecipientIsLocal.class) + .mailet(JMAPFiltering.class)) + .addMailet(MailetConfiguration.builder() + .matcher(RecipientIsLocal.class) + .mailet(LocalDelivery.class)) + .addMailet(MailetConfiguration.builder() + .matcher(SMTPAuthSuccessful.class) + .mailet(RemoteDelivery.class) + .addProperty("outgoingQueue", "outgoing") + .addProperty("delayTime", "5000, 100000, 500000") + .addProperty("maxRetries", "3") + .addProperty("maxDnsProblemRetries", "0") + .addProperty("deliveryThreads", "10") + .addProperty("sendpartial", "true") + .addProperty("bounceProcessor", "bounces")) + .addMailet(MailetConfiguration.BCC_STRIPPER) + .addMailet(MailetConfiguration.builder() + .matcher(All.class) + .mailet(ToProcessor.class) + .addProperty("processor", "error")); + } } From d3feb044c0fa7df52ee1058b60f0d88c60bc1008 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Wed, 22 Apr 2026 20:08:45 +0200 Subject: [PATCH 07/18] Update dependencies to use the mime4j 0.8.13 release. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 720f9ff0d4d..cf4055c56ca 100644 --- a/pom.xml +++ b/pom.xml @@ -622,7 +622,7 @@ org.apache.james ${james.groupId}.protocols 6.1.6 - 0.8.13-SNAPSHOT + 0.8.13 4.0.0 10.14.2.0 2.23.1 From 9fdc613fec44fe06699480a366d8c4a5a57e64cd Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Wed, 22 Apr 2026 20:16:54 +0200 Subject: [PATCH 08/18] Provide ENABLE UTF8=ACCEPT (doing nothing so far). 
--- .../apache/james/imap/api/ImapConstants.java | 4 +- .../imap/processor/CapabilityProcessor.java | 19 +++++++- .../processor/CapabilityProcessorTest.java | 14 ++++++ .../imapserver/netty/IMAPServerTest.java | 44 +++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) diff --git a/protocols/imap/src/main/java/org/apache/james/imap/api/ImapConstants.java b/protocols/imap/src/main/java/org/apache/james/imap/api/ImapConstants.java index 5619cf290c1..23f9f0db2c1 100644 --- a/protocols/imap/src/main/java/org/apache/james/imap/api/ImapConstants.java +++ b/protocols/imap/src/main/java/org/apache/james/imap/api/ImapConstants.java @@ -104,7 +104,9 @@ public interface ImapConstants { Capability SUPPORTS_UIDPLUS = Capability.of("UIDPLUS"); Capability SUPPORTS_ANNOTATION = Capability.of("METADATA"); - + + Capability SUPPORTS_UTF8_ACCEPT = Capability.of("UTF8=ACCEPT"); + String INBOX_NAME = "INBOX"; String MIME_TYPE_TEXT = "TEXT"; diff --git a/protocols/imap/src/main/java/org/apache/james/imap/processor/CapabilityProcessor.java b/protocols/imap/src/main/java/org/apache/james/imap/processor/CapabilityProcessor.java index 99033730816..f5141310e18 100644 --- a/protocols/imap/src/main/java/org/apache/james/imap/processor/CapabilityProcessor.java +++ b/protocols/imap/src/main/java/org/apache/james/imap/processor/CapabilityProcessor.java @@ -26,6 +26,7 @@ import static org.apache.james.imap.api.ImapConstants.SUPPORTS_OBJECTID; import static org.apache.james.imap.api.ImapConstants.SUPPORTS_RFC3348; import static org.apache.james.imap.api.ImapConstants.SUPPORTS_SAVEDATE; +import static org.apache.james.imap.api.ImapConstants.SUPPORTS_UTF8_ACCEPT; import static org.apache.james.mailbox.MailboxManager.MessageCapabilities.UniqueID; import java.util.ArrayList; @@ -36,6 +37,7 @@ import jakarta.inject.Inject; import org.apache.james.imap.api.ImapConfiguration; +import org.apache.james.imap.api.ImapMessage; import org.apache.james.imap.api.message.Capability; import 
org.apache.james.imap.api.message.response.StatusResponseFactory; import org.apache.james.imap.api.process.ImapSession; @@ -49,7 +51,7 @@ import reactor.core.publisher.Mono; -public class CapabilityProcessor extends AbstractMailboxProcessor implements CapabilityImplementingProcessor { +public class CapabilityProcessor extends AbstractMailboxProcessor implements CapabilityImplementingProcessor, PermitEnableCapabilityProcessor { private static final List CAPS = ImmutableList.of( BASIC_CAPABILITIES, @@ -58,8 +60,13 @@ public class CapabilityProcessor extends AbstractMailboxProcessor ENABLEABLE_CAPS = ImmutableList.of(SUPPORTS_UTF8_ACCEPT); + private final List capabilities = new ArrayList<>(); private final Set disabledCaps = new HashSet<>(); @@ -106,6 +113,16 @@ public void addProcessor(CapabilityImplementingProcessor implementor) { public List getImplementedCapabilities(ImapSession session) { return CAPS; } + + @Override + public List getPermitEnableCapabilities(ImapSession session) { + return ENABLEABLE_CAPS; + } + + @Override + public Mono enable(ImapMessage message, Responder responder, ImapSession session, Capability capability) { + return Mono.empty(); + } /** * Return all supported CAPABILITIES for this {@link ImapSession} diff --git a/protocols/imap/src/test/java/org/apache/james/imap/processor/CapabilityProcessorTest.java b/protocols/imap/src/test/java/org/apache/james/imap/processor/CapabilityProcessorTest.java index 1b89404ba5a..1555a694ba9 100644 --- a/protocols/imap/src/test/java/org/apache/james/imap/processor/CapabilityProcessorTest.java +++ b/protocols/imap/src/test/java/org/apache/james/imap/processor/CapabilityProcessorTest.java @@ -78,4 +78,18 @@ void condstoreShouldBeNotSupportedByDefault() { Set supportedCapabilities = testee.getSupportedCapabilities(null); assertThat(supportedCapabilities).doesNotContain(ImapConstants.SUPPORTS_CONDSTORE); } + + @Test + void utf8AcceptShouldBeAdvertised() { + testee.configure(ImapConfiguration.builder().build()); 
+ + Set supportedCapabilities = testee.getSupportedCapabilities(null); + assertThat(supportedCapabilities).contains(ImapConstants.SUPPORTS_UTF8_ACCEPT); + } + + @Test + void utf8AcceptShouldBeEnableable() { + assertThat(testee.getPermitEnableCapabilities(null)) + .contains(ImapConstants.SUPPORTS_UTF8_ACCEPT); + } } diff --git a/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java b/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java index a6480898575..fe37f2dc1e1 100644 --- a/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java +++ b/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java @@ -3577,4 +3577,48 @@ void renameShouldFailWhenInsufficientRightsOnSharedMailbox() throws Exception { } } + @Nested + class Utf8AcceptTest { + IMAPServer imapServer; + + @AfterEach + void tearDown() { + if (imapServer != null) { + imapServer.destroy(); + } + } + + @Test + void capabilityShouldAdvertiseUtf8Accept() throws Exception { + imapServer = createImapServer("imapServer.xml"); + assertThat( + testIMAPClient.connect("127.0.0.1", imapServer.getListenAddresses().getFirst().getPort()) + .sendCommand("CAPABILITY")) + .contains("UTF8=ACCEPT"); + } + + @Test + void enableUtf8AcceptShouldSucceed() throws Exception { + imapServer = createImapServer("imapServer.xml"); + assertThat( + testIMAPClient.connect("127.0.0.1", imapServer.getListenAddresses().getFirst().getPort()) + .login(USER.asString(), USER_PASS) + .sendCommand("ENABLE UTF8=ACCEPT")) + .contains("* ENABLED UTF8=ACCEPT") + .contains("OK ENABLE completed."); + } + + @Test + void enableUtf8AcceptShouldNotEchoUnsupportedCapability() throws Exception { + imapServer = createImapServer("imapServer.xml"); + assertThat( + testIMAPClient.connect("127.0.0.1", imapServer.getListenAddresses().getFirst().getPort()) + .login(USER.asString(), 
USER_PASS) + .sendCommand("ENABLE BOGUS-CAPABILITY UTF8=ACCEPT")) + .contains("* ENABLED UTF8=ACCEPT") + .doesNotContain("BOGUS-CAPABILITY") + .contains("OK ENABLE completed."); + } + } + } From 12164a7795d873e24a66b0ea77727fcadc35aa1a Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Wed, 22 Apr 2026 20:45:18 +0200 Subject: [PATCH 09/18] Use UTF8 for mailbox names instead of mUTF7, both for parsing and output. --- .../imap/decode/ImapRequestLineReader.java | 25 +++++- .../encode/base/ImapResponseComposerImpl.java | 32 ++++++- .../decode/ImapRequestLineReaderTest.java | 36 ++++++++ .../netty/ImapChannelUpstreamHandler.java | 4 +- .../netty/ImapRequestFrameDecoder.java | 4 + .../imapserver/netty/IMAPServerTest.java | 84 +++++++++++++++++++ 6 files changed, 181 insertions(+), 4 deletions(-) diff --git a/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java b/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java index 3aa5bbbe185..1b157d5419f 100644 --- a/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java +++ b/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java @@ -20,6 +20,7 @@ package org.apache.james.imap.decode; import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.UTF_8; import java.io.Closeable; import java.io.IOException; @@ -291,6 +292,7 @@ public static boolean isQuotedSpecial(char chr) { protected char nextChar; // unknown protected boolean nextSeen = false; + private boolean utf8Accept; private final StringBuilder stringBuilder = new StringBuilder(); /** @@ -489,7 +491,25 @@ public String nstring() throws DecodingException { * */ public String mailbox() throws DecodingException { - return ModifiedUtf7.decodeModifiedUTF7(mailboxUTF7()); + if (utf8Accept) { + String mailbox = astring(UTF_8); + if (mailbox.equalsIgnoreCase(ImapConstants.INBOX_NAME)) { + return 
ImapConstants.INBOX_NAME; + } + return mailbox; + } + return ModifiedUtf7.decodeModifiedUTF7(mailboxUTF7()); + } + + /** + * When set, {@link #mailbox()} treats the astring as UTF-8 and does not + * run Modified UTF-7 decoding. Callers should set this from + * {@code EnableProcessor.getEnabledCapabilities(session).contains(SUPPORTS_UTF8_ACCEPT)} + * after the session state is known. + */ + public ImapRequestLineReader setUtf8Accept(boolean utf8Accept) { + this.utf8Accept = utf8Accept; + return this; } /** @@ -501,7 +521,8 @@ public String mailbox() throws DecodingException { * variants of ;; INBOX (e.g. "iNbOx") MUST be interpreted as INBOX ;; not * as an astring. * - * Be aware that mailbox names are encoded via a modified UTF7. For more information RFC3501 + * Be aware that mailbox names are encoded via a modified UTF7 in unextended + * IMAP. For more information see RFC3501. RFC9755 changes this. */ public String mailboxUTF7() throws DecodingException { String mailbox = astring(); diff --git a/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java b/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java index b489204c0c3..6b3e6969c52 100644 --- a/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java +++ b/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java @@ -22,6 +22,7 @@ import static java.nio.charset.StandardCharsets.US_ASCII; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Optional; import jakarta.mail.Flags; @@ -64,6 +65,8 @@ public class ImapResponseComposerImpl implements ImapConstants, ImapResponseComp private boolean skipNextSpace; + private boolean utf8Accept; + public ImapResponseComposerImpl(ImapResponseWriter writer, int bufferSize) { skipNextSpace = false; this.writer = writer; @@ -239,10 +242,37 @@ public ImapResponseComposer message(long number) throws 
IOException { @Override public ImapResponseComposer mailbox(String mailboxName) throws IOException { - quote(ModifiedUtf7.encodeModifiedUTF7(mailboxName)); + if (utf8Accept) { + quoteUtf8(mailboxName); + } else { + quote(ModifiedUtf7.encodeModifiedUTF7(mailboxName)); + } return this; } + /** + * Per RFC 9755, when the client has ENABLEd UTF8=ACCEPT the server emits + * mailbox names and other strings as UTF-8 octets (not Modified UTF-7). + * Set this once per composer, from the session's enabled-capabilities set. + */ + public ImapResponseComposerImpl setUtf8Accept(boolean utf8Accept) { + this.utf8Accept = utf8Accept; + return this; + } + + private void quoteUtf8(String message) throws IOException { + space(); + buffer.write(BYTE_DQUOTE); + byte[] bytes = message.getBytes(StandardCharsets.UTF_8); + for (byte b : bytes) { + if (b == BYTE_BACK_SLASH || b == BYTE_DQUOTE) { + buffer.write(BYTE_BACK_SLASH); + } + buffer.write(b); + } + buffer.write(BYTE_DQUOTE); + } + @Override public ImapResponseComposer commandName(ImapCommand command) throws IOException { return message(command.getNameAsBytes()); diff --git a/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java b/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java index b2bbdd31888..bfdedea9eeb 100644 --- a/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java +++ b/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java @@ -57,4 +57,40 @@ void nextNonSpaceCharShouldThrowExceptionWhenNotFound() { assertThatThrownBy(() -> lineReader.nextNonSpaceChar()).isInstanceOf(DecodingException.class); } + + @Test + void mailboxShouldDecodeModifiedUtf7WhenUtf8AcceptNotEnabled() throws Exception { + // Wire form "a&--b" is the Modified UTF-7 encoding of "a&-b". 
+ inputStream = new ByteArrayInputStream("\"a&--b\" ".getBytes(StandardCharsets.US_ASCII)); + lineReader = new ImapRequestStreamLineReader(inputStream, outputStream); + + assertThat(lineReader.mailbox()).isEqualTo("a&-b"); + } + + @Test + void mailboxShouldDecodeUnicodeModifiedUtf7WhenUtf8AcceptNotEnabled() throws Exception { + // Wire form "gr&AOU-" is the Modified UTF-7 encoding of "grå". + inputStream = new ByteArrayInputStream("\"gr&AOU-\" ".getBytes(StandardCharsets.US_ASCII)); + lineReader = new ImapRequestStreamLineReader(inputStream, outputStream); + + assertThat(lineReader.mailbox()).isEqualTo("grå"); + } + + @Test + void mailboxShouldReturnRawStringWhenUtf8AcceptEnabledAndNameContainsAmpersand() throws Exception { + inputStream = new ByteArrayInputStream("\"a&-b\" ".getBytes(StandardCharsets.UTF_8)); + lineReader = new ImapRequestStreamLineReader(inputStream, outputStream); + lineReader.setUtf8Accept(true); + + assertThat(lineReader.mailbox()).isEqualTo("a&-b"); + } + + @Test + void mailboxShouldReturnRawUnicodeWhenUtf8AcceptEnabled() throws Exception { + inputStream = new ByteArrayInputStream("\"grå\" ".getBytes(StandardCharsets.UTF_8)); + lineReader = new ImapRequestStreamLineReader(inputStream, outputStream); + lineReader.setUtf8Accept(true); + + assertThat(lineReader.mailbox()).isEqualTo("grå"); + } } \ No newline at end of file diff --git a/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapChannelUpstreamHandler.java b/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapChannelUpstreamHandler.java index 8d478926edf..7a2625ad7ce 100644 --- a/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapChannelUpstreamHandler.java +++ b/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapChannelUpstreamHandler.java @@ -51,6 +51,7 @@ import org.apache.james.imap.main.ResponseEncoder; import 
org.apache.james.imap.message.request.AbstractImapRequest; import org.apache.james.imap.message.response.ImmutableStatusResponse; +import org.apache.james.imap.processor.EnableProcessor; import org.apache.james.metrics.api.Metric; import org.apache.james.protocols.netty.Encryption; import org.apache.james.util.MDCBuilder; @@ -417,7 +418,8 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) { } ChannelImapResponseWriter writer = new ChannelImapResponseWriter(ctx.channel(), session); - ImapResponseComposerImpl response = new ImapResponseComposerImpl(writer); + ImapResponseComposerImpl response = new ImapResponseComposerImpl(writer) + .setUtf8Accept(EnableProcessor.getEnabledCapabilities(session).contains(ImapConstants.SUPPORTS_UTF8_ACCEPT)); writer.setFlushCallback(response::flush); ImapMessage message = (ImapMessage) msg; diff --git a/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapRequestFrameDecoder.java b/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapRequestFrameDecoder.java index 14cb8aae92e..012662a78f1 100644 --- a/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapRequestFrameDecoder.java +++ b/server/protocols/protocols-imap4/src/main/java/org/apache/james/imapserver/netty/ImapRequestFrameDecoder.java @@ -39,12 +39,14 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.tuple.Pair; +import org.apache.james.imap.api.ImapConstants; import org.apache.james.imap.api.ImapMessage; import org.apache.james.imap.api.ImapSessionState; import org.apache.james.imap.api.process.ImapSession; import org.apache.james.imap.decode.DecodingException; import org.apache.james.imap.decode.ImapDecoder; import org.apache.james.imap.decode.ImapRequestLineReader; +import org.apache.james.imap.processor.EnableProcessor; import org.apache.james.lifecycle.api.Disposable.LeakAware; import org.apache.james.protocols.netty.LineHandlerAware; @@ 
-146,6 +148,8 @@ private Optional parseImapMessage(ChannelHandlerContext ctx, ByteBu // Also check if the session was logged out if so there is not need to try to decode it. See JAMES-1341 if (session != null && session.getState() != ImapSessionState.LOGOUT) { try { + readerAndSize.getLeft().setUtf8Accept( + EnableProcessor.getEnabledCapabilities(session).contains(ImapConstants.SUPPORTS_UTF8_ACCEPT)); ImapMessage message = decoder.decode(readerAndSize.getLeft(), session); diff --git a/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java b/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java index fe37f2dc1e1..9a39bc382e8 100644 --- a/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java +++ b/server/protocols/protocols-imap4/src/test/java/org/apache/james/imapserver/netty/IMAPServerTest.java @@ -3619,6 +3619,90 @@ void enableUtf8AcceptShouldNotEchoUnsupportedCapability() throws Exception { .doesNotContain("BOGUS-CAPABILITY") .contains("OK ENABLE completed."); } + + @Test + void listShouldEncodeMailboxNameAsModifiedUtf7WhenUtf8AcceptNotEnabled() throws Exception { + imapServer = createImapServer("imapServer.xml"); + MailboxSession session = memoryIntegrationResources.getMailboxManager().createSystemSession(USER); + memoryIntegrationResources.getMailboxManager() + .createMailbox(MailboxPath.forUser(USER, "grå"), session); + + try (SocketChannel c = SocketChannel.open(new InetSocketAddress(LOCALHOST_IP, + imapServer.getListenAddresses().getFirst().getPort()))) { + readUtf8Bytes(c); + c.write(ByteBuffer.wrap(String.format("a0 LOGIN %s %s\r\n", USER.asString(), USER_PASS).getBytes(StandardCharsets.UTF_8))); + readUtf8Until(c, s -> s.contains("a0 OK")); + c.write(ByteBuffer.wrap("a1 LIST \"\" \"*\"\r\n".getBytes(StandardCharsets.UTF_8))); + List replies = readUtf8Until(c, s -> s.contains("a1 OK")); + + assertThat(String.join("", 
replies)) + .contains("gr&AOU-") + .doesNotContain("grå"); + } + } + + @Test + void createWithUnicodeMailboxNameShouldSucceedAfterEnableUtf8Accept() throws Exception { + imapServer = createImapServer("imapServer.xml"); + + try (SocketChannel c = SocketChannel.open(new InetSocketAddress(LOCALHOST_IP, + imapServer.getListenAddresses().getFirst().getPort()))) { + readUtf8Bytes(c); + c.write(ByteBuffer.wrap(String.format("a0 LOGIN %s %s\r\n", USER.asString(), USER_PASS).getBytes(StandardCharsets.UTF_8))); + readUtf8Until(c, s -> s.contains("a0 OK")); + c.write(ByteBuffer.wrap("a1 ENABLE UTF8=ACCEPT\r\n".getBytes(StandardCharsets.UTF_8))); + readUtf8Until(c, s -> s.contains("a1 OK")); + c.write(ByteBuffer.wrap("a2 CREATE \"grå\"\r\n".getBytes(StandardCharsets.UTF_8))); + readUtf8Until(c, s -> s.contains("a2 OK")); + c.write(ByteBuffer.wrap("a3 LIST \"\" \"*\"\r\n".getBytes(StandardCharsets.UTF_8))); + List replies = readUtf8Until(c, s -> s.contains("a3 OK")); + + assertThat(String.join("", replies)).contains("grå"); + } + } + + @Test + void listShouldEncodeMailboxNameAsRawUtf8WhenUtf8AcceptEnabled() throws Exception { + imapServer = createImapServer("imapServer.xml"); + MailboxSession session = memoryIntegrationResources.getMailboxManager().createSystemSession(USER); + memoryIntegrationResources.getMailboxManager() + .createMailbox(MailboxPath.forUser(USER, "grå"), session); + + try (SocketChannel c = SocketChannel.open(new InetSocketAddress(LOCALHOST_IP, + imapServer.getListenAddresses().getFirst().getPort()))) { + readUtf8Bytes(c); + c.write(ByteBuffer.wrap(String.format("a0 LOGIN %s %s\r\n", USER.asString(), USER_PASS).getBytes(StandardCharsets.UTF_8))); + readUtf8Until(c, s -> s.contains("a0 OK")); + c.write(ByteBuffer.wrap("a1 ENABLE UTF8=ACCEPT\r\n".getBytes(StandardCharsets.UTF_8))); + readUtf8Until(c, s -> s.contains("a1 OK")); + c.write(ByteBuffer.wrap("a2 LIST \"\" \"*\"\r\n".getBytes(StandardCharsets.UTF_8))); + List replies = readUtf8Until(c, s -> 
s.contains("a2 OK")); + + assertThat(String.join("", replies)) + .contains("grå") + .doesNotContain("gr&AOU-"); + } + } + + private byte[] readUtf8Bytes(SocketChannel channel) throws IOException { + ByteBuffer buf = ByteBuffer.allocate(8192); + channel.read(buf); + buf.flip(); + byte[] out = new byte[buf.remaining()]; + buf.get(out); + return out; + } + + private List readUtf8Until(SocketChannel channel, Predicate condition) throws IOException { + ImmutableList.Builder result = ImmutableList.builder(); + while (true) { + String line = new String(readUtf8Bytes(channel), StandardCharsets.UTF_8); + result.add(line); + if (condition.test(line)) { + return result.build(); + } + } + } } } From f41f3269815ef0c3bf52e9bb946179a0e6526fc1 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Wed, 22 Apr 2026 20:59:33 +0200 Subject: [PATCH 10/18] Send UTF8 quoted-strings if enabled, and accept them anyway. This changes the handling of some noncompliant IMAP clients, which James would not tolerate before. 
--- .../imap/decode/ImapRequestLineReader.java | 2 +- .../encode/base/ImapResponseComposerImpl.java | 45 +++++++++---------- .../decode/ImapRequestLineReaderTest.java | 11 +++++ 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java b/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java index 1b157d5419f..ece078616d7 100644 --- a/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java +++ b/protocols/imap/src/main/java/org/apache/james/imap/decode/ImapRequestLineReader.java @@ -754,7 +754,7 @@ public String consumeQuoted() throws DecodingException { */ protected String consumeQuoted(Charset charset) throws DecodingException { if (charset == null) { - return consumeQuoted(US_ASCII); + return consumeQuoted(UTF_8); } else { // The 1st character must be '"' consumeChar('"'); diff --git a/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java b/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java index 6b3e6969c52..ae6ea43048a 100644 --- a/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java +++ b/protocols/imap/src/main/java/org/apache/james/imap/encode/base/ImapResponseComposerImpl.java @@ -243,7 +243,7 @@ public ImapResponseComposer message(long number) throws IOException { @Override public ImapResponseComposer mailbox(String mailboxName) throws IOException { if (utf8Accept) { - quoteUtf8(mailboxName); + quote(mailboxName); } else { quote(ModifiedUtf7.encodeModifiedUTF7(mailboxName)); } @@ -260,19 +260,6 @@ public ImapResponseComposerImpl setUtf8Accept(boolean utf8Accept) { return this; } - private void quoteUtf8(String message) throws IOException { - space(); - buffer.write(BYTE_DQUOTE); - byte[] bytes = message.getBytes(StandardCharsets.UTF_8); - for (byte b : bytes) { - if (b == BYTE_BACK_SLASH 
|| b == BYTE_DQUOTE) { - buffer.write(BYTE_BACK_SLASH); - } - buffer.write(b); - } - buffer.write(BYTE_DQUOTE); - } - @Override public ImapResponseComposer commandName(ImapCommand command) throws IOException { return message(command.getNameAsBytes()); @@ -281,19 +268,27 @@ public ImapResponseComposer commandName(ImapCommand command) throws IOException @Override public ImapResponseComposer quote(String message) throws IOException { space(); - final int length = message.length(); - buffer.write(BYTE_DQUOTE); - for (int i = 0; i < length; i++) { - char character = message.charAt(i); - if (character == ImapConstants.BACK_SLASH || character == DQUOTE) { - buffer.write(BYTE_BACK_SLASH); + if (utf8Accept) { + for (byte b : message.getBytes(StandardCharsets.UTF_8)) { + if (b == BYTE_BACK_SLASH || b == BYTE_DQUOTE) { + buffer.write(BYTE_BACK_SLASH); + } + buffer.write(b); } - // 7-bit ASCII only - if (character >= 128) { - buffer.write(BYTE_QUESTION); - } else { - buffer.write((byte) character); + } else { + final int length = message.length(); + for (int i = 0; i < length; i++) { + char character = message.charAt(i); + if (character == ImapConstants.BACK_SLASH || character == DQUOTE) { + buffer.write(BYTE_BACK_SLASH); + } + // 7-bit ASCII only + if (character >= 128) { + buffer.write(BYTE_QUESTION); + } else { + buffer.write((byte) character); + } } } buffer.write(BYTE_DQUOTE); diff --git a/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java b/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java index bfdedea9eeb..2aad9c33538 100644 --- a/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java +++ b/protocols/imap/src/test/java/org/apache/james/imap/decode/ImapRequestLineReaderTest.java @@ -93,4 +93,15 @@ void mailboxShouldReturnRawUnicodeWhenUtf8AcceptEnabled() throws Exception { assertThat(lineReader.mailbox()).isEqualTo("grå"); } + + @Test + void 
astringShouldDecodeUtf8QuotedStringByDefault() throws Exception { + // Many IMAP clients put UTF-8 in quoted-string arguments (e.g. + // SEARCH HEADER Subject "grå") without an explicit CHARSET. RFC 9051 + // allows this, and we accept it regardless of UTF8=ACCEPT. + inputStream = new ByteArrayInputStream("\"grå\" ".getBytes(StandardCharsets.UTF_8)); + lineReader = new ImapRequestStreamLineReader(inputStream, outputStream); + + assertThat(lineReader.astring()).isEqualTo("grå"); + } } \ No newline at end of file From 7df887feac127d0c425ff60f6939e001852cb874 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Thu, 23 Apr 2026 16:49:55 +0200 Subject: [PATCH 11/18] Add support for RFC 6533 (utf8 addresses in DSNs/MDNs). --- .../main/java/org/apache/james/mdn/MDN.java | 31 ++++++- .../apache/james/mdn/fields/AddressType.java | 15 ++++ .../james/mdn/fields/FinalRecipient.java | 2 +- .../james/mdn/fields/OriginalRecipient.java | 2 +- .../java/org/apache/james/mdn/MDNTest.java | 82 ++++++++++++++++++ .../james/mdn/fields/AddressTypeTest.java | 17 ++++ .../james/mdn/fields/FinalRecipientTest.java | 21 +++++ .../mdn/fields/OriginalRecipientTest.java | 11 +++ .../james/transport/mailets/DSNBounce.java | 34 +++++++- .../transport/mailets/DSNBounceTest.java | 84 +++++++++++++++++++ 10 files changed, 290 insertions(+), 9 deletions(-) diff --git a/mdn/src/main/java/org/apache/james/mdn/MDN.java b/mdn/src/main/java/org/apache/james/mdn/MDN.java index a8842396283..389e58f9acc 100644 --- a/mdn/src/main/java/org/apache/james/mdn/MDN.java +++ b/mdn/src/main/java/org/apache/james/mdn/MDN.java @@ -38,6 +38,7 @@ import org.apache.commons.io.IOUtils; import org.apache.james.javax.MimeMultipartReport; +import org.apache.james.mdn.fields.AddressType; import org.apache.james.mime4j.Charsets; import org.apache.james.mime4j.dom.Entity; import org.apache.james.mime4j.dom.Message; @@ -59,9 +60,15 @@ public class MDN { private static final NameValuePair UTF_8_CHARSET = new 
NameValuePair("charset", Charsets.UTF_8.name()); public static final String DISPOSITION_CONTENT_TYPE = "message/disposition-notification"; + public static final String GLOBAL_DISPOSITION_CONTENT_TYPE = "message/global-disposition-notification"; public static final String REPORT_SUB_TYPE = "report"; public static final String DISPOSITION_NOTIFICATION_REPORT_TYPE = "disposition-notification"; + private static boolean isDispositionNotificationType(String mimeType) { + return mimeType.equals(DISPOSITION_CONTENT_TYPE) + || mimeType.equals(GLOBAL_DISPOSITION_CONTENT_TYPE); + } + public static class Builder { private String humanReadableText; private MDNReport report; @@ -170,7 +177,7 @@ public static Optional extractHumanReadableText(List entities) t public static Optional extractMDNReport(List entities) { return entities.stream() - .filter(entity -> entity.getMimeType().startsWith(DISPOSITION_CONTENT_TYPE)) + .filter(entity -> isDispositionNotificationType(entity.getMimeType())) .findAny() .flatMap(entity -> { try (InputStream inputStream = ((SingleBody) entity.getBody()).getInputStream()) { @@ -187,7 +194,7 @@ public static Optional extractMDNReport(List entities) { } public boolean isReport(Entity entity) { - return entity.getMimeType().startsWith(DISPOSITION_CONTENT_TYPE); + return isDispositionNotificationType(entity.getMimeType()); } private final String humanReadableText; @@ -245,10 +252,26 @@ public BodyPart computeHumanReadablePart() throws MessagingException { public BodyPart computeReportPart() throws MessagingException { MimeBodyPart mdnPart = new MimeBodyPart(); - mdnPart.setContent(report.formattedValue(), DISPOSITION_CONTENT_TYPE); + mdnPart.setContent(report.formattedValue(), dispositionContentType()); return mdnPart; } + /** + * Per RFC 6533, emits {@code message/global-disposition-notification} when + * any recipient in the report uses the {@code utf-8} addr-type, otherwise + * the RFC 3798 form {@code message/disposition-notification}. 
+ */ + private String dispositionContentType() { + boolean finalIsUtf8 = report.getFinalRecipientField().getAddressType() + .equals(AddressType.UTF_8); + boolean originalIsUtf8 = report.getOriginalRecipientField() + .map(r -> r.getAddressType().equals(AddressType.UTF_8)) + .orElse(false); + return finalIsUtf8 || originalIsUtf8 + ? GLOBAL_DISPOSITION_CONTENT_TYPE + : DISPOSITION_CONTENT_TYPE; + } + public BodyPart computeOriginalMessagePart(Message message) throws MessagingException { MimeBodyPart originalMessagePart = new MimeBodyPart(); try { @@ -276,7 +299,7 @@ private Multipart asMime4JMultipart() throws IOException { builder.addBodyPart(BodyPartBuilder.create() .use(new BasicBodyFactory()) .setBody(report.formattedValue(), Charsets.UTF_8) - .setContentType(DISPOSITION_CONTENT_TYPE, UTF_8_CHARSET)); + .setContentType(dispositionContentType(), UTF_8_CHARSET)); return builder.build(); } diff --git a/mdn/src/main/java/org/apache/james/mdn/fields/AddressType.java b/mdn/src/main/java/org/apache/james/mdn/fields/AddressType.java index ccadbc44b90..36e2cbe526a 100644 --- a/mdn/src/main/java/org/apache/james/mdn/fields/AddressType.java +++ b/mdn/src/main/java/org/apache/james/mdn/fields/AddressType.java @@ -26,8 +26,23 @@ public class AddressType { public static final AddressType DNS = new AddressType("dns"); public static final AddressType RFC_822 = new AddressType("rfc822"); + public static final AddressType UTF_8 = new AddressType("utf-8"); public static final AddressType UNKNOWN = new AddressType("unknown"); + /** + * Picks the appropriate addr-type per RFC 6533: {@link #UTF_8} when the + * address contains non-ASCII octets, otherwise {@link #RFC_822}. 
+ */ + public static AddressType pickFor(Text text) { + String value = text.formatted(); + for (int i = 0; i < value.length(); i++) { + if (value.charAt(i) > 0x7F) { + return UTF_8; + } + } + return RFC_822; + } + private final String type; public AddressType(String type) { diff --git a/mdn/src/main/java/org/apache/james/mdn/fields/FinalRecipient.java b/mdn/src/main/java/org/apache/james/mdn/fields/FinalRecipient.java index 0e5ade2a0da..f80914f807d 100644 --- a/mdn/src/main/java/org/apache/james/mdn/fields/FinalRecipient.java +++ b/mdn/src/main/java/org/apache/james/mdn/fields/FinalRecipient.java @@ -57,7 +57,7 @@ public Builder finalRecipient(Text finalRecipient) { public FinalRecipient build() { Preconditions.checkNotNull(finalRecipient); - return new FinalRecipient(addressType.orElse(AddressType.RFC_822), finalRecipient); + return new FinalRecipient(addressType.orElseGet(() -> AddressType.pickFor(finalRecipient)), finalRecipient); } } diff --git a/mdn/src/main/java/org/apache/james/mdn/fields/OriginalRecipient.java b/mdn/src/main/java/org/apache/james/mdn/fields/OriginalRecipient.java index 01ea441cf75..28f987e7026 100644 --- a/mdn/src/main/java/org/apache/james/mdn/fields/OriginalRecipient.java +++ b/mdn/src/main/java/org/apache/james/mdn/fields/OriginalRecipient.java @@ -61,7 +61,7 @@ public Builder originalRecipient(Text originalRecipient) { public OriginalRecipient build() { Preconditions.checkNotNull(originalRecipient); - return new OriginalRecipient(addressType.orElse(AddressType.RFC_822), originalRecipient); + return new OriginalRecipient(addressType.orElseGet(() -> AddressType.pickFor(originalRecipient)), originalRecipient); } } diff --git a/mdn/src/test/java/org/apache/james/mdn/MDNTest.java b/mdn/src/test/java/org/apache/james/mdn/MDNTest.java index ba58ab3c238..8321131eb75 100644 --- a/mdn/src/test/java/org/apache/james/mdn/MDNTest.java +++ b/mdn/src/test/java/org/apache/james/mdn/MDNTest.java @@ -521,4 +521,86 @@ public void 
originalMessageShouldBeContainInMimeMessage() throws Exception { private String asString(Message message) throws Exception { return new String(DefaultMessageWriter.asBytes(message), StandardCharsets.UTF_8); } + + // RFC 6533 section + + @Test + void asMime4JMessageShouldUseLegacyContentTypeWhenRecipientsAreAscii() throws Exception { + MDN mdn = MDN.builder() + .humanReadableText("human") + .report(MINIMAL_REPORT) + .build(); + + assertThat(asString(mdn.asMime4JMessageBuilder().build())) + .contains("Content-Type: message/disposition-notification") + .doesNotContain("message/global-disposition-notification"); + } + + @Test + void asMime4JMessageShouldUseGlobalContentTypeWhenFinalRecipientIsUtf8() throws Exception { + MDNReport report = MDNReport.builder() + .finalRecipientField(FinalRecipient.builder() + .finalRecipient(Text.fromRawText("user@grå.org")) + .build()) + .dispositionField(Disposition.builder() + .actionMode(DispositionActionMode.Automatic) + .sendingMode(DispositionSendingMode.Automatic) + .type(DispositionType.Deleted) + .build()) + .build(); + MDN mdn = MDN.builder() + .humanReadableText("human") + .report(report) + .build(); + + assertThat(asString(mdn.asMime4JMessageBuilder().build())) + .contains("Content-Type: message/global-disposition-notification"); + } + + @Test + void parseShouldAcceptGlobalDispositionNotificationContentType() throws Exception { + MDNReport parsed = parseReportWithContentType( + "Final-Recipient: utf-8; user@grå.org\r\n" + + "Disposition: automatic-action/MDN-sent-automatically;processed/error,failed\r\n", + "message/global-disposition-notification"); + + assertThat(parsed.getFinalRecipientField().getFinalRecipient()) + .isEqualTo(Text.fromRawText("user@grå.org")); + assertThat(parsed.getFinalRecipientField().getAddressType()) + .isEqualTo(AddressType.UTF_8); + } + + @Test + void parsedReportShouldBeIndistinguishableAcrossFormatsForAsciiAddress() throws Exception { + // Same recipient, both addr-types — parsed reports should 
be equal on the + // fields callers actually care about (recipient + disposition). + MDNReport legacy = parseReportWithContentType( + "Final-Recipient: rfc822; user@example.com\r\n" + + "Disposition: automatic-action/MDN-sent-automatically;processed/error,failed\r\n", + "message/disposition-notification"); + MDNReport global = parseReportWithContentType( + "Final-Recipient: rfc822; user@example.com\r\n" + + "Disposition: automatic-action/MDN-sent-automatically;processed/error,failed\r\n", + "message/global-disposition-notification"); + + assertThat(legacy.getFinalRecipientField().getFinalRecipient()) + .isEqualTo(global.getFinalRecipientField().getFinalRecipient()); + assertThat(legacy.getDispositionField()) + .isEqualTo(global.getDispositionField()); + } + + private MDNReport parseReportWithContentType(String body, String contentType) throws Exception { + BodyPart mdnBodyPart = BodyPartBuilder + .create() + .setBody(SingleBodyBuilder.create().setText(body).buildText()) + .setContentType(contentType) + .build(); + Message message = Message.Builder.of() + .setBody(MultipartBuilder.create("report") + .addTextPart("first", StandardCharsets.UTF_8) + .addBodyPart(mdnBodyPart) + .build()) + .build(); + return MDN.parse(message).getReport(); + } } diff --git a/mdn/src/test/java/org/apache/james/mdn/fields/AddressTypeTest.java b/mdn/src/test/java/org/apache/james/mdn/fields/AddressTypeTest.java index 0b610d5873a..5333d5a6336 100644 --- a/mdn/src/test/java/org/apache/james/mdn/fields/AddressTypeTest.java +++ b/mdn/src/test/java/org/apache/james/mdn/fields/AddressTypeTest.java @@ -86,4 +86,21 @@ void typeShouldBeTrimmed() { assertThat(addressType.getType()) .isEqualTo("ab"); } + + @Test + void utf8ConstantShouldHoldRfc6533Value() { + assertThat(AddressType.UTF_8.getType()).isEqualTo("utf-8"); + } + + @Test + void pickForShouldReturnRfc822ForAsciiAddress() { + assertThat(AddressType.pickFor(Text.fromRawText("user@example.com"))) + .isEqualTo(AddressType.RFC_822); + } + + 
@Test + void pickForShouldReturnUtf8ForNonAsciiAddress() { + assertThat(AddressType.pickFor(Text.fromRawText("user@grå.org"))) + .isEqualTo(AddressType.UTF_8); + } } diff --git a/mdn/src/test/java/org/apache/james/mdn/fields/FinalRecipientTest.java b/mdn/src/test/java/org/apache/james/mdn/fields/FinalRecipientTest.java index bb8c84d5983..b18a4d347d0 100644 --- a/mdn/src/test/java/org/apache/james/mdn/fields/FinalRecipientTest.java +++ b/mdn/src/test/java/org/apache/james/mdn/fields/FinalRecipientTest.java @@ -72,6 +72,27 @@ void typeShouldDefaultToRfc822() { .build()); } + @Test + void typeShouldDefaultToUtf8WhenAddressContainsNonAscii() { + // RFC 6533 §3.1: use the utf-8 addr-type when the address contains UTF-8 + Text address = Text.fromRawText("arnt@grå.org"); + + assertThat(FinalRecipient.builder() + .finalRecipient(address) + .build() + .getAddressType()) + .isEqualTo(AddressType.UTF_8); + } + + @Test + void formattedValueShouldDisplayUtf8TypeForNonAsciiAddress() { + assertThat(FinalRecipient.builder() + .finalRecipient(Text.fromRawText("arnt@grå.org")) + .build() + .formattedValue()) + .isEqualTo("Final-Recipient: utf-8; arnt@grå.org"); + } + @Test void formattedValueShouldDisplayAddress() { assertThat(FinalRecipient.builder() diff --git a/mdn/src/test/java/org/apache/james/mdn/fields/OriginalRecipientTest.java b/mdn/src/test/java/org/apache/james/mdn/fields/OriginalRecipientTest.java index 964132e0ca4..ae78aac2bd2 100644 --- a/mdn/src/test/java/org/apache/james/mdn/fields/OriginalRecipientTest.java +++ b/mdn/src/test/java/org/apache/james/mdn/fields/OriginalRecipientTest.java @@ -72,6 +72,17 @@ void addressTypeShouldDefaultToRfc822() { .build()); } + @Test + void addressTypeShouldDefaultToUtf8WhenAddressContainsNonAscii() { + Text address = Text.fromRawText("arnt@grå.org"); + + assertThat(OriginalRecipient.builder() + .originalRecipient(address) + .build() + .getAddressType()) + .isEqualTo(AddressType.UTF_8); + } + @Test void 
formattedValueShouldDisplayAddress() { assertThat(OriginalRecipient.builder() diff --git a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/DSNBounce.java b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/DSNBounce.java index c0119286cfb..8f9cc697c85 100755 --- a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/DSNBounce.java +++ b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/DSNBounce.java @@ -476,6 +476,7 @@ private String bounceMessage() { private MimeBodyPart createDSN(Mail originalMail) throws MessagingException { StringBuilder buffer = new StringBuilder(); + boolean anyNonAsciiAddress = false; appendReportingMTA(buffer); buffer.append("Received-From-MTA: dns; " + originalMail.getRemoteHost()) @@ -496,12 +497,22 @@ private MimeBodyPart createDSN(Mail originalMail) throws MessagingException { .append(LINE_BREAK)); for (MailAddress rec : originalMail.getRecipients()) { + anyNonAsciiAddress |= containsNonAscii(rec); appendRecipient(buffer, rec, getDeliveryError(originalMail), originalMail.getLastUpdated()); } MimeBodyPart bodyPart = new MimeBodyPart(); - bodyPart.setContent(buffer.toString(), "text/plain"); - bodyPart.setHeader("Content-Type", "message/delivery-status"); + // RFC 6533 §3.2: when any reported address contains non-ASCII + // octets the DSN body part is "message/global-delivery-status"; + // otherwise the RFC 3464 form. The outer "multipart/report; + // report-type=delivery-status" wrapper does not change. Setting + // the storage Content-Type with charset=UTF-8 first makes + // jakarta.mail serialise the body as UTF-8 octets; the second + // setHeader overrides only the type label. + bodyPart.setContent(buffer.toString(), "text/plain; charset=UTF-8"); + bodyPart.setHeader("Content-Type", anyNonAsciiAddress + ? 
"message/global-delivery-status; charset=UTF-8" + : "message/delivery-status"); bodyPart.setDescription("Delivery Status Notification"); bodyPart.setFileName("status.dat"); return bodyPart; @@ -518,7 +529,10 @@ private void appendReportingMTA(StringBuilder buffer) { private void appendRecipient(StringBuilder buffer, MailAddress mailAddress, String deliveryError, Date lastUpdated) { buffer.append(LINE_BREAK); - buffer.append("Final-Recipient: rfc822; " + mailAddress.toString()).append(LINE_BREAK); + // RFC 6533 §3.2: addr-type is "utf-8" when the address contains + // non-ASCII octets, otherwise the legacy "rfc822". + buffer.append("Final-Recipient: ").append(addrType(mailAddress)).append("; ") + .append(mailAddress.toString()).append(LINE_BREAK); buffer.append("Action: ").append(action.asString().toLowerCase(Locale.US)).append(LINE_BREAK); buffer.append("Status: " + deliveryError).append(LINE_BREAK); if (action.shouldIncludeDiagnostic()) { @@ -528,6 +542,20 @@ private void appendRecipient(StringBuilder buffer, MailAddress mailAddress, Stri .append(LINE_BREAK); } + private static String addrType(MailAddress mailAddress) { + return containsNonAscii(mailAddress) ? 
"utf-8" : "rfc822"; + } + + private static boolean containsNonAscii(MailAddress mailAddress) { + String s = mailAddress.toString(); + for (int i = 0; i < s.length(); i++) { + if (s.charAt(i) > 0x7F) { + return true; + } + } + return false; + } + private String getDeliveryError(Mail originalMail) { return AttributeUtils .getValueAndCastFromMail(originalMail, DELIVERY_ERROR, String.class) diff --git a/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/DSNBounceTest.java b/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/DSNBounceTest.java index 80ede282035..8dee8c9d130 100644 --- a/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/DSNBounceTest.java +++ b/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/DSNBounceTest.java @@ -1496,4 +1496,88 @@ void shouldAddAutoSubmittedHeader() throws Exception { assertThat(MimeMessageUtil.asString(sentMessage)) .contains("Auto-Submitted: auto-replied"); } + + @Nested + class Rfc6533 { + @Test + void dsnForAsciiRecipientShouldUseRfc822AddrTypeAndLegacyContentType() throws Exception { + FakeMailetConfig mailetConfig = FakeMailetConfig.builder() + .mailetName(MAILET_NAME) + .mailetContext(fakeMailContext) + .build(); + dsnBounce.init(mailetConfig); + + FakeMail mail = FakeMail.builder() + .name(MAILET_NAME) + .sender(new MailAddress("sender@example.com")) + .attribute(DELIVERY_ERROR_ATTRIBUTE) + .mimeMessage(MimeMessageBuilder.mimeMessageBuilder().setText("body")) + .recipient("info@example.com") + .lastUpdated(Date.from(Instant.parse("2026-04-27T10:00:00.000Z"))) + .remoteAddr("remoteHost") + .build(); + + dsnBounce.service(mail); + + BodyPart dsnPart = (BodyPart) ((MimeMultipart) fakeMailContext.getSentMails() + .get(0).getMsg().getContent()).getBodyPart(1); + assertThat(dsnPart.getContentType()).startsWith("message/delivery-status"); + String body = IOUtils.toString((SharedByteArrayInputStream) dsnPart.getContent(), StandardCharsets.UTF_8); + 
assertThat(body).contains("Final-Recipient: rfc822; info@example.com"); + } + + @Test + void dsnForUtf8LocalPartShouldUseUtf8AddrTypeAndGlobalContentType() throws Exception { + FakeMailetConfig mailetConfig = FakeMailetConfig.builder() + .mailetName(MAILET_NAME) + .mailetContext(fakeMailContext) + .build(); + dsnBounce.init(mailetConfig); + + FakeMail mail = FakeMail.builder() + .name(MAILET_NAME) + .sender(new MailAddress("sender@example.com")) + .attribute(DELIVERY_ERROR_ATTRIBUTE) + .mimeMessage(MimeMessageBuilder.mimeMessageBuilder().setText("body")) + .recipient("grå@example.com") + .lastUpdated(Date.from(Instant.parse("2026-04-27T10:00:00.000Z"))) + .remoteAddr("remoteHost") + .build(); + + dsnBounce.service(mail); + + BodyPart dsnPart = (BodyPart) ((MimeMultipart) fakeMailContext.getSentMails() + .get(0).getMsg().getContent()).getBodyPart(1); + assertThat(dsnPart.getContentType()).startsWith("message/global-delivery-status"); + String body = IOUtils.toString((SharedByteArrayInputStream) dsnPart.getContent(), StandardCharsets.UTF_8); + assertThat(body).contains("Final-Recipient: utf-8; grå@example.com"); + } + + @Test + void dsnForUtf8DomainOnlyShouldUseUtf8AddrTypeAndGlobalContentType() throws Exception { + FakeMailetConfig mailetConfig = FakeMailetConfig.builder() + .mailetName(MAILET_NAME) + .mailetContext(fakeMailContext) + .build(); + dsnBounce.init(mailetConfig); + + FakeMail mail = FakeMail.builder() + .name(MAILET_NAME) + .sender(new MailAddress("sender@example.com")) + .attribute(DELIVERY_ERROR_ATTRIBUTE) + .mimeMessage(MimeMessageBuilder.mimeMessageBuilder().setText("body")) + .recipient("arnt@grå.org") + .lastUpdated(Date.from(Instant.parse("2026-04-27T10:00:00.000Z"))) + .remoteAddr("remoteHost") + .build(); + + dsnBounce.service(mail); + + BodyPart dsnPart = (BodyPart) ((MimeMultipart) fakeMailContext.getSentMails() + .get(0).getMsg().getContent()).getBodyPart(1); + 
assertThat(dsnPart.getContentType()).startsWith("message/global-delivery-status"); + String body = IOUtils.toString((SharedByteArrayInputStream) dsnPart.getContent(), StandardCharsets.UTF_8); + assertThat(body).contains("Final-Recipient: utf-8; arnt@grå.org"); + } + } } \ No newline at end of file From e1bde2f5da675312f9e51a9d2b3ac6a523893e74 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Fri, 24 Apr 2026 13:47:46 +0200 Subject: [PATCH 12/18] Add SMTP server support for RFC 6531. --- .../api/AbstractProtocolTransport.java | 7 +- .../smtp/SMTPProtocolHandlerChain.java | 2 + .../james/protocols/smtp/SMTPSession.java | 2 + .../protocols/smtp/core/MailCmdHandler.java | 21 ++- .../protocols/smtp/core/RcptCmdHandler.java | 19 ++ .../smtp/core/esmtp/SMTPUTF8Extension.java | 61 ++++++ .../james/protocols/smtp/dsn/DSNStatus.java | 5 + .../smtp/AbstractSMTPSServerTest.java | 26 ++- .../smtp/AbstractSMTPServerTest.java | 177 +++++++++++++++++- 9 files changed, 313 insertions(+), 7 deletions(-) create mode 100644 protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/esmtp/SMTPUTF8Extension.java diff --git a/protocols/api/src/main/java/org/apache/james/protocols/api/AbstractProtocolTransport.java b/protocols/api/src/main/java/org/apache/james/protocols/api/AbstractProtocolTransport.java index a3ef7eb8f4f..fa4f63d2aa6 100644 --- a/protocols/api/src/main/java/org/apache/james/protocols/api/AbstractProtocolTransport.java +++ b/protocols/api/src/main/java/org/apache/james/protocols/api/AbstractProtocolTransport.java @@ -85,7 +85,12 @@ protected static byte[] toBytes(Response response) { builder.append(CRLF); } } - return builder.toString().getBytes(StandardCharsets.US_ASCII); + // RFC 6531 §3.7.4.2: when a server echoes a UTF-8 mailbox address back + // to the client, those octets are UTF-8; all other reply content stays + // ASCII. 
UTF-8 is a strict superset of ASCII, so encoding the whole + // reply in UTF-8 preserves ASCII-only replies byte-for-byte while + // allowing non-ASCII addresses to survive the echo. + return builder.toString().getBytes(StandardCharsets.UTF_8); } /** diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPProtocolHandlerChain.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPProtocolHandlerChain.java index 098d1a97bd1..99aced4f27e 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPProtocolHandlerChain.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPProtocolHandlerChain.java @@ -48,6 +48,7 @@ import org.apache.james.protocols.smtp.core.esmtp.AuthCmdHandler; import org.apache.james.protocols.smtp.core.esmtp.EhloCmdHandler; import org.apache.james.protocols.smtp.core.esmtp.MailSizeEsmtpExtension; +import org.apache.james.protocols.smtp.core.esmtp.SMTPUTF8Extension; import org.apache.james.protocols.smtp.core.esmtp.StartTlsCmdHandler; import org.apache.james.protocols.smtp.hook.AuthHook; import org.apache.james.protocols.smtp.hook.Hook; @@ -99,6 +100,7 @@ protected List initDefaultHandlers() { defaultHandlers.add(new VrfyCmdHandler()); defaultHandlers.add(new DataCmdHandler(metricFactory)); defaultHandlers.add(new MailSizeEsmtpExtension()); + defaultHandlers.add(new SMTPUTF8Extension()); defaultHandlers.add(new WelcomeMessageHandler()); defaultHandlers.add(new PostmasterAbuseRcptHook()); defaultHandlers.add(new ReceivedDataLineFilter()); diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java index 960bab9085e..9a437e1bcf5 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java @@ -44,6 +44,8 @@ public interface SMTPSession extends 
ProtocolSession { /** HELO or EHLO */ AttachmentKey CURRENT_HELO_MODE = AttachmentKey.of("CURRENT_HELO_MODE", String.class); AttachmentKey CURRENT_HELO_NAME = AttachmentKey.of("CURRENT_HELO_NAME", String.class); + /** Set per-transaction when the client asserted the RFC 6531 SMTPUTF8 parameter on MAIL FROM. */ + AttachmentKey SMTPUTF8_REQUESTED = AttachmentKey.of("SMTPUTF8_REQUESTED", Boolean.class); /** * Returns the service wide configuration diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java index 1b46315dcaa..7883c713aa2 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java @@ -72,6 +72,10 @@ public class MailCmdHandler extends AbstractHookableCmdHandler { DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.ADDRESS_SYNTAX_SENDER) + " Syntax error in sender address").immutable(); + /** RFC 6531 §4.2: 553 5.6.7 when a non-ASCII sender is given without SMTPUTF8. 
*/ + private static final Response NON_ASCII_SENDER_WITHOUT_SMTPUTF8 = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_MAILBOX, + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.CONTENT_NON_ASCII_ADDR) + + " Non-ASCII addresses not permitted without SMTPUTF8").immutable(); /** * A map of parameterHooks */ @@ -203,8 +207,14 @@ private Response doMAILFilter(SMTPSession session, String argument) { LOGGER.info("Error parsing sender address: {}: did not start and end with < >", sender); return SYNTAX_ERROR; } + String senderAddressString = removeBrackets(sender); + if (containsNonAscii(senderAddressString) + && !session.getAttachment(SMTPSession.SMTPUTF8_REQUESTED, State.Transaction).orElse(Boolean.FALSE)) { + LOGGER.info("Rejected non-ASCII sender address without SMTPUTF8: {}", sender); + return NON_ASCII_SENDER_WITHOUT_SMTPUTF8; + } try { - MaybeSender senderAddress = toMaybeSender(removeBrackets(sender)); + MaybeSender senderAddress = toMaybeSender(senderAddressString); // Store the senderAddress in session map session.setAttachment(SMTPSession.SENDER, senderAddress, State.Transaction); } catch (Exception pe) { @@ -227,6 +237,15 @@ private MaybeSender toMaybeSender(String senderAsString) throws AddressException appendDefaultDomainIfNeeded(senderAsString))); } + private static boolean containsNonAscii(String s) { + for (int i = 0; i < s.length(); i++) { + if (s.charAt(i) > 0x7F) { + return true; + } + } + return false; + } + private String removeBrackets(String input) { if (input.startsWith("<") && input.endsWith(">")) { // Remove < and > diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java index 1ce041ff911..167ad6528b0 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java @@ -62,6 +62,10 @@ public class 
RcptCmdHandler extends AbstractHookableCmdHandler impleme private static final Response SYNTAX_ERROR_ARGS = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_ARGUMENTS, DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.DELIVERY_SYNTAX) + " Usage: RCPT TO:").immutable(); private static final Response SYNTAX_ERROR_DELIVERY = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_ARGUMENTS, DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.DELIVERY_SYNTAX) + " Syntax error in parameters or arguments").immutable(); private static final Response SYNTAX_ERROR_ADDRESS = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_MAILBOX, DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.ADDRESS_SYNTAX) + " Syntax error in recipient address").immutable(); + /** RFC 6531 §4.2: 553 5.6.7 when a non-ASCII recipient is given without SMTPUTF8. */ + private static final Response NON_ASCII_RECIPIENT_WITHOUT_SMTPUTF8 = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_MAILBOX, + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.CONTENT_NON_ASCII_ADDR) + + " Non-ASCII addresses not permitted without SMTPUTF8").immutable(); @Inject public RcptCmdHandler(MetricFactory metricFactory) { @@ -153,6 +157,12 @@ protected Response doFilterChecks(SMTPSession session, String command, + getDefaultDomain(); } + if (containsNonAscii(recipient) + && !session.getAttachment(SMTPSession.SMTPUTF8_REQUESTED, State.Transaction).orElse(Boolean.FALSE)) { + LOGGER.info("Rejected non-ASCII recipient address without SMTPUTF8: {}", recipient); + return NON_ASCII_RECIPIENT_WITHOUT_SMTPUTF8; + } + try { recipientAddress = new MailAddress(recipient); } catch (Exception pe) { @@ -195,6 +205,15 @@ protected Response doFilterChecks(SMTPSession session, String command, return null; } + private static boolean containsNonAscii(String s) { + for (int i = 0; i < s.length(); i++) { + if (s.charAt(i) > 0x7F) { + return true; + } + } + return false; + } + private String getContext(SMTPSession session, MailAddress recipientAddress, String recipient) { StringBuilder 
sb = new StringBuilder(128); if (null != recipientAddress) { diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/esmtp/SMTPUTF8Extension.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/esmtp/SMTPUTF8Extension.java new file mode 100644 index 00000000000..c54a2219b44 --- /dev/null +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/esmtp/SMTPUTF8Extension.java @@ -0,0 +1,61 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.protocols.smtp.core.esmtp; + +import java.util.Collections; +import java.util.List; + +import org.apache.james.protocols.api.ProtocolSession.State; +import org.apache.james.protocols.smtp.SMTPSession; +import org.apache.james.protocols.smtp.hook.HookResult; +import org.apache.james.protocols.smtp.hook.MailParametersHook; + +/** + * RFC 6531 SMTPUTF8 extension. + * + * Advertises the {@code SMTPUTF8} EHLO keyword and parses the {@code SMTPUTF8} + * parameter on {@code MAIL FROM}. 
The parameter takes no value; its presence + * on a transaction authorises the use of UTF-8 in the envelope addresses. + * + * Gating of UTF-8 addresses themselves lives in {@code MailCmdHandler} / + * {@code RcptCmdHandler}, which reject non-ASCII addresses with 553 5.6.7 + * when {@link SMTPSession#SMTPUTF8_REQUESTED} is not set. + */ +public class SMTPUTF8Extension implements MailParametersHook, EhloExtension { + + private static final String[] MAIL_PARAMS = { "SMTPUTF8" }; + private static final List FEATURES = Collections.singletonList("SMTPUTF8"); + + @Override + public HookResult doMailParameter(SMTPSession session, String paramName, String paramValue) { + session.setAttachment(SMTPSession.SMTPUTF8_REQUESTED, Boolean.TRUE, State.Transaction); + return null; + } + + @Override + public String[] getMailParamNames() { + return MAIL_PARAMS; + } + + @Override + public List getImplementedEsmtpFeatures(SMTPSession session) { + return FEATURES; + } +} diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/dsn/DSNStatus.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/dsn/DSNStatus.java index 47fc509a44a..55706ef39c8 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/dsn/DSNStatus.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/dsn/DSNStatus.java @@ -290,6 +290,11 @@ public class DSNStatus { */ public static final String CONTENT_CONVERSION_FAILED = "6.5"; + /** + * Non-ASCII addresses not permitted for that sender/recipient (RFC 6531) + */ + public static final String CONTENT_NON_ASCII_ADDR = "6.7"; + /** * Security or Policy Status diff --git a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java index 442a3ec87ff..16bf96c9331 100644 --- a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java +++ 
b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java @@ -25,11 +25,13 @@ import org.apache.james.protocols.api.utils.BogusSslContextFactory; import org.apache.james.protocols.api.utils.BogusTrustManagerFactory; import org.apache.james.protocols.netty.Encryption; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public abstract class AbstractSMTPSServerTest extends AbstractSMTPServerTest { - - + + @Override protected SMTPClient createClient() { SMTPSClient client = new SMTPSClient(true,BogusSslContextFactory.getClientContext()); @@ -37,11 +39,27 @@ protected SMTPClient createClient() { return client; } - + @Override protected ProtocolServer createServer(Protocol protocol) { return createEncryptedServer(protocol, Encryption.createTls(BogusSslContextFactory.getServerContext())); } - + protected abstract ProtocolServer createEncryptedServer(Protocol protocol, Encryption enc); + + // The UTF-8 "accepted" tests use a raw TCP socket to control bytes on the + // wire. That does not speak TLS, so skip those cases under SMTPS — the plain + // variant covers the server-side logic. Rejection tests still work under + // SMTPS via the regular SMTPClient path. 
+ @Override + @Test + @Disabled("Raw-socket UTF-8 test is not SSL-aware; covered by NettySMTPServerTest") + void mailFromWithNonAsciiSenderShouldBeAcceptedWhenSmtpUtf8IsAsserted() { + } + + @Override + @Test + @Disabled("Raw-socket UTF-8 test is not SSL-aware; covered by NettySMTPServerTest") + void rcptToWithNonAsciiRecipientShouldBeAcceptedWhenSmtpUtf8IsAsserted() { + } } diff --git a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java index 71d8979c16d..a7ff4d5fbbf 100644 --- a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java +++ b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java @@ -984,5 +984,180 @@ protected static void checkEnvelope(MailEnvelope env, String sender, List"); + + assertThat(client.getReplyCode()).isEqualTo(553); + assertThat(client.getReplyString()).contains("5.6.7"); + + client.quit(); + client.disconnect(); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void mailFromWithNonAsciiSenderShouldBeAcceptedWhenSmtpUtf8IsAsserted() throws Exception { + ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + String reply = rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM: SMTPUTF8\r\n", + "QUIT\r\n"); + + // RFC 6531 §3.7.4.2: the server echoes the UTF-8 sender address + // back unmodified. 
+ assertThat(reply).contains("250 2.1.0 Sender OK"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void rcptToWithNonAsciiRecipientShouldBeRejectedWhenSmtpUtf8NotAsserted() throws Exception { + ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + + SMTPClient client = createClient(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + client.connect(bindedAddress.getAddress().getHostAddress(), bindedAddress.getPort()); + client.sendCommand("EHLO", "localhost"); + client.sendCommand("MAIL", "FROM:<" + SENDER + ">"); + assertThat(SMTPReply.isPositiveCompletion(client.getReplyCode())) + .as("Reply=" + client.getReplyString()).isTrue(); + + client.sendCommand("RCPT", "TO:"); + + assertThat(client.getReplyCode()).isEqualTo(553); + assertThat(client.getReplyString()).contains("5.6.7"); + + client.quit(); + client.disconnect(); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void rcptToWithNonAsciiRecipientShouldBeAcceptedWhenSmtpUtf8IsAsserted() throws Exception { + ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + String reply = rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM:<" + SENDER + "> SMTPUTF8\r\n", + "RCPT TO:\r\n", + "QUIT\r\n"); + + // RFC 6531 §3.7.4.2: the server echoes the UTF-8 recipient address + // back unmodified. + assertThat(reply).contains("250 2.1.5 Recipient OK"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + /** + * Write all {@code commands} verbatim in UTF-8 and return the concatenated + * server response as one UTF-8 decoded string. Reads until the server + * closes the socket (which it does on QUIT). 
+ */ + private String rawUtf8Exchange(InetSocketAddress address, String... commands) throws IOException { + try (java.net.Socket socket = new java.net.Socket(address.getAddress().getHostAddress(), address.getPort())) { + socket.getOutputStream().write(String.join("", commands).getBytes(StandardCharsets.UTF_8)); + socket.getOutputStream().flush(); + java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream(); + byte[] buf = new byte[4096]; + int n; + while ((n = socket.getInputStream().read(buf)) > 0) { + collected.write(buf, 0, n); + } + return collected.toString(StandardCharsets.UTF_8); + } + } + + @Test + void asciiAddressesShouldStillWorkWithoutSmtpUtf8() throws Exception { + ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + + SMTPClient client = createClient(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + client.connect(bindedAddress.getAddress().getHostAddress(), bindedAddress.getPort()); + client.sendCommand("EHLO", "localhost"); + + client.sendCommand("MAIL", "FROM:"); + assertThat(SMTPReply.isPositiveCompletion(client.getReplyCode())) + .as("Reply=" + client.getReplyString()).isTrue(); + + client.sendCommand("RCPT", "TO:"); + assertThat(SMTPReply.isPositiveCompletion(client.getReplyCode())) + .as("Reply=" + client.getReplyString()).isTrue(); + + client.quit(); + client.disconnect(); + } finally { + if (server != null) { + server.unbind(); + } + } + } + } From 106201715b3c7f726f6f54a151571920870dddc2 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Fri, 24 Apr 2026 14:11:12 +0200 Subject: [PATCH 13/18] Unify ACE (foo@xn--...) to Unicode in the SMTP server. Application-layer code often assumes that addresses can be compared using String.equalsIgnoreCase(), and some also uses regular expressions or substring matching on addresses. 
This commit provides addresses to upper-layer code in their Unicode (U-label) form, so that kind of code continues to work. This might also have security implications: If upper-layer code can be confused about whether two addresses are the same, that sounds as if an attacker could exploit the confusion. This change should block the possibility. --- .../james/protocols/smtp/SMTPSession.java | 4 + .../smtp/core/AddressNormalization.java | 65 +++++++++ .../protocols/smtp/core/MailCmdHandler.java | 18 ++- .../protocols/smtp/core/RcptCmdHandler.java | 18 ++- .../smtp/AbstractSMTPSServerTest.java | 30 ++++ .../smtp/AbstractSMTPServerTest.java | 132 ++++++++++++++++++ 6 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/AddressNormalization.java diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java index 9a437e1bcf5..ee93690cce2 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/SMTPSession.java @@ -46,6 +46,10 @@ public interface SMTPSession extends ProtocolSession { AttachmentKey CURRENT_HELO_NAME = AttachmentKey.of("CURRENT_HELO_NAME", String.class); /** Set per-transaction when the client asserted the RFC 6531 SMTPUTF8 parameter on MAIL FROM. */ AttachmentKey SMTPUTF8_REQUESTED = AttachmentKey.of("SMTPUTF8_REQUESTED", Boolean.class); + /** The sender address as the client sent it, with the enclosing angle brackets removed but without IDN normalisation. Used for echoing back in responses. */ + AttachmentKey RAW_SENDER_STRING = AttachmentKey.of("RAW_SENDER_STRING", String.class); + /** The recipient currently being processed, in wire form. See {@link #RAW_SENDER_STRING}. 
*/ + AttachmentKey RAW_CURRENT_RECIPIENT_STRING = AttachmentKey.of("RAW_CURRENT_RECIPIENT_STRING", String.class); /** * Returns the service wide configuration diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/AddressNormalization.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/AddressNormalization.java new file mode 100644 index 00000000000..fd633b44cf7 --- /dev/null +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/AddressNormalization.java @@ -0,0 +1,65 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.protocols.smtp.core; + +import java.net.IDN; + +/** + * Address-string normalisation helpers shared by MAIL FROM and RCPT TO + * handling. Address validity proper lives in + * {@link org.apache.james.core.MailAddress}; this class is concerned only + * with the protocol-layer transforms. 
+ */ +final class AddressNormalization { + + private AddressNormalization() { + } + + /** + * Convert any {@code xn--} labels (IDNA A-labels) in the domain part of + * {@code address} to their Unicode (U-label) form, leaving the local + * part untouched. Addresses without an {@code @} or without an + * {@code xn--} substring are returned unchanged. + * + * This runs regardless of whether the client declared SMTPUTF8, because + * an A-label-only address is purely ASCII on the wire and has always + * been valid SMTP. Storing the decoded U-label form lets upper layers + * reason about one canonical address. + * + * @throws IllegalArgumentException if any label still starts with + * {@code xn--} after {@link IDN#toUnicode(String, int)} — which + * indicates a malformed A-label that the IDN decoder could not + * interpret. + */ + static String aceLabelsToUnicode(String address) { + int at = address.lastIndexOf('@'); + if (at < 0 || !address.substring(at + 1).contains("xn--")) { + return address; + } + String localPart = address.substring(0, at); + String domain = address.substring(at + 1); + String unicodeDomain = IDN.toUnicode(domain, IDN.ALLOW_UNASSIGNED); + if (unicodeDomain.startsWith("xn--") || unicodeDomain.contains(".xn--")) { + throw new IllegalArgumentException( + "Malformed A-label in domain: " + domain); + } + return localPart + "@" + unicodeDomain; + } +} diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java index 7883c713aa2..eb61891db5f 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/MailCmdHandler.java @@ -76,6 +76,9 @@ public class MailCmdHandler extends AbstractHookableCmdHandler { private static final Response NON_ASCII_SENDER_WITHOUT_SMTPUTF8 = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_MAILBOX, 
DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.CONTENT_NON_ASCII_ADDR) + " Non-ASCII addresses not permitted without SMTPUTF8").immutable(); + private static final Response INVALID_IDN_SENDER = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_ARGUMENTS, + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.ADDRESS_SYNTAX_SENDER) + + " Invalid A-label (xn--) in sender domain").immutable(); /** * A map of parameterHooks */ @@ -108,11 +111,17 @@ public Response onCommand(SMTPSession session, Request request) { private Response doMAIL(SMTPSession session) { StringBuilder responseBuffer = new StringBuilder(); MaybeSender sender = session.getAttachment(SMTPSession.SENDER, State.Transaction).orElse(MaybeSender.nullSender()); + // Echo the sender back in the exact form the client sent it. RFC 6531 + // §3.7.4.2 restricts server responses to ASCII unless SMTPUTF8 is + // asserted, and also lets us preserve the client's choice of + // A-label (xn--) vs U-label when they sent ACE form. + String echo = session.getAttachment(SMTPSession.RAW_SENDER_STRING, State.Transaction) + .orElse(sender.asString()); responseBuffer.append( DSNStatus.getStatus(DSNStatus.SUCCESS, DSNStatus.ADDRESS_OTHER)) .append(" Sender <"); if (!sender.isNullSender()) { - responseBuffer.append(sender.asString()); + responseBuffer.append(echo); } responseBuffer.append("> OK"); @@ -208,11 +217,18 @@ private Response doMAILFilter(SMTPSession session, String argument) { return SYNTAX_ERROR; } String senderAddressString = removeBrackets(sender); + session.setAttachment(SMTPSession.RAW_SENDER_STRING, senderAddressString, State.Transaction); if (containsNonAscii(senderAddressString) && !session.getAttachment(SMTPSession.SMTPUTF8_REQUESTED, State.Transaction).orElse(Boolean.FALSE)) { LOGGER.info("Rejected non-ASCII sender address without SMTPUTF8: {}", sender); return NON_ASCII_SENDER_WITHOUT_SMTPUTF8; } + try { + senderAddressString = AddressNormalization.aceLabelsToUnicode(senderAddressString); + } catch 
(IllegalArgumentException e) { + LOGGER.info("Rejected sender address with invalid A-label: {}", sender); + return INVALID_IDN_SENDER; + } try { MaybeSender senderAddress = toMaybeSender(senderAddressString); // Store the senderAddress in session map diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java index 167ad6528b0..b0685828aa9 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/RcptCmdHandler.java @@ -66,6 +66,9 @@ public class RcptCmdHandler extends AbstractHookableCmdHandler impleme private static final Response NON_ASCII_RECIPIENT_WITHOUT_SMTPUTF8 = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_MAILBOX, DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.CONTENT_NON_ASCII_ADDR) + " Non-ASCII addresses not permitted without SMTPUTF8").immutable(); + private static final Response INVALID_IDN_RECIPIENT = new SMTPResponse(SMTPRetCode.SYNTAX_ERROR_ARGUMENTS, + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.ADDRESS_SYNTAX) + + " Invalid A-label (xn--) in recipient domain").immutable(); @Inject public RcptCmdHandler(MetricFactory metricFactory) { @@ -91,10 +94,14 @@ protected Response doCoreCmd(SMTPSession session, String command, String paramet rcptColl.add(recipientAddress); session.setAttachment(SMTPSession.RCPT_LIST, rcptColl, State.Transaction); + // Echo the recipient back in the exact form the client sent it. See + // the matching comment in MailCmdHandler.doMAIL — RFC 6531 §3.7.4.2. 
+ String echo = session.getAttachment(SMTPSession.RAW_CURRENT_RECIPIENT_STRING, State.Transaction) + .orElseGet(recipientAddress::asString); StringBuilder response = new StringBuilder(); String status = DSNStatus.getStatus(DSNStatus.SUCCESS, DSNStatus.ADDRESS_VALID); response.append(status) - .append(" Recipient <").append(recipientAddress).append("> OK"); + .append(" Recipient <").append(echo).append("> OK"); LOGGER.debug("RCPT TO {}", StringUtils.abbreviate(recipientAddress.asString(), 80)); @@ -157,12 +164,21 @@ protected Response doFilterChecks(SMTPSession session, String command, + getDefaultDomain(); } + session.setAttachment(SMTPSession.RAW_CURRENT_RECIPIENT_STRING, recipient, State.Transaction); + if (containsNonAscii(recipient) && !session.getAttachment(SMTPSession.SMTPUTF8_REQUESTED, State.Transaction).orElse(Boolean.FALSE)) { LOGGER.info("Rejected non-ASCII recipient address without SMTPUTF8: {}", recipient); return NON_ASCII_RECIPIENT_WITHOUT_SMTPUTF8; } + try { + recipient = AddressNormalization.aceLabelsToUnicode(recipient); + } catch (IllegalArgumentException e) { + LOGGER.info("Rejected recipient address with invalid A-label: {}", recipient); + return INVALID_IDN_RECIPIENT; + } + try { recipientAddress = new MailAddress(recipient); } catch (Exception pe) { diff --git a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java index 16bf96c9331..e07b3562b04 100644 --- a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java +++ b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPSServerTest.java @@ -62,4 +62,34 @@ void mailFromWithNonAsciiSenderShouldBeAcceptedWhenSmtpUtf8IsAsserted() { @Disabled("Raw-socket UTF-8 test is not SSL-aware; covered by NettySMTPServerTest") void rcptToWithNonAsciiRecipientShouldBeAcceptedWhenSmtpUtf8IsAsserted() { } + + @Override + @Test + 
@Disabled("Raw-socket test is not SSL-aware; covered by NettySMTPServerTest") + void mailFromWithAceLabelDomainShouldBeAcceptedWithoutSmtpUtf8() { + } + + @Override + @Test + @Disabled("Raw-socket test is not SSL-aware; covered by NettySMTPServerTest") + void rcptToWithAceLabelDomainShouldBeAcceptedWithoutSmtpUtf8() { + } + + @Override + @Test + @Disabled("Raw-socket test is not SSL-aware; covered by NettySMTPServerTest") + void mailFromWithMalformedAceLabelShouldBeRejected() { + } + + @Override + @Test + @Disabled("Raw-socket test is not SSL-aware; covered by NettySMTPServerTest") + void rcptToWithMalformedAceLabelShouldBeRejected() { + } + + @Override + @Test + @Disabled("Raw-socket test is not SSL-aware; covered by NettySMTPServerTest") + void aceLabelDomainsShouldBeExposedToHooksAsUnicode() { + } } diff --git a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java index a7ff4d5fbbf..b02dd06f9e6 100644 --- a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java +++ b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/AbstractSMTPServerTest.java @@ -1112,6 +1112,138 @@ void rcptToWithNonAsciiRecipientShouldBeAcceptedWhenSmtpUtf8IsAsserted() throws } } + @Test + void aceLabelDomainsShouldBeExposedToHooksAsUnicode() throws Exception { + // Drive a full transaction with ACE-form addresses on the wire, then + // inspect the envelope that TestMessageHook captured: both sender and + // recipient should be in U-label form (grå.org), not the ACE form the + // client sent. 
+ TestMessageHook hook = new TestMessageHook(); + ProtocolServer server = null; + try { + server = createServer(createProtocol(hook)); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM:\r\n", + "RCPT TO:\r\n", + "DATA\r\n", + MSG1 + "\r\n.\r\n", + "QUIT\r\n"); + + Iterator queued = hook.getQueued().iterator(); + assertThat(queued.hasNext()).isTrue(); + MailEnvelope env = queued.next(); + assertThat(env.getMaybeSender().asString()).isEqualTo("arnt@grå.org"); + assertThat(env.getRecipients()) + .extracting(MailAddress::asString) + .containsExactly("someone@grå.org"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void mailFromWithAceLabelDomainShouldBeAcceptedWithoutSmtpUtf8() throws Exception { + // xn--gr-zia is the Punycode (A-label) form of "grå". The wire is pure + // ASCII, no SMTPUTF8 asserted. RFC 6531 §3.7.4.2 says the server + // response must stay ASCII in that case, so the echo preserves the + // ACE form the client sent — even though internally we store the + // U-label form (see aceLabelDomainsShouldBeExposedToHooksAsUnicode). 
+ ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + String reply = rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM:\r\n", + "QUIT\r\n"); + + assertThat(reply).contains("250 2.1.0 Sender OK"); + assertThat(reply).doesNotContain("grå"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void rcptToWithAceLabelDomainShouldBeAcceptedWithoutSmtpUtf8() throws Exception { + ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + String reply = rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM:<" + SENDER + ">\r\n", + "RCPT TO:\r\n", + "QUIT\r\n"); + + assertThat(reply).contains("250 2.1.5 Recipient OK"); + assertThat(reply).doesNotContain("grå"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void mailFromWithMalformedAceLabelShouldBeRejected() throws Exception { + // "xn--" on its own is not a valid A-label; IDN.toUnicode leaves it + // unchanged, which we detect and reject with a specific error. 
+ ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + String reply = rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM:\r\n", + "QUIT\r\n"); + + assertThat(reply).contains("501"); + assertThat(reply).contains("Invalid A-label"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + + @Test + void rcptToWithMalformedAceLabelShouldBeRejected() throws Exception { + ProtocolServer server = null; + try { + server = createServer(createProtocol(new TestMessageHook())); + server.bind(); + InetSocketAddress bindedAddress = new ProtocolServerUtils(server).retrieveBindedAddress(); + + String reply = rawUtf8Exchange(bindedAddress, + "EHLO localhost\r\n", + "MAIL FROM:<" + SENDER + ">\r\n", + "RCPT TO:\r\n", + "QUIT\r\n"); + + assertThat(reply).contains("501"); + assertThat(reply).contains("Invalid A-label"); + } finally { + if (server != null) { + server.unbind(); + } + } + } + /** * Write all {@code commands} verbatim in UTF-8 and return the concatenated * server response as one UTF-8 decoded string. Reads until the server From 20f077c0864b5aeff585b1ff37de4fe76abaff90 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Fri, 24 Apr 2026 14:33:49 +0200 Subject: [PATCH 14/18] Add support for RFC 6531 (SMTPUTF8) to remote delivery. 
--- .../transport/mailets/RemoteDelivery.java | 15 ++ .../remote/delivery/MailDelivrerToHost.java | 30 ++++ .../remote/delivery/SmtpUtf8Strategy.java | 148 +++++++++++++++++ .../remote/delivery/SmtpUtf8StrategyTest.java | 153 ++++++++++++++++++ 4 files changed, 346 insertions(+) create mode 100644 server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8Strategy.java create mode 100644 server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8StrategyTest.java diff --git a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/RemoteDelivery.java b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/RemoteDelivery.java index fabfeff6891..07381cb17ec 100644 --- a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/RemoteDelivery.java +++ b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/RemoteDelivery.java @@ -152,6 +152,21 @@ * or use the mail.smtps.ssl.checkserveridentity and mail.smtp.ssl.checkserveridentity javax properties for fine control.
* Read org.eclipse.angus.mail.smtp * for full information. + *
+ * SMTPUTF8 (RFC 6531): when the first MX we reach advertises + * SMTPUTF8 and the envelope contains any non-ASCII character, the + * extension is asserted on MAIL FROM and UTF-8 addresses flow through + * unchanged. When the MX lacks SMTPUTF8 and only the domain parts are + * non-ASCII, those domains are converted to their ACE (A-label, xn--) + * form for the envelope commands; the message headers themselves are + * still allowed to carry UTF-8 (Subject, display names, etc.). This can + * produce a transaction where RCPT TO carries punycode but the mail + * headers carry UTF-8 — that mismatch is a fair compromise that + * optimises handling on the receiver side: a receiver that cannot speak + * SMTPUTF8 still accepts the envelope it understands, and a receiver + * that can read UTF-8 headers (most do) gets them intact. When a local + * part is non-ASCII and the MX lacks SMTPUTF8 the transaction fails + * permanently — no lossless downgrade exists. */ public class RemoteDelivery extends GenericMailet { private static final Logger LOGGER = LoggerFactory.getLogger(RemoteDelivery.class); diff --git a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/MailDelivrerToHost.java b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/MailDelivrerToHost.java index 75efb77f755..503c1909e10 100644 --- a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/MailDelivrerToHost.java +++ b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/MailDelivrerToHost.java @@ -68,6 +68,7 @@ public class MailDelivrerToHost { public static final String BIT_MIME_8 = "8BITMIME"; public static final String REQUIRE_TLS = "REQUIRETLS"; public static final String STARTTLS = "STARTTLS"; + public static final String SMTPUTF8 = "SMTPUTF8"; public static final String MT_PRIORITY = "MT-PRIORITY"; public static final String MAIL_PRIORITY_ATTRIBUTE_NAME = 
"MAIL_PRIORITY"; private static final List supportedSmtpExtensionsList = List.of(MT_PRIORITY, STARTTLS); @@ -134,6 +135,27 @@ public ExecutionResult tryDeliveryToHost(Mail mail, Collection if (receiverDoesNotProvideNecessaryStartTls(mail, transport)) { return ExecutionResult.permanentFailure(new SendFailedException("Mail delivery failed; the receiving server does not support STARTTLS")); } + // We assume all MXes for a given destination domain advertise + // the same set of SMTP extensions; this decision is made once, + // on the first MX we reach, and we do not fall back to another + // MX hoping it might have different capabilities. + SmtpUtf8Strategy.Action utf8Action = SmtpUtf8Strategy.pick( + mail.getMaybeSender(), addr, transport.supportsExtension(SMTPUTF8)); + if (utf8Action == SmtpUtf8Strategy.Action.CANNOT_DOWNGRADE) { + return ExecutionResult.permanentFailure(new SendFailedException( + "Remote server does not advertise SMTPUTF8 but the envelope " + + "contains a non-ASCII local part that cannot be downgraded")); + } + if (utf8Action == SmtpUtf8Strategy.Action.DOWNGRADE_DOMAINS) { + addr = toAceDomains(addr); + props.put(inContext(session, "mail.smtp.from"), + SmtpUtf8Strategy.aceAddressString(mail.getMaybeSender().asString())); + } else if (utf8Action == SmtpUtf8Strategy.Action.USE_EXTENSION) { + // Enable Angus Mail's UTF-8 mode for this session; since the + // remote advertises SMTPUTF8, the transport will emit it on + // MAIL FROM and accept UTF-8 in the envelope. 
+ session.getProperties().put("mail.mime.allowutf8", "true"); + } if (mail.dsnParameters().isPresent()) { sendDSNAwareEmail(mail, transport, addr); } else if (extensionsSupported(transport)) { @@ -236,6 +258,14 @@ private static boolean extensionsSupported(SMTPTransport transport) { return supportedSmtpExtensionsList.stream().anyMatch(transport::supportsExtension); } + private static Collection toAceDomains(Collection addr) throws MessagingException { + Collection out = new java.util.ArrayList<>(addr.size()); + for (InternetAddress a : addr) { + out.add(SmtpUtf8Strategy.toAceDomain(a)); + } + return out; + } + private static boolean receiverDoesNotProvideNecessaryStartTls(Mail mail, SMTPTransport transport) { return !transport.getLastServerResponse().contains(STARTTLS) && mail.attributesMap().containsKey(AttributeName.of(REQUIRE_TLS)) && diff --git a/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8Strategy.java b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8Strategy.java new file mode 100644 index 00000000000..ced6183bcc0 --- /dev/null +++ b/server/mailet/mailets/src/main/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8Strategy.java @@ -0,0 +1,148 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. 
You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.transport.mailets.remote.delivery; + +import java.net.IDN; +import java.util.Collection; + +import jakarta.mail.internet.AddressException; +import jakarta.mail.internet.InternetAddress; + +import org.apache.james.core.MaybeSender; + +/** + * Picks a relaying strategy for the RFC 6531 SMTPUTF8 extension based on + * what the remote MX advertises and what's in the envelope. We assume all + * MXes for a single destination domain advertise the same extensions; if + * the first MX we try lacks SMTPUTF8 we don't retry subsequent ones hoping + * they'll differ. + */ +public final class SmtpUtf8Strategy { + + public enum Action { + /** No envelope address has non-ASCII characters; deliver as-is. */ + NO_UTF8_NEEDED, + /** Some envelope address has non-ASCII characters and the remote + * advertises SMTPUTF8; deliver as-is and assert SMTPUTF8. */ + USE_EXTENSION, + /** Remote lacks SMTPUTF8 but all non-ASCII lives in the domain + * part, which can be downgraded to ACE (A-label, xn--) form. */ + DOWNGRADE_DOMAINS, + /** Remote lacks SMTPUTF8 and at least one local part is non-ASCII, + * so no lossless downgrade exists. Caller should fail the + * transaction the same way it fails a SIZE overflow. 
*/ + CANNOT_DOWNGRADE + } + + private SmtpUtf8Strategy() { + } + + public static Action pick(MaybeSender sender, + Collection recipients, + boolean remoteSupportsSmtpUtf8) { + boolean nonAsciiLocalPart = hasNonAsciiLocalPart(sender, recipients); + boolean nonAsciiDomain = hasNonAsciiDomain(sender, recipients); + + if (!nonAsciiLocalPart && !nonAsciiDomain) { + return Action.NO_UTF8_NEEDED; + } + if (remoteSupportsSmtpUtf8) { + return Action.USE_EXTENSION; + } + if (nonAsciiLocalPart) { + return Action.CANNOT_DOWNGRADE; + } + return Action.DOWNGRADE_DOMAINS; + } + + /** + * Returns a copy of {@code address} with its domain converted to ACE + * (A-label) form via {@link IDN#toASCII}. Passing an already-ASCII + * domain through this is a no-op, so callers don't need to check. + * + * @throws AddressException if the address has no {@code @} + */ + public static InternetAddress toAceDomain(InternetAddress address) throws AddressException { + String asString = address.getAddress(); + int at = asString.lastIndexOf('@'); + if (at < 0) { + throw new AddressException("Address has no @: " + asString); + } + String localPart = asString.substring(0, at); + String domain = asString.substring(at + 1); + // InternetAddress(String) parses strictly; bypass via setAddress so + // we don't reject local parts we're only passing through unchanged. + InternetAddress result = new InternetAddress(); + result.setAddress(localPart + "@" + IDN.toASCII(domain, IDN.ALLOW_UNASSIGNED)); + return result; + } + + /** ACE form of the string address. See {@link #toAceDomain}. 
*/ + public static String aceAddressString(String address) { + int at = address.lastIndexOf('@'); + if (at < 0) { + return address; + } + return address.substring(0, at + 1) + + IDN.toASCII(address.substring(at + 1), IDN.ALLOW_UNASSIGNED); + } + + private static boolean hasNonAsciiLocalPart(MaybeSender sender, + Collection recipients) { + if (!sender.isNullSender() + && containsNonAscii(sender.asString().substring(0, Math.max(0, sender.asString().lastIndexOf('@'))))) { + return true; + } + for (InternetAddress a : recipients) { + int at = a.getAddress().lastIndexOf('@'); + String localPart = at < 0 ? a.getAddress() : a.getAddress().substring(0, at); + if (containsNonAscii(localPart)) { + return true; + } + } + return false; + } + + private static boolean hasNonAsciiDomain(MaybeSender sender, + Collection recipients) { + if (!sender.isNullSender()) { + int at = sender.asString().lastIndexOf('@'); + if (at >= 0 && containsNonAscii(sender.asString().substring(at + 1))) { + return true; + } + } + for (InternetAddress a : recipients) { + int at = a.getAddress().lastIndexOf('@'); + if (at >= 0 && containsNonAscii(a.getAddress().substring(at + 1))) { + return true; + } + } + return false; + } + + private static boolean containsNonAscii(String s) { + for (int i = 0; i < s.length(); i++) { + if (s.charAt(i) > 0x7F) { + return true; + } + } + return false; + } +} diff --git a/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8StrategyTest.java b/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8StrategyTest.java new file mode 100644 index 00000000000..a302dd09ce8 --- /dev/null +++ b/server/mailet/mailets/src/test/java/org/apache/james/transport/mailets/remote/delivery/SmtpUtf8StrategyTest.java @@ -0,0 +1,153 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. 
See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.transport.mailets.remote.delivery; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; + +import jakarta.mail.internet.InternetAddress; + +import org.apache.james.core.MailAddress; +import org.apache.james.core.MaybeSender; +import org.junit.jupiter.api.Test; + +class SmtpUtf8StrategyTest { + + private static MaybeSender sender(String addr) throws Exception { + return MaybeSender.of(new MailAddress(addr)); + } + + private static List rcpts(String... 
addrs) throws Exception { + List out = new java.util.ArrayList<>(); + for (String a : addrs) { + InternetAddress ia = new InternetAddress(); + ia.setAddress(a); + out.add(ia); + } + return out; + } + + @Test + void allAsciiShouldNotNeedUtf8() throws Exception { + assertThat(SmtpUtf8Strategy.pick( + sender("arnt@example.com"), + rcpts("info@example.com"), + /* remoteSupportsSmtpUtf8 */ false)) + .isEqualTo(SmtpUtf8Strategy.Action.NO_UTF8_NEEDED); + } + + @Test + void unicodeDomainWithSmtpUtf8ShouldUseExtension() throws Exception { + assertThat(SmtpUtf8Strategy.pick( + sender("arnt@grå.org"), + rcpts("info@grå.org"), + true)) + .isEqualTo(SmtpUtf8Strategy.Action.USE_EXTENSION); + } + + @Test + void unicodeLocalPartWithSmtpUtf8ShouldUseExtension() throws Exception { + assertThat(SmtpUtf8Strategy.pick( + sender("grå@example.com"), + rcpts("info@example.com"), + true)) + .isEqualTo(SmtpUtf8Strategy.Action.USE_EXTENSION); + } + + @Test + void unicodeDomainWithoutSmtpUtf8ShouldDowngradeDomains() throws Exception { + // ASCII local parts everywhere — we can ACE-encode the domain(s) + // and send RFC 5321-clean envelope commands. + assertThat(SmtpUtf8Strategy.pick( + sender("arnt@grå.org"), + rcpts("info@münchen.de"), + false)) + .isEqualTo(SmtpUtf8Strategy.Action.DOWNGRADE_DOMAINS); + } + + @Test + void unicodeLocalPartWithoutSmtpUtf8ShouldFailTransaction() throws Exception { + assertThat(SmtpUtf8Strategy.pick( + sender("grå@example.com"), + rcpts("info@example.com"), + false)) + .isEqualTo(SmtpUtf8Strategy.Action.CANNOT_DOWNGRADE); + } + + @Test + void nonAsciiInOnlyOneRecipientShouldStillTriggerAction() throws Exception { + assertThat(SmtpUtf8Strategy.pick( + sender("arnt@example.com"), + rcpts("info@example.com", "गोरिल@उदाहरण.भारत"), + false)) + .isEqualTo(SmtpUtf8Strategy.Action.CANNOT_DOWNGRADE); + } + + @Test + void nullSenderWithAsciiRecipientShouldNotNeedUtf8() throws Exception { + // Bounce path: MAIL FROM:<>. Only recipients matter. 
+ assertThat(SmtpUtf8Strategy.pick( + MaybeSender.nullSender(), + rcpts("info@example.com"), + false)) + .isEqualTo(SmtpUtf8Strategy.Action.NO_UTF8_NEEDED); + } + + @Test + void nullSenderWithUnicodeRecipientShouldFollowRecipient() throws Exception { + assertThat(SmtpUtf8Strategy.pick( + MaybeSender.nullSender(), + rcpts("arnt@grå.org"), + true)) + .isEqualTo(SmtpUtf8Strategy.Action.USE_EXTENSION); + } + + @Test + void toAceDomainShouldConvertUnicodeDomain() throws Exception { + InternetAddress input = new InternetAddress(); + input.setAddress("arnt@grå.org"); + InternetAddress converted = SmtpUtf8Strategy.toAceDomain(input); + assertThat(converted.getAddress()).isEqualTo("arnt@xn--gr-zia.org"); + } + + @Test + void toAceDomainShouldLeaveAsciiDomainUntouched() throws Exception { + InternetAddress input = new InternetAddress(); + input.setAddress("arnt@example.com"); + InternetAddress converted = SmtpUtf8Strategy.toAceDomain(input); + assertThat(converted.getAddress()).isEqualTo("arnt@example.com"); + } + + @Test + void aceAddressStringShouldConvertDomainButPreserveLocalPart() { + // Local part "arnt" is kept verbatim — this helper is only for the + // downgrade path, where callers have already confirmed the local + // part is ASCII. Preserving whatever local part arrived is the + // right contract. + assertThat(SmtpUtf8Strategy.aceAddressString("arnt@grå.org")) + .isEqualTo("arnt@xn--gr-zia.org"); + } + + @Test + void aceAddressStringShouldPreserveNullSender() { + assertThat(SmtpUtf8Strategy.aceAddressString("")).isEqualTo(""); + } +} From 82c515503dd4ac8c93ad0cfaa5bd87371cbbad17 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Mon, 27 Apr 2026 11:43:47 +0200 Subject: [PATCH 15/18] Normalise addresses to NFC, per RFC 6532 3.1. RFC 6532 says we SHOULD do this and JAMES is generally very careful, so I did this as well. 
--- .../org/apache/james/core/MailAddress.java | 9 ++- .../apache/james/core/MailAddressTest.java | 55 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/james/core/MailAddress.java b/core/src/main/java/org/apache/james/core/MailAddress.java index 3be44aa8426..74d69db97e8 100644 --- a/core/src/main/java/org/apache/james/core/MailAddress.java +++ b/core/src/main/java/org/apache/james/core/MailAddress.java @@ -20,6 +20,7 @@ package org.apache.james.core; import java.net.IDN; +import java.text.Normalizer; import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -173,7 +174,13 @@ private int stripSourceRoute(String address, int pos) { * @throws AddressException if the parse failed */ public MailAddress(String address) throws AddressException { - address = address.trim(); + // RFC 6532 §3.1 recommends NFC normalisation. Canonically-equivalent + // Unicode strings (for example U+00E9 vs U+0065 U+0301 — both render + // as "é") then produce equal MailAddress objects with equal hashCode + // values, which dedup, alias resolution and routing-table lookups + // rely on. NFC is a no-op for pure ASCII, so ASCII addresses are + // unaffected. + address = Normalizer.normalize(address.trim(), Normalizer.Form.NFC); int pos = 0; // Test if mail address has source routing information (RFC-821) and get rid of it!! 
diff --git a/core/src/test/java/org/apache/james/core/MailAddressTest.java b/core/src/test/java/org/apache/james/core/MailAddressTest.java index e9874c9800f..8d61b396567 100644 --- a/core/src/test/java/org/apache/james/core/MailAddressTest.java +++ b/core/src/test/java/org/apache/james/core/MailAddressTest.java @@ -22,6 +22,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; +import java.text.Normalizer; import java.util.Properties; import java.util.stream.Stream; @@ -329,4 +330,58 @@ void stripDetailsShouldBePreciseWithMultipleCharacterDelimiter() throws AddressE assertThat(mailAddress.stripDetails("--")).isEqualTo("localpart@example.com"); } + // RFC 6532 §3.1 — NFC normalisation + + @Test + void nfcAndNfdFormsOfSameAddressShouldCompareEqual() throws AddressException { + // "pelé" as NFC: p, e, l, U+00E9 + MailAddress nfc = new MailAddress("pelé@example.com"); + // "pelé" as NFD: p, e, l, U+0065, U+0301 (combining acute) + MailAddress nfd = new MailAddress("pelé@example.com"); + + assertThat(nfc).isEqualTo(nfd); + assertThat(nfc.hashCode()).isEqualTo(nfd.hashCode()); + assertThat(nfc.asString()).isEqualTo(nfd.asString()); + } + + @Test + void nfdInputShouldBeStoredAsNfc() throws AddressException { + // Build the input string explicitly in NFD form: the local + // part is p, e, l, e, U+0301 (combining acute) — five codepoints. + String input = "pele\u0301@example.com"; + int atIndex = input.indexOf('@'); + int lIndex = input.indexOf('l'); + + // Sanity-check that the input is actually NFD: there should be + // two codepoints between 'l' and '@' (the 'e' and the + // combining acute). + assertThat(input.codePointCount(lIndex + 1, atIndex)).isEqualTo(2); + + MailAddress address = new MailAddress(input); + + // The local part comes back in NFC form (single U+00E9), not + // the two codepoints that went in. 
+ assertThat(address.getLocalPart()).isEqualTo("pel\u00E9"); + assertThat(address.getLocalPart().codePointAt(3)).isEqualTo(0x00E9); + } + + @Test + void nfcNormalisationShouldBeNoopForAsciiAddresses() throws AddressException { + MailAddress address = new MailAddress("arnt@example.com"); + + assertThat(address.asString()).isEqualTo("arnt@example.com"); + } + + @Test + void nfcNormalisationShouldApplyToDomainsToo() throws AddressException { + // The same domain spelled precomposed (a-ring, U+00E5) and decomposed + // ('a' followed by combining ring above, U+030A). Both spellings must + // yield equal addresses (whether the domain is ultimately stored as + // A-label or U-label is the Domain class's concern — not checked here). + MailAddress nfc = new MailAddress("info@gr\u00E5.org"); + MailAddress nfd = new MailAddress("info@gra\u030A.org"); + + assertThat(nfc).isEqualTo(nfd); + } + } From 7767a29617dad94704650b87c98ad112036d59c5 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Mon, 27 Apr 2026 12:23:39 +0200 Subject: [PATCH 16/18] Check for and reject addresses containing lone surrogates etc. This should not make a difference, but a sufficiently inventive attacker might combine it with something to confuse some code... 
--- .../org/apache/james/core/MailAddress.java | 46 +++++++++++++++++-- .../apache/james/core/MailAddressTest.java | 18 ++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/james/core/MailAddress.java b/core/src/main/java/org/apache/james/core/MailAddress.java index 74d69db97e8..99bdebcff46 100644 --- a/core/src/main/java/org/apache/james/core/MailAddress.java +++ b/core/src/main/java/org/apache/james/core/MailAddress.java @@ -521,6 +521,24 @@ private int parseQuotedLocalPart(StringBuilder lpSB, String address, int pos) th "characters exception , , quote (\"), or backslash (\\) at position " + (pos + 1) + " in '" + address + "'"); } + // Same surrogate-pair check as in parseUnquotedLocalPart: + // unpaired or mis-ordered surrogates would produce + // ill-formed UTF-8 on output. + if (Character.isLowSurrogate(q)) { + throw new AddressException("Unpaired UTF-16 low surrogate in quoted local-part at position " + + (pos + 1) + " in '" + address + "'"); + } + if (Character.isHighSurrogate(q)) { + if (pos + 1 >= address.length() + || !Character.isLowSurrogate(address.charAt(pos + 1))) { + throw new AddressException("Unpaired UTF-16 high surrogate in quoted local-part at position " + + (pos + 1) + " in '" + address + "'"); + } + lpSB.append(q); + lpSB.append(address.charAt(pos + 1)); + pos += 2; + continue; + } lpSB.append(q); pos++; } @@ -561,14 +579,36 @@ private int parseUnquotedLocalPart(StringBuilder lpSB, String address, int pos) // unicode codepoint, but not or // ::= "<" | ">" | "(" | ")" | "[" | "]" | "\" | "." 
// | "," | ";" | ":" | "@" """ | the control - // characters (ASCII codes 0 through 31 inclusive and - // 127) + // characters (ASCII codes 0 through 31 inclusive, + // 127, and the C1 controls 128 through 159) // ::= the space character (ASCII code 32) char c = address.charAt(pos); - if (c <= 31 || c == 127 || c == ' ') { + if (c <= 31 || c == 127 || c == ' ' || (c >= 0x80 && c <= 0x9F)) { throw new AddressException("Invalid character in local-part (user account) at position " + (pos + 1) + " in '" + address + "'", address, pos + 1); } + // Java strings are UTF-16, so a supplementary-plane + // codepoint (emoji, CJK extension, etc.) appears here as a + // high-surrogate followed by a low-surrogate. We must keep + // them paired so the address can serialise as well-formed + // UTF-8 (RFC 6532 §3.1) — a lone or mis-ordered surrogate + // would produce ill-formed UTF-8 octets on output. + if (Character.isLowSurrogate(c)) { + throw new AddressException("Unpaired UTF-16 low surrogate in local-part at position " + + (pos + 1) + " in '" + address + "'", address, pos + 1); + } + if (Character.isHighSurrogate(c)) { + if (pos + 1 >= address.length() + || !Character.isLowSurrogate(address.charAt(pos + 1))) { + throw new AddressException("Unpaired UTF-16 high surrogate in local-part at position " + + (pos + 1) + " in '" + address + "'", address, pos + 1); + } + lpSB.append(c); + lpSB.append(address.charAt(pos + 1)); + pos += 2; + lastCharDot = false; + continue; + } int i = 0; while (i < SPECIAL.length) { if (c == SPECIAL[i]) { diff --git a/core/src/test/java/org/apache/james/core/MailAddressTest.java b/core/src/test/java/org/apache/james/core/MailAddressTest.java index 8d61b396567..bb0455bd701 100644 --- a/core/src/test/java/org/apache/james/core/MailAddressTest.java +++ b/core/src/test/java/org/apache/james/core/MailAddressTest.java @@ -60,6 +60,9 @@ private static Stream goodAddresses() { "Abc@10.42.0.1", "Abc.123@example.com", "Loïc.Accentué@voilà.fr8", + // 
Supplementary-plane codepoint as a properly-paired + // UTF-16 surrogate pair (U+1F600). + "abc\uD83D\uDE00@example.com", "pelé@exemple.com", "δοκιμή@παράδειγμα.δοκιμή", "我買@屋企.香港", @@ -112,6 +115,21 @@ private static Stream badAddresses() { "server-dev\\.@james.apache.org", // jakarta.mail is unable to handle this so we better reject it "a..b@domain.com", "sales@\u200Eibm.example", // U+200E is left-to-right + // Unpaired and mis-ordered UTF-16 surrogates would + // produce ill-formed UTF-8 on output, so we reject + // them: lone high surrogate at end, lone low + // surrogate, and a high surrogate not followed by + // a low one. + "abc\uD83D@example.com", + "abc\uDE00def@example.com", + "abc\uD83Ddef@example.com", + // C1 controls in the local part: rejected on the + // same grounds as C0. Tested with U+0080 (start), + // U+0085 (NEL — common in EBCDIC interop bugs), + // and U+009F (end of the C1 range). + "abc\u0080def@example.com", + "abc\u0085def@example.com", + "abc\u009Fdef@example.com", // According to wikipedia this address is valid but as jakarta.mail is unable // to work with it we shall rather reject them (note that this is not breaking retro-compatibility) "mail.allow\\,d@james.apache.org") From 2a613d37815556a5f4f5fa1015fbdc93573a9149 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Mon, 27 Apr 2026 12:37:32 +0200 Subject: [PATCH 17/18] Name the malformed domain name in the error message about that domain. 
--- .../main/java/org/apache/james/core/Domain.java | 15 +++++++++++++-- .../java/org/apache/james/core/DomainTest.java | 10 ++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/james/core/Domain.java b/core/src/main/java/org/apache/james/core/Domain.java index b4372e2b807..5a59003b1aa 100644 --- a/core/src/main/java/org/apache/james/core/Domain.java +++ b/core/src/main/java/org/apache/james/core/Domain.java @@ -55,7 +55,17 @@ public static Domain of(String domain) { Preconditions.checkArgument(domain.length() <= MAXIMUM_DOMAIN_LENGTH, "Domain name length should not exceed %s characters", MAXIMUM_DOMAIN_LENGTH); - String domainWithoutBrackets = IDN.toASCII(removeBrackets(domain), IDN.ALLOW_UNASSIGNED); + String domainWithoutBrackets; + try { + domainWithoutBrackets = IDN.toASCII(removeBrackets(domain), IDN.ALLOW_UNASSIGNED); + } catch (IllegalArgumentException e) { + // IDN.toASCII's own message can be cryptic ("Empty label is not + // a legal name", "A prohibited code point was found in the + // input..."). Let's save wear and tear on the poor developer's + // brain. 
+ throw new IllegalArgumentException( + "Domain '" + domain + "' is invalid according to IDNA: " + e.getMessage(), e); + } Preconditions.checkArgument(PART_CHAR_MATCHER.matchesAllOf(domainWithoutBrackets), "Domain parts ASCII chars must be a-z A-Z 0-9 - or _ in %s", domain); @@ -63,7 +73,8 @@ public static Domain of(String domain) { domainWithoutBrackets.contains(".xn--")) { domainWithoutBrackets = IDN.toUnicode(domainWithoutBrackets); Preconditions.checkArgument(!domainWithoutBrackets.startsWith("xn--") && - !domainWithoutBrackets.contains(".xn--")); + !domainWithoutBrackets.contains(".xn--"), + "A-label could not be decoded to Unicode in %s", domain); } int pos = 0; diff --git a/core/src/test/java/org/apache/james/core/DomainTest.java b/core/src/test/java/org/apache/james/core/DomainTest.java index 8a9e4a56214..a1aa69d5c2b 100644 --- a/core/src/test/java/org/apache/james/core/DomainTest.java +++ b/core/src/test/java/org/apache/james/core/DomainTest.java @@ -76,6 +76,16 @@ void testMalformedDomains(String malformed) { .as("rejecting malformed domain " + malformed) .isInstanceOf(IllegalArgumentException.class); } + + @ParameterizedTest + @MethodSource("malformedDomains") + void exceptionForMalformedDomainShouldNameTheOffendingInput(String malformed) { + // Without the offending input in the message, "Domain invalid + // according to IDNA" leaves a future debugger guessing what + // string actually triggered it. + assertThatThrownBy(() -> Domain.of(malformed)) + .hasMessageContaining(malformed); + } } From acbd1214044de8a9583fb09fede10febeb243209 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Mon, 27 Apr 2026 13:06:48 +0200 Subject: [PATCH 18/18] Generate the correct WITH keywords when James receives SMTPUTF8. 
--- .../smtp/core/ReceivedHeaderGenerator.java | 20 ++- .../core/ReceivedHeaderGeneratorTest.java | 141 ++++++++++++++++++ 2 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 protocols/smtp/src/test/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGeneratorTest.java diff --git a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGenerator.java b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGenerator.java index 614fb5e96ea..4ed4ede28d1 100644 --- a/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGenerator.java +++ b/protocols/smtp/src/main/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGenerator.java @@ -40,6 +40,10 @@ public class ReceivedHeaderGenerator { private static final String ESMTPSA = "ESMTPSA"; private static final String ESMTP = "ESMTP"; private static final String ESMTPS = "ESMTPS"; + private static final String UTF8SMTP = "UTF8SMTP"; + private static final String UTF8SMTPA = "UTF8SMTPA"; + private static final String UTF8SMTPS = "UTF8SMTPS"; + private static final String UTF8SMTPSA = "UTF8SMTPSA"; private final ProtocolSession.AttachmentKey mtPriority = ProtocolSession.AttachmentKey.of("MT-PRIORITY", Integer.class); /** @@ -48,24 +52,32 @@ public class ReceivedHeaderGenerator { protected String getServiceType(SMTPSession session, String heloMode) { // Check if EHLO was used if (EHLO.equals(heloMode)) { + // See RFC 6531 §4.3: + // The new keyword "UTF8SMTP" indicates the use of ESMTP when + // the SMTPUTF8 extension is also used; the "A" / "S" / "SA" + // suffixes have the same meaning as in the E* keywords. + boolean smtpUtf8 = session.getAttachment(SMTPSession.SMTPUTF8_REQUESTED, ProtocolSession.State.Transaction) + .orElse(Boolean.FALSE); // Not successful auth if (session.getUsername() == null) { if (session.isTLSStarted()) { - return ESMTPS; + return smtpUtf8 ? 
UTF8SMTPS : ESMTPS; } else { - return ESMTP; + return smtpUtf8 ? UTF8SMTP : ESMTP; } } else { // See RFC3848: // The new keyword "ESMTPA" indicates the use of ESMTP when the SMTP // AUTH [3] extension is also used and authentication is successfully achieved. if (session.isTLSStarted()) { - return ESMTPSA; + return smtpUtf8 ? UTF8SMTPSA : ESMTPSA; } else { - return ESMTPA; + return smtpUtf8 ? UTF8SMTPA : ESMTPA; } } } else { + // HELO was used (not EHLO), so SMTPUTF8 cannot have been + // negotiated — the extension requires EHLO. Plain SMTP only. return SMTP; } } diff --git a/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGeneratorTest.java b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGeneratorTest.java new file mode 100644 index 00000000000..070b66ba53c --- /dev/null +++ b/protocols/smtp/src/test/java/org/apache/james/protocols/smtp/core/ReceivedHeaderGeneratorTest.java @@ -0,0 +1,141 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/ + +package org.apache.james.protocols.smtp.core; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Optional; + +import org.apache.james.core.Username; +import org.apache.james.protocols.api.ProtocolSession.AttachmentKey; +import org.apache.james.protocols.api.ProtocolSession.State; +import org.apache.james.protocols.smtp.SMTPSession; +import org.apache.james.protocols.smtp.utils.BaseFakeSMTPSession; +import org.junit.jupiter.api.Test; + +/** + * Covers RFC 6531 §4.3 — the UTF8SMTP / UTF8SMTPA / UTF8SMTPS / UTF8SMTPSA + * trace keywords used in the Received header when the transaction asserted + * SMTPUTF8. + */ +class ReceivedHeaderGeneratorTest { + + private static final ReceivedHeaderGenerator generator = new ReceivedHeaderGenerator(); + + private static String serviceTypeFor(String heloMode, boolean tls, boolean authenticated, boolean smtpUtf8) { + SMTPSession session = new FakeSession(tls, authenticated, smtpUtf8); + // getServiceType is protected; we exercise it through a thin + // subclass that exposes it. + return new ReceivedHeaderGenerator() { + String invoke() { + return getServiceType(session, heloMode); + } + }.invoke(); + } + + // --- HELO (no extensions can have been negotiated) --- + + @Test + void heloShouldYieldSmtp() { + assertThat(serviceTypeFor("HELO", false, false, false)).isEqualTo("SMTP"); + } + + @Test + void heloShouldYieldSmtpEvenWhenSmtpUtf8FlagIsSet() { + // The flag should only ever be set after EHLO + an SMTPUTF8 + // parameter, but we double-check the HELO branch ignores it. 
+ assertThat(serviceTypeFor("HELO", false, false, true)).isEqualTo("SMTP"); + } + + // --- EHLO without SMTPUTF8: existing RFC 3848 keywords --- + + @Test + void ehloShouldYieldEsmtp() { + assertThat(serviceTypeFor("EHLO", false, false, false)).isEqualTo("ESMTP"); + } + + @Test + void ehloAuthenticatedShouldYieldEsmtpa() { + assertThat(serviceTypeFor("EHLO", false, true, false)).isEqualTo("ESMTPA"); + } + + @Test + void ehloOverTlsShouldYieldEsmtps() { + assertThat(serviceTypeFor("EHLO", true, false, false)).isEqualTo("ESMTPS"); + } + + @Test + void ehloOverTlsAuthenticatedShouldYieldEsmtpsa() { + assertThat(serviceTypeFor("EHLO", true, true, false)).isEqualTo("ESMTPSA"); + } + + // --- EHLO with SMTPUTF8: RFC 6531 §4.3 keywords --- + + @Test + void ehloWithSmtpUtf8ShouldYieldUtf8Smtp() { + assertThat(serviceTypeFor("EHLO", false, false, true)).isEqualTo("UTF8SMTP"); + } + + @Test + void ehloAuthenticatedWithSmtpUtf8ShouldYieldUtf8Smtpa() { + assertThat(serviceTypeFor("EHLO", false, true, true)).isEqualTo("UTF8SMTPA"); + } + + @Test + void ehloOverTlsWithSmtpUtf8ShouldYieldUtf8Smtps() { + assertThat(serviceTypeFor("EHLO", true, false, true)).isEqualTo("UTF8SMTPS"); + } + + @Test + void ehloOverTlsAuthenticatedWithSmtpUtf8ShouldYieldUtf8Smtpsa() { + assertThat(serviceTypeFor("EHLO", true, true, true)).isEqualTo("UTF8SMTPSA"); + } + + private static class FakeSession extends BaseFakeSMTPSession { + private final boolean tls; + private final Username username; + private final boolean smtpUtf8; + + FakeSession(boolean tls, boolean authenticated, boolean smtpUtf8) { + this.tls = tls; + this.username = authenticated ? 
Username.of("alice@example.com") : null; + this.smtpUtf8 = smtpUtf8; + } + + @Override + public boolean isTLSStarted() { + return tls; + } + + @Override + public Username getUsername() { + return username; + } + + @Override + @SuppressWarnings("unchecked") + public Optional getAttachment(AttachmentKey key, State state) { + if (key == SMTPSession.SMTPUTF8_REQUESTED) { + return (Optional) Optional.of(smtpUtf8); + } + return Optional.empty(); + } + } +}