Skip to content

Commit b41bb62

Browse files
Merge pull request #3047 from SixLabors/js/xmp-improvements
Enhance XmpProfile to add XDocument normalization
2 parents 3e7c12e + 81d533b commit b41bb62

4 files changed

Lines changed: 150 additions & 34 deletions

File tree

Lines changed: 114 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

4-
using System.Diagnostics;
54
using System.Text;
5+
using System.Xml;
66
using System.Xml.Linq;
77

88
namespace SixLabors.ImageSharp.Metadata.Profiles.Xmp;
@@ -25,18 +25,17 @@ public XmpProfile()
2525
/// Initializes a new instance of the <see cref="XmpProfile"/> class.
2626
/// </summary>
2727
/// <param name="data">The UTF8 encoded byte array to read the XMP profile from.</param>
28-
public XmpProfile(byte[]? data) => this.Data = data;
28+
public XmpProfile(byte[]? data) => this.Data = NormalizeDataIfNeeded(data);
2929

3030
/// <summary>
31-
/// Initializes a new instance of the <see cref="XmpProfile"/> class
32-
/// by making a copy from another XMP profile.
31+
/// Initializes a new instance of the <see cref="XmpProfile"/> class from an XML document.
32+
/// The document is serialized as UTF-8 without BOM.
3333
/// </summary>
34-
/// <param name="other">The other XMP profile, from which the clone should be made from.</param>
35-
private XmpProfile(XmpProfile other)
34+
/// <param name="document">The XMP XML document.</param>
35+
public XmpProfile(XDocument document)
3636
{
37-
Guard.NotNull(other, nameof(other));
38-
39-
this.Data = other.Data;
37+
Guard.NotNull(document, nameof(document));
38+
this.Data = SerializeDocument(document);
4039
}
4140

4241
/// <summary>
@@ -45,30 +44,28 @@ private XmpProfile(XmpProfile other)
4544
internal byte[]? Data { get; private set; }
4645

4746
/// <summary>
48-
/// Gets the raw XML document containing the XMP profile.
47+
/// Convert the content of this <see cref="XmpProfile"/> into an <see cref="XDocument"/>.
4948
/// </summary>
50-
/// <returns>The <see cref="XDocument"/></returns>
51-
public XDocument? GetDocument()
49+
/// <returns>The <see cref="XDocument"/> instance, or <see langword="null"/> if no XMP data is present.</returns>
50+
public XDocument? ToXDocument()
5251
{
53-
byte[]? byteArray = this.Data;
54-
if (byteArray is null)
52+
byte[]? data = this.Data;
53+
if (data is null || data.Length == 0)
5554
{
5655
return null;
5756
}
5857

59-
// Strip leading whitespace, as the XmlReader doesn't like them.
60-
int count = byteArray.Length;
61-
for (int i = count - 1; i > 0; i--)
58+
using MemoryStream stream = new(data, writable: false);
59+
60+
XmlReaderSettings settings = new()
6261
{
63-
if (byteArray[i] is 0 or 0x0f)
64-
{
65-
count--;
66-
}
67-
}
62+
DtdProcessing = DtdProcessing.Ignore,
63+
XmlResolver = null,
64+
CloseInput = false
65+
};
6866

69-
using MemoryStream stream = new(byteArray, 0, count);
70-
using StreamReader reader = new(stream, Encoding.UTF8);
71-
return XDocument.Load(reader);
67+
using XmlReader reader = XmlReader.Create(stream, settings);
68+
return XDocument.Load(reader, LoadOptions.PreserveWhitespace);
7269
}
7370

7471
/// <summary>
@@ -77,12 +74,101 @@ private XmpProfile(XmpProfile other)
7774
/// <returns>The <see cref="T:Byte[]"/></returns>
7875
public byte[] ToByteArray()
7976
{
80-
Guard.NotNull(this.Data);
81-
byte[] result = new byte[this.Data.Length];
77+
byte[]? data = this.Data;
78+
79+
if (data is null)
80+
{
81+
return [];
82+
}
83+
84+
byte[] result = new byte[data.Length];
8285
this.Data.AsSpan().CopyTo(result);
8386
return result;
8487
}
8588

8689
/// <inheritdoc/>
87-
public XmpProfile DeepClone() => new(this);
90+
public XmpProfile DeepClone()
91+
{
92+
byte[]? data = this.Data;
93+
if (data is null)
94+
{
95+
// Preserve the semantics of an "empty" profile when cloning.
96+
return new XmpProfile();
97+
}
98+
99+
byte[] clone = new byte[data.Length];
100+
data.AsSpan().CopyTo(clone);
101+
return new XmpProfile(clone);
102+
}
103+
104+
private static byte[] SerializeDocument(XDocument document)
105+
{
106+
using MemoryStream ms = new();
107+
108+
XmlWriterSettings writerSettings = new()
109+
{
110+
Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false), // no BOM
111+
OmitXmlDeclaration = true, // generally safer for XMP consumers
112+
Indent = false,
113+
NewLineHandling = NewLineHandling.None
114+
};
115+
116+
using (XmlWriter xw = XmlWriter.Create(ms, writerSettings))
117+
{
118+
document.Save(xw);
119+
}
120+
121+
return ms.ToArray();
122+
}
123+
124+
private static byte[]? NormalizeDataIfNeeded(byte[]? data)
125+
{
126+
if (data is null || data.Length == 0)
127+
{
128+
return data;
129+
}
130+
131+
// Allocation-free fast path for the normal case.
132+
133+
// Check for UTF-8 BOM (0xEF,0xBB,0xBF)
134+
bool hasBom = data.Length >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF;
135+
136+
// XMP metadata is commonly stored in fixed-size container blocks (e.g. TIFF tag 700).
137+
// Producers often pad unused space so the packet can be updated in-place without
138+
// rewriting the file. In practice this padding is either NUL (0x00) from the container
139+
// or 0x0F used by Adobe XMP writers. Both are invalid XML and must be trimmed.
140+
bool hasTrailingPad = data[^1] is 0 or 0x0F;
141+
142+
if (!hasBom && !hasTrailingPad)
143+
{
144+
return data;
145+
}
146+
147+
int start = hasBom ? 3 : 0;
148+
int end = data.Length;
149+
150+
if (hasTrailingPad)
151+
{
152+
while (end > start)
153+
{
154+
byte b = data[end - 1];
155+
if (b is not 0 and not 0x0F)
156+
{
157+
break;
158+
}
159+
160+
end--;
161+
}
162+
}
163+
164+
int length = end - start;
165+
if (length <= 0)
166+
{
167+
return null;
168+
}
169+
170+
byte[] normalized = new byte[length];
171+
Buffer.BlockCopy(data, start, normalized, 0, length);
172+
return normalized;
173+
}
88174
}

tests/ImageSharp.Tests/Formats/Tiff/TiffMetadataTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ public void MetadataProfiles<TPixel>(TestImageProvider<TPixel> provider, bool ig
157157
{
158158
Assert.NotNull(rootFrameMetaData.XmpProfile);
159159
Assert.NotNull(rootFrameMetaData.ExifProfile);
160-
Assert.Equal(2599, rootFrameMetaData.XmpProfile.Data.Length);
160+
Assert.Equal(2596, rootFrameMetaData.XmpProfile.Data.Length); // padding bytes are trimmed
161161
Assert.Equal(25, rootFrameMetaData.ExifProfile.Values.Count);
162162
}
163163
}
@@ -186,7 +186,7 @@ public void BaselineTags<TPixel>(TestImageProvider<TPixel> provider)
186186
Assert.Equal(32, rootFrame.Width);
187187
Assert.Equal(32, rootFrame.Height);
188188
Assert.NotNull(rootFrame.Metadata.XmpProfile);
189-
Assert.Equal(2599, rootFrame.Metadata.XmpProfile.Data.Length);
189+
Assert.Equal(2596, rootFrame.Metadata.XmpProfile.Data.Length); // padding bytes are trimmed
190190

191191
ExifProfile exifProfile = rootFrame.Metadata.ExifProfile;
192192
TiffFrameMetadata tiffFrameMetadata = rootFrame.Metadata.GetTiffMetadata();

tests/ImageSharp.Tests/Metadata/ImageFrameMetadataTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public void CloneIsDeep()
7474
Assert.False(metaData.ExifProfile.Equals(clone.ExifProfile));
7575
Assert.True(metaData.ExifProfile.Values.Count == clone.ExifProfile.Values.Count);
7676
Assert.False(ReferenceEquals(metaData.XmpProfile, clone.XmpProfile));
77-
Assert.True(metaData.XmpProfile.Data.Equals(clone.XmpProfile.Data));
77+
Assert.False(ReferenceEquals(metaData.XmpProfile.Data, clone.XmpProfile.Data));
7878
Assert.False(metaData.GetGifMetadata().Equals(clone.GetGifMetadata()));
7979
Assert.False(metaData.IccProfile.Equals(clone.IccProfile));
8080
Assert.False(metaData.IptcProfile.Equals(clone.IptcProfile));

tests/ImageSharp.Tests/Metadata/Profiles/XMP/XmpProfileTests.cs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,34 @@ public async Task ReadXmpMetadata_FromWebp_Works<TPixel>(TestImageProvider<TPixe
7878
}
7979
}
8080

81+
[Fact]
82+
public void XmpProfile_CtorFromXDocument_Works()
83+
{
84+
// arrange
85+
XDocument document = CreateMinimalXDocument();
86+
87+
// act
88+
XmpProfile profile = new(document);
89+
90+
// assert
91+
XmpProfileContainsExpectedValues(profile);
92+
}
93+
94+
[Fact]
95+
public void XmpProfile_ToXDocument_ReturnsValidDocument()
96+
{
97+
// arrange
98+
XmpProfile profile = CreateMinimalXmlProfile();
99+
100+
// act
101+
XDocument document = profile.ToXDocument();
102+
103+
// assert
104+
Assert.NotNull(document);
105+
Assert.Equal("xmpmeta", document.Root.Name.LocalName);
106+
Assert.Equal("adobe:ns:meta/", document.Root.Name.NamespaceName);
107+
}
108+
81109
[Fact]
82110
public void XmpProfile_ToFromByteArray_ReturnsClone()
83111
{
@@ -97,11 +125,11 @@ public void XmpProfile_CloneIsDeep()
97125
{
98126
// arrange
99127
XmpProfile profile = CreateMinimalXmlProfile();
100-
byte[] original = profile.ToByteArray();
128+
byte[] original = profile.Data;
101129

102130
// act
103131
XmpProfile clone = profile.DeepClone();
104-
byte[] actual = clone.ToByteArray();
132+
byte[] actual = clone.Data;
105133

106134
// assert
107135
Assert.False(ReferenceEquals(original, actual));
@@ -218,7 +246,7 @@ public void WritingWebp_PreservesXmpProfile()
218246
private static void XmpProfileContainsExpectedValues(XmpProfile xmp)
219247
{
220248
Assert.NotNull(xmp);
221-
XDocument document = xmp.GetDocument();
249+
XDocument document = xmp.ToXDocument();
222250
Assert.NotNull(document);
223251
Assert.Equal("xmpmeta", document.Root.Name.LocalName);
224252
Assert.Equal("adobe:ns:meta/", document.Root.Name.NamespaceName);
@@ -232,6 +260,8 @@ private static XmpProfile CreateMinimalXmlProfile()
232260
return profile;
233261
}
234262

263+
private static XDocument CreateMinimalXDocument() => CreateMinimalXmlProfile().ToXDocument();
264+
235265
private static Image<Rgba32> WriteAndRead(Image<Rgba32> image, IImageEncoder encoder)
236266
{
237267
using (MemoryStream memStream = new())

0 commit comments

Comments
 (0)