Skip to content

Commit ce7185e

Browse files
Merge pull request #3048 from SixLabors/js/png-iptc
Add proper IptcProfile support in PNG
2 parents b41bb62 + ff02d0c commit ce7185e

8 files changed

Lines changed: 499 additions & 14 deletions

File tree

src/ImageSharp/Formats/Png/PngConstants.cs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,21 @@ internal static class PngConstants
6262
/// </summary>
6363
public const int MinTextKeywordLength = 1;
6464

65+
/// <summary>
66+
/// Specifies the keyword used to identify the Exif raw profile in image metadata.
67+
/// </summary>
68+
public const string ExifRawProfileKeyword = "Raw profile type exif";
69+
70+
/// <summary>
71+
/// Specifies the profile keyword used to identify raw IPTC metadata within image files.
72+
/// </summary>
73+
public const string IptcRawProfileKeyword = "Raw profile type iptc";
74+
75+
/// <summary>
76+
/// The IPTC resource id in Photoshop IRB. 0x0404 (big endian).
77+
/// </summary>
78+
public const ushort AdobeIptcResourceId = 0x0404;
79+
6580
/// <summary>
6681
/// Gets the header bytes identifying a Png.
6782
/// </summary>
@@ -100,4 +115,31 @@ internal static class PngConstants
100115
(byte)'m',
101116
(byte)'p'
102117
];
118+
119+
/// <summary>
120+
/// Gets the ASCII bytes for the "Photoshop 3.0" identifier used in some PNG metadata payloads.
121+
/// This value is null-terminated.
122+
/// </summary>
123+
// 14 bytes: the 13 ASCII characters of "Photoshop 3.0" followed by the explicit NUL terminator.
public static ReadOnlySpan<byte> AdobePhotoshop30 => "Photoshop 3.0\0"u8;
140+
141+
/// <summary>
142+
/// Gets the ASCII bytes for the "8BIM" signature used in Photoshop resources.
143+
/// </summary>
144+
public static ReadOnlySpan<byte> EightBim => "8BIM"u8; // the four ASCII bytes '8', 'B', 'I', 'M'
103145
}

src/ImageSharp/Formats/Png/PngDecoderCore.cs

Lines changed: 216 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
using SixLabors.ImageSharp.Metadata.Profiles.Cicp;
2222
using SixLabors.ImageSharp.Metadata.Profiles.Exif;
2323
using SixLabors.ImageSharp.Metadata.Profiles.Icc;
24+
using SixLabors.ImageSharp.Metadata.Profiles.Iptc;
2425
using SixLabors.ImageSharp.Metadata.Profiles.Xmp;
2526
using SixLabors.ImageSharp.PixelFormats;
2627

@@ -1440,14 +1441,19 @@ private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata met
14401441
/// object unmodified.</returns>
14411442
private static bool TryReadTextChunkMetadata(ImageMetadata baseMetadata, string chunkName, string chunkText)
14421443
{
1443-
if (chunkName.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase) &&
1444+
if (chunkName.Equals(PngConstants.ExifRawProfileKeyword, StringComparison.OrdinalIgnoreCase) &&
14441445
TryReadLegacyExifTextChunk(baseMetadata, chunkText))
14451446
{
14461447
// Successfully parsed legacy exif data from text
14471448
return true;
14481449
}
14491450

1450-
// TODO: "Raw profile type iptc", potentially others?
1451+
if (chunkName.Equals(PngConstants.IptcRawProfileKeyword, StringComparison.OrdinalIgnoreCase) &&
1452+
TryReadLegacyIptcTextChunk(baseMetadata, chunkText))
1453+
{
1454+
// Successfully parsed legacy iptc data from text
1455+
return true;
1456+
}
14511457

14521458
// No special chunk data identified
14531459
return false;
@@ -1571,6 +1577,214 @@ private static bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string da
15711577
return true;
15721578
}
15731579

1580+
/// <summary>
1581+
/// Reads iptc data encoded into a text chunk with the name "Raw profile type iptc".
1582+
/// This convention is used by ImageMagick/exiftool/exiv2/digiKam and stores a byte-count
1583+
/// followed by hex-encoded bytes.
1584+
/// </summary>
1585+
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded iptc tags into.</param>
1586+
/// <param name="data">The contents of the "Raw profile type iptc" text chunk.</param>
1587+
private static bool TryReadLegacyIptcTextChunk(ImageMetadata metadata, string data)
{
    // Returns true when the chunk has been dealt with (an IPTC profile was stored, or one was
    // already present and is being kept); returns false when the text is not a well-formed
    // raw IPTC profile, leaving the metadata untouched.

    // Preserve first IPTC found.
    if (metadata.IptcProfile != null)
    {
        return true;
    }

    ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();

    // Must start with the "iptc" identifier (case-insensitive).
    // Common real-world format (ImageMagick/ExifTool) is:
    // "IPTC profile\n <len>\n<hex...>"
    if (dataSpan.Length < 4 || !StringEqualsInsensitive(dataSpan[..4], "iptc".AsSpan()))
    {
        return false;
    }

    // Skip the remainder of the first line ("IPTC profile", etc).
    int firstLineEnd = dataSpan.IndexOf('\n');
    if (firstLineEnd < 0)
    {
        return false;
    }

    dataSpan = dataSpan[(firstLineEnd + 1)..].TrimStart();

    // Next line contains the decimal byte length (often indented).
    int dataLengthEnd = dataSpan.IndexOf('\n');
    if (dataLengthEnd < 0)
    {
        return false;
    }

    int dataLength;
    try
    {
        dataLength = ParseInt32(dataSpan[..dataLengthEnd]);
    }
    catch
    {
        // A length line that cannot be parsed means this is not a usable profile;
        // report "not handled" rather than failing the whole decode.
        return false;
    }

    if (dataLength <= 0)
    {
        return false;
    }

    // Skip to the hex-encoded data.
    dataSpan = dataSpan[(dataLengthEnd + 1)..].Trim();

    // The declared length comes straight from the file and is untrusted. Hex encoding needs two
    // characters per byte, so a length that the remaining text cannot possibly satisfy is
    // rejected here, before it is used to size an allocation. Without this guard a tiny chunk
    // declaring a huge length would trigger a huge allocation (or an OutOfMemoryException)
    // only to be rejected afterwards by the "written != dataLength" check below.
    if (dataLength > dataSpan.Length / 2)
    {
        return false;
    }

    byte[] iptcBlob = new byte[dataLength];

    try
    {
        int written = 0;

        while (written < dataLength)
        {
            ReadOnlySpan<char> lineSpan = dataSpan;

            int newlineIndex = dataSpan.IndexOf('\n');
            if (newlineIndex != -1)
            {
                lineSpan = dataSpan[..newlineIndex];
            }

            // Important: handle CRLF and any incidental whitespace.
            lineSpan = lineSpan.Trim(); // removes ' ', '\t', '\r', '\n', etc.

            if (!lineSpan.IsEmpty)
            {
                written += HexConverter.HexStringToBytes(lineSpan, iptcBlob.AsSpan()[written..]);
            }

            if (newlineIndex == -1)
            {
                // That was the last line of the chunk.
                break;
            }

            dataSpan = dataSpan[(newlineIndex + 1)..];
        }

        // Fewer bytes than declared means the profile is truncated.
        if (written != dataLength)
        {
            return false;
        }
    }
    catch
    {
        // Any failure while decoding the hex payload is treated as "not a valid profile".
        return false;
    }

    // Prefer IRB extraction if this is Photoshop-style data (8BIM resource blocks).
    byte[] iptcPayload = TryExtractIptcFromPhotoshopIrb(iptcBlob, out byte[] extracted)
        ? extracted
        : iptcBlob;

    metadata.IptcProfile = new IptcProfile(iptcPayload);
    return true;
}
1689+
1690+
/// <summary>
1691+
/// Attempts to extract IPTC metadata from a Photoshop Image Resource Block (IRB) contained within the specified
1692+
/// data buffer.
1693+
/// </summary>
1694+
/// <remarks>This method scans the provided data for a Photoshop IRB block containing IPTC metadata and
1695+
/// extracts it if present. The method does not validate the contents of the IPTC data beyond locating the
1696+
/// appropriate resource block.</remarks>
1697+
/// <param name="data">A read-only span of bytes containing the Photoshop IRB data to search for embedded IPTC metadata.</param>
1698+
/// <param name="iptcBytes">When this method returns, contains the extracted IPTC metadata as a byte array if found; otherwise,
1699+
/// <see langword="null"/>.</param>
1700+
/// <returns><see langword="true"/> if IPTC metadata is successfully extracted from the IRB data; otherwise, <see langword="false"/>.</returns>
1701+
private static bool TryExtractIptcFromPhotoshopIrb(ReadOnlySpan<byte> data, out byte[] iptcBytes)
{
    // Walks the buffer one "8BIM" resource block at a time and copies out the data of the first
    // block whose resource id is the IPTC id. Any structural inconsistency ends the scan with
    // false; on false the out value is null.
    iptcBytes = default!;

    ReadOnlySpan<byte> header = PngConstants.AdobePhotoshop30;
    ReadOnlySpan<byte> signature = PngConstants.EightBim;

    // The "Photoshop 3.0\0" prefix is optional: some writers emit it, others store bare blocks.
    ReadOnlySpan<byte> rest = data.StartsWith(header) ? data[header.Length..] : data;

    // 12 bytes is the smallest block this loop accepts:
    // signature (4) + resource id (2) + empty padded name (2) + data size (4).
    while (rest.Length >= 12)
    {
        if (!rest.StartsWith(signature))
        {
            return false;
        }

        // Resource id: two bytes, big endian, directly after the signature.
        ushort id = (ushort)((rest[4] << 8) | rest[5]);

        // Pascal-style name: one length byte plus that many bytes, padded to an even total.
        ReadOnlySpan<byte> afterId = rest[6..];
        int nameField = 1 + afterId[0];
        nameField += nameField & 1;

        if (afterId.Length < nameField + 4)
        {
            return false;
        }

        // Resource data size: four bytes, big endian, directly after the name field.
        ReadOnlySpan<byte> sizeField = afterId.Slice(nameField, 4);
        int size = (sizeField[0] << 24) | (sizeField[1] << 16) | (sizeField[2] << 8) | sizeField[3];

        ReadOnlySpan<byte> body = afterId[(nameField + 4)..];
        if (size < 0 || size > body.Length)
        {
            return false;
        }

        if (id == PngConstants.AdobeIptcResourceId)
        {
            iptcBytes = body[..size].ToArray();
            return true;
        }

        // Resource data is padded to an even length as well; skip data plus padding.
        int step = size + (size & 1);
        if (step > body.Length)
        {
            return false;
        }

        rest = body[step..];
    }

    return false;
}
1787+
15741788
/// <summary>
15751789
/// Reads the color profile chunk. The data is stored similar to the zTXt chunk.
15761790
/// </summary>

0 commit comments

Comments
 (0)