|
21 | 21 | using SixLabors.ImageSharp.Metadata.Profiles.Cicp; |
22 | 22 | using SixLabors.ImageSharp.Metadata.Profiles.Exif; |
23 | 23 | using SixLabors.ImageSharp.Metadata.Profiles.Icc; |
| 24 | +using SixLabors.ImageSharp.Metadata.Profiles.Iptc; |
24 | 25 | using SixLabors.ImageSharp.Metadata.Profiles.Xmp; |
25 | 26 | using SixLabors.ImageSharp.PixelFormats; |
26 | 27 |
|
@@ -1440,14 +1441,19 @@ private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata met |
1440 | 1441 | /// object unmodified.</returns> |
1441 | 1442 | private static bool TryReadTextChunkMetadata(ImageMetadata baseMetadata, string chunkName, string chunkText) |
1442 | 1443 | { |
1443 | | - if (chunkName.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase) && |
| 1444 | + if (chunkName.Equals(PngConstants.ExifRawProfileKeyword, StringComparison.OrdinalIgnoreCase) && |
1444 | 1445 | TryReadLegacyExifTextChunk(baseMetadata, chunkText)) |
1445 | 1446 | { |
1446 | 1447 | // Successfully parsed legacy exif data from text |
1447 | 1448 | return true; |
1448 | 1449 | } |
1449 | 1450 |
|
1450 | | - // TODO: "Raw profile type iptc", potentially others? |
| 1451 | + if (chunkName.Equals(PngConstants.IptcRawProfileKeyword, StringComparison.OrdinalIgnoreCase) && |
| 1452 | + TryReadLegacyIptcTextChunk(baseMetadata, chunkText)) |
| 1453 | + { |
| 1454 | + // Successfully parsed legacy iptc data from text |
| 1455 | + return true; |
| 1456 | + } |
1451 | 1457 |
|
1452 | 1458 | // No special chunk data identified |
1453 | 1459 | return false; |
@@ -1571,6 +1577,214 @@ private static bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string da |
1571 | 1577 | return true; |
1572 | 1578 | } |
1573 | 1579 |
|
/// <summary>
/// Reads iptc data encoded into a text chunk with the name "Raw profile type iptc".
/// This convention is used by ImageMagick/exiftool/exiv2/digiKam and stores a byte-count
/// followed by hex-encoded bytes.
/// </summary>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded iptc tags into.</param>
/// <param name="data">The contents of the "Raw profile type iptc" text chunk.</param>
/// <returns>
/// <see langword="true"/> if <paramref name="metadata"/> already holds an IPTC profile, or if a profile was
/// decoded from <paramref name="data"/> and stored; <see langword="false"/> if the text is not a
/// well-formed raw IPTC profile.
/// </returns>
private static bool TryReadLegacyIptcTextChunk(ImageMetadata metadata, string data)
{
    // Preserve first IPTC found.
    if (metadata.IptcProfile != null)
    {
        return true;
    }

    ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();

    // Must start with the "iptc" identifier (case-insensitive).
    // Common real-world format (ImageMagick/ExifTool) is:
    // "IPTC profile\n <len>\n<hex...>"
    if (dataSpan.Length < 4 || !StringEqualsInsensitive(dataSpan[..4], "iptc".AsSpan()))
    {
        return false;
    }

    // Skip the remainder of the first line ("IPTC profile", etc).
    int firstLineEnd = dataSpan.IndexOf('\n');
    if (firstLineEnd < 0)
    {
        return false;
    }

    dataSpan = dataSpan[(firstLineEnd + 1)..].TrimStart();

    // Next line contains the decimal byte length (often indented).
    int dataLengthEnd = dataSpan.IndexOf('\n');
    if (dataLengthEnd < 0)
    {
        return false;
    }

    int dataLength;
    try
    {
        dataLength = ParseInt32(dataSpan[..dataLengthEnd]);
    }
    catch
    {
        // The chunk text is untrusted input: an unparsable length simply means
        // "not a raw IPTC profile", so report failure instead of propagating.
        return false;
    }

    if (dataLength <= 0)
    {
        return false;
    }

    // Skip to the hex-encoded data.
    dataSpan = dataSpan[(dataLengthEnd + 1)..].Trim();

    // Every decoded byte needs two hex characters, so a declared length larger than half of
    // the remaining text can never be satisfied. Reject it before allocating; otherwise a tiny
    // chunk declaring a huge length would force an arbitrarily large buffer allocation.
    if (dataLength > dataSpan.Length / 2)
    {
        return false;
    }

    byte[] iptcBlob = new byte[dataLength];

    try
    {
        int written = 0;

        while (written < dataLength)
        {
            ReadOnlySpan<char> lineSpan = dataSpan;

            int newlineIndex = dataSpan.IndexOf('\n');
            if (newlineIndex != -1)
            {
                lineSpan = dataSpan[..newlineIndex];
            }

            // Important: handle CRLF and any incidental whitespace.
            lineSpan = lineSpan.Trim(); // removes ' ', '\t', '\r', '\n', etc.

            if (!lineSpan.IsEmpty)
            {
                // Decode this line into the still-unfilled tail of the buffer.
                written += HexConverter.HexStringToBytes(lineSpan, iptcBlob.AsSpan()[written..]);
            }

            if (newlineIndex == -1)
            {
                // That was the last line of the chunk.
                break;
            }

            dataSpan = dataSpan[(newlineIndex + 1)..];
        }

        // The decoded byte count must match the declared length exactly.
        if (written != dataLength)
        {
            return false;
        }
    }
    catch
    {
        // Any decoding failure on malformed hex text is treated as "no profile" (best effort).
        return false;
    }

    // Prefer IRB extraction if this is Photoshop-style data (8BIM resource blocks).
    byte[] iptcPayload = TryExtractIptcFromPhotoshopIrb(iptcBlob, out byte[] extracted)
        ? extracted
        : iptcBlob;

    metadata.IptcProfile = new IptcProfile(iptcPayload);
    return true;
}
| 1689 | + |
/// <summary>
/// Walks a buffer of Photoshop Image Resource Blocks (IRBs) and copies out the payload of the first
/// block whose resource id equals <c>PngConstants.AdobeIptcResourceId</c>.
/// </summary>
/// <remarks>
/// An optional leading <c>PngConstants.AdobePhotoshop30</c> header is skipped. Each block is read as:
/// 4-byte signature, 2-byte big-endian resource id, Pascal-style name padded to an even length,
/// 4-byte big-endian data size, then the data padded to an even length. The payload itself is not validated.
/// </remarks>
/// <param name="data">The bytes to scan for resource blocks.</param>
/// <param name="iptcBytes">On success, a copy of the IPTC resource data; otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if an IPTC resource block was found; otherwise, <see langword="false"/>.</returns>
private static bool TryExtractIptcFromPhotoshopIrb(ReadOnlySpan<byte> data, out byte[] iptcBytes)
{
    iptcBytes = default!;

    // Some writers include the "Photoshop 3.0\0" header, some store just IRB blocks.
    ReadOnlySpan<byte> header = PngConstants.AdobePhotoshop30;
    if (data.StartsWith(header))
    {
        data = data[header.Length..];
    }

    ReadOnlySpan<byte> signature = PngConstants.EightBim;
    ushort iptcId = PngConstants.AdobeIptcResourceId;

    // 12 bytes is the smallest possible block: signature (4) + id (2) + empty padded name (2) + size (4).
    // That lower bound also guarantees the fixed-offset reads of the id and name length below are in range.
    while (data.Length >= 12)
    {
        // Every block must open with the resource signature.
        if (!data[..4].SequenceEqual(signature))
        {
            return false;
        }

        // Resource ID (2 bytes, big endian).
        ushort id = (ushort)((data[4] << 8) | data[5]);

        // Pascal string name: 1 length byte plus that many bytes, rounded up to an even total.
        ReadOnlySpan<byte> cursor = data[6..];
        int nameFieldLength = 1 + cursor[0];
        nameFieldLength += nameFieldLength & 1;

        // The name must be followed by the 4-byte size field.
        if (cursor.Length < nameFieldLength + 4)
        {
            return false;
        }

        cursor = cursor[nameFieldLength..];

        // Resource data size (4 bytes, big endian).
        int size = (cursor[0] << 24) | (cursor[1] << 16) | (cursor[2] << 8) | cursor[3];
        cursor = cursor[4..];

        if (size < 0 || cursor.Length < size)
        {
            return false;
        }

        if (id == iptcId)
        {
            // Only the unpadded payload is returned.
            iptcBytes = cursor[..size].ToArray();
            return true;
        }

        // Not the IPTC resource: step over the data, which is padded to an even length.
        int paddedSize = size + (size & 1);
        if (cursor.Length < paddedSize)
        {
            return false;
        }

        data = cursor[paddedSize..];
    }

    return false;
}
| 1787 | + |
1574 | 1788 | /// <summary> |
1575 | 1789 | /// Reads the color profile chunk. The data is stored similar to the zTXt chunk. |
1576 | 1790 | /// </summary> |
|
0 commit comments