Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions features/media-import.feature
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,69 @@ Feature: Manage WordPress attachments
"""
/foo/large-image.jpg
"""

# Imports a downloaded local file once to record its attachment ID, then imports
# the same path again with --skip-duplicates and expects it to be skipped.
Scenario: Skip importing a local file that was already imported
Given download:
| path | url |
| {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg |

# First import: the --porcelain output is saved as {ATTACHMENT_ID} for later comparison.
When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain`
Then save STDOUT as {ATTACHMENT_ID}
And STDOUT should not be empty

# Second import of the same file: the skip message must name the saved ID,
# the summary must count one skipped item, and the command must still exit 0.
When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
already exists as attachment ID {ATTACHMENT_ID}
"""
And STDOUT should contain:
"""
Success: Imported 0 of 1 items (1 skipped).
"""
And the return code should be 0

# Same check as the local-file case, but the file is given as a remote URL on
# both runs.
Scenario: Skip importing a remote file that was already imported
# First import: the --porcelain output is saved as {ATTACHMENT_ID} for later comparison.
When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --porcelain`
Then save STDOUT as {ATTACHMENT_ID}
And STDOUT should not be empty

# Second import of the same URL: the skip message must name the saved ID,
# the summary must count one skipped item, and the command must still exit 0.
When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
already exists as attachment ID {ATTACHMENT_ID}
"""
And STDOUT should contain:
"""
Success: Imported 0 of 1 items (1 skipped).
"""
And the return code should be 0

# Mixed batch: one file that was already imported plus one new remote file.
# With --skip-duplicates the summary should report 1 of 2 imported and 1 skipped.
Scenario: Import new file while skipping duplicates from a batch
Given download:
| path | url |
| {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg |

# Seed the media library with the local file, without --skip-duplicates.
When I run `wp media import {CACHE_DIR}/large-image.jpg`
Then STDOUT should contain:
"""
Success: Imported 1 of 1 items.
"""

# Batch run: the local file was imported above; the remote URL has not been imported in this scenario.
When I run `wp media import {CACHE_DIR}/large-image.jpg 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
Success: Imported 1 of 2 items (1 skipped).
"""
And the return code should be 0
56 changes: 54 additions & 2 deletions src/Media_Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ public function regenerate( $args, $assoc_args = array() ) {
* [--featured_image]
* : If set, set the imported image as the Featured Image of the post it is attached to.
*
* [--skip-duplicates]
* : If set, media files that have already been imported will be skipped. Duplicates are detected by file name (basename), not by file contents.
*
* [--porcelain[=<field>]]
* : Output a single field for each imported image. Defaults to attachment ID when used as flag.
* ---
Expand Down Expand Up @@ -308,7 +311,7 @@ public function regenerate( $args, $assoc_args = array() ) {
* http://wordpress-develop.dev/wp-header-logo/
*
* @param string[] $args Positional arguments.
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, 'skip-duplicates'?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
* @return void
*/
public function import( $args, $assoc_args = array() ) {
Expand Down Expand Up @@ -361,6 +364,7 @@ public function import( $args, $assoc_args = array() ) {
$number = 0;
$successes = 0;
$errors = 0;
$skips = 0;
foreach ( $args as $file ) {
++$number;
if ( 0 === $number % self::WP_CLEAR_OBJECT_CACHE_INTERVAL ) {
Expand All @@ -379,6 +383,16 @@ public function import( $args, $assoc_args = array() ) {
++$errors;
continue;
}
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
$existing = $this->find_duplicate_attachment( Utils\basename( $file ) );
if ( false !== $existing ) {
if ( ! $porcelain ) {
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
}
++$skips;
continue;
}
}
if ( Utils\get_flag_value( $assoc_args, 'skip-copy' ) ) {
$tempfile = $file;
} else {
Expand All @@ -390,6 +404,16 @@ public function import( $args, $assoc_args = array() ) {
$file_time = @filemtime( $file );
}
} else {
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
$existing = $this->find_duplicate_attachment( (string) explode( '?', Utils\basename( $file ), 2 )[0] );
if ( false !== $existing ) {
if ( ! $porcelain ) {
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
}
++$skips;
Comment thread
swissspidy marked this conversation as resolved.
Outdated
continue;
}
}
$tempfile = download_url( $file );
if ( is_wp_error( $tempfile ) ) {
WP_CLI::warning(
Expand Down Expand Up @@ -542,7 +566,7 @@ public function import( $args, $assoc_args = array() ) {

// Report the result of the operation
if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) {
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors );
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ? $skips : null );
} elseif ( $errors ) {
WP_CLI::halt( 1 );
}
Expand Down Expand Up @@ -692,6 +716,34 @@ private function make_copy( $path ) {
return $filename;
}

/**
 * Finds an existing attachment whose file basename matches the given filename.
 *
 * Searches the `_wp_attached_file` post meta, which stores the path relative to
 * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). A row
 * matches when the stored value is exactly the basename, or ends with '/'
 * followed by the basename. When several attachments share the same basename
 * across different upload subdirectories, whichever row the database returns
 * first is used (the query has no explicit ordering).
 *
 * @param string $basename Filename basename to search for (e.g. 'image.jpg').
 * @return int|false Attachment ID if found, false otherwise.
 */
private function find_duplicate_attachment( $basename ) {
	global $wpdb;

	// An empty basename would match any stored value that is empty or ends in
	// '/', which is never a real duplicate. This can arise when a caller derives
	// the basename from a path or URL that has no file component.
	if ( '' === (string) $basename ) {
		return false;
	}

	// Match the '/<basename>' suffix with LIKE rather than RIGHT() plus a
	// character count computed in PHP: the PHP-side count can disagree with the
	// database's own notion of character length, and LIKE is more portable.
	// esc_like() neutralises '%' and '_' inside the filename so they are
	// compared literally; it must be applied before prepare().
	$suffix_pattern = '%/' . $wpdb->esc_like( $basename );

	$result = $wpdb->get_var(
		$wpdb->prepare(
			"SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR meta_value LIKE %s) LIMIT 1",
			$basename,
			$suffix_pattern
		)
	);

	// get_var() yields null when no row matched; normalise that to false.
	return $result ? (int) $result : false;
}

/**
* Returns a human-readable description for one or more image size names.
*
Expand Down
Loading