77
88use Codewithkyrian \Transformers \Tensor \Tensor ;
99use Codewithkyrian \Transformers \Utils \Image ;
10+ use Exception ;
1011use Imagine \Image \Point ;
1112
1213class ImageFeatureExtractor extends FeatureExtractor
1314{
14- /**
15- * The mean values for image normalization.
16- * @var int|int[]
17- */
15+ /** The mean values for image normalization. */
1816 protected int |array |null $ imageMean ;
1917
20- /**
21- * The standard deviation values for image normalization.
22- * @var int|int[]
23- */
18+ /** The standard deviation values for image normalization. */
2419 protected int |array |null $ imageStd ;
2520
26- /*
27- * What method to use for resampling.
28- */
21+ /* What method to use for resampling. */
2922 protected int $ resample ;
3023
31- /**
32- * Whether to rescale the image pixel values to the [0,1] range.
33- * @var bool
34- */
24+ /** Whether to rescale the image pixel values to the [0,1] range. */
3525 protected bool $ doRescale ;
3626
37- /**
38- * The factor to use for rescaling the image pixel values.
39- * @var float
40- */
27+ /** The factor to use for rescaling the image pixel values. */
4128 protected float $ rescaleFactor ;
4229
43- /**
44- * Whether to normalize the image pixel values.
45- * @var ?bool
46- */
30+ /** Whether to normalize the image pixel values. */
4731 protected ?bool $ doNormalize ;
4832
49- /**
50- * Whether to resize the image.
51- * @var ?bool
52- */
33+ /** Whether to resize the image. */
5334 protected ?bool $ doResize ;
5435
36+ /** Whether to create a thumbnail of the image. */
5537 protected ?bool $ doThumbnail ;
5638
57- /**
58- * The size to resize the image to.
59- * @var ?array
60- */
39+ /** The size to resize the image to. */
6140 protected ?array $ size ;
6241 protected mixed $ sizeDivisibility ;
6342 protected ?bool $ doCenterCrop ;
@@ -102,18 +81,20 @@ public function __construct(public array $config)
10281
10382 /**
10483 * Pad the image by a certain amount.
84+ *
10585 * @param Tensor $imageTensor The pixel data to pad.
10686 * @param int[]|int $padSize The dimensions of the padded image.
10787 * @param string $mode The type of padding to add.
10888 * @param bool $center Whether to center the image.
10989 * @param int $constantValues The constant value to use for padding.
90+ *
11091 * @return Tensor The padded pixel data and image dimensions.
111- * @throws \ Exception
92+ * @throws Exception
11293 */
11394 public function padImage (
11495 Tensor $ imageTensor ,
11596 int |array $ padSize ,
116- string $ tensorFormat = 'CHW ' , // 'HWC' or 'CHW
97+ string $ tensorFormat = 'CHW ' , // 'HWC' or 'CHW'
11798 string $ mode = 'constant ' ,
11899 bool $ center = false ,
119100 int $ constantValues = 0
@@ -170,7 +151,7 @@ public function padImage(
170151
171152 if ($ mode === 'symmetric ' ) {
172153 if ($ center ) {
173- throw new \ Exception ('`center` padding is not supported when `mode` is set to `symmetric`. ' );
154+ throw new Exception ('`center` padding is not supported when `mode` is set to `symmetric`. ' );
174155 // TODO: Implement this
175156 }
176157 $ h1 = $ imageHeight - 1 ;
@@ -210,9 +191,12 @@ private function calculateReflectOffset(int $val, int $max): int
210191 /**
211192 * Find the target (width, height) dimension of the output image after
212193 * resizing given the input image and the desired size.
194+ *
213195 * @param Image $image The image to be resized.
214196 * @param int|array|null $size The size to use for resizing the image.
197+ *
215198 * @return array The target (width, height) dimension of the output image after resizing.
199+ * @throws Exception
216200 */
217201 public function getResizeOutputImageSize (Image $ image , int |array |null $ size ): array
218202 {
@@ -286,7 +270,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
286270 } elseif ($ this ->sizeDivisibility != null ) {
287271 return $ this ->enforceSizeDivisibility ([$ srcWidth , $ srcHeight ], $ this ->sizeDivisibility );
288272 } else {
289- throw new \ Exception ("Could not resize image due to unsupported 'size' parameter passed: " . json_encode ($ size ));
273+ throw new Exception ("Could not resize image due to unsupported 'size' parameter passed: " . json_encode ($ size ));
290274 }
291275 }
292276
@@ -295,12 +279,13 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
295279 * Preprocesses the given image.
296280 *
297281 * @param Image $image The image to preprocess.
298- * @param ?bool $doNormalize
299- * @param ?bool $doPad
300- * @param ?bool $doConvertRGB
301- * @param ?bool $doConvertGrayscale
282+ * @param ?bool $doNormalize Whether to normalize the image.
283+ * @param ?bool $doPad Whether to pad the image.
284+ * @param ?bool $doConvertRGB Whether to convert the image to RGB.
285+ * @param ?bool $doConvertGrayscale Whether to convert the image to grayscale.
286+ *
302287 * @return array The preprocessed image.
303- * @throws \ Exception
288+ * @throws Exception
304289 */
305290 public function preprocess (
306291 Image $ image ,
@@ -316,7 +301,6 @@ public function preprocess(
316301 $ image = $ image ->cropMargin ();
317302 }
318303
319-
320304 $ originalInputSize = $ image ->size (); // original image size
321305
322306 // Convert image to RGB if specified in config.
@@ -348,7 +332,7 @@ public function preprocess(
348332 $ cropHeight = $ this ->cropSize ['height ' ];
349333 }
350334
351- $ image = $ image ->centerCrop ($ cropWidth , $ cropHeight );
335+ $ image = $ image ->centerCrop ($ cropWidth , $ cropHeight );
352336 }
353337
354338 $ reshapedInputSize = $ image ->size ();
@@ -362,7 +346,7 @@ public function preprocess(
362346 if ($ doNormalize ?? $ this ->doNormalize ) {
363347 if (is_array ($ this ->imageMean )) {
364348 // Negate the mean values to add instead of subtract
365- $ negatedMean = array_map (fn ($ mean ) => -$ mean , $ this ->imageMean );
349+ $ negatedMean = array_map (fn ($ mean ) => -$ mean , $ this ->imageMean );
366350 $ imageMean = Tensor::repeat ($ negatedMean , $ image ->height () * $ image ->width (), 1 );
367351 } else {
368352 $ imageMean = Tensor::fill ([$ image ->channels * $ image ->height () * $ image ->width ()], -$ this ->imageMean );
@@ -371,7 +355,7 @@ public function preprocess(
371355
372356 if (is_array ($ this ->imageStd )) {
373357 // Invert the standard deviation values to multiply instead of divide
374- $ inversedStd = array_map (fn ($ std ) => 1 / $ std , $ this ->imageStd );
358+ $ inversedStd = array_map (fn ($ std ) => 1 / $ std , $ this ->imageStd );
375359 $ imageStd = Tensor::repeat ($ inversedStd , $ image ->height () * $ image ->width (), 1 );
376360 } else {
377361 $ imageStd = Tensor::fill ([$ image ->channels * $ image ->height () * $ image ->width ()], 1 / $ this ->imageStd );
@@ -383,7 +367,7 @@ public function preprocess(
383367 $ imageStd = $ imageStd ->reshape ($ imageTensor ->shape ());
384368
385369 if (count ($ imageMean ) !== $ image ->channels || count ($ imageStd ) !== $ image ->channels ) {
386- throw new \ Exception ("When set to arrays, the length of `imageMean` ( " . count ($ imageMean ) . ") and `imageStd` ( " . count ($ imageStd ) . ") must match the number of channels in the image ( {$ image ->channels }). " );
370+ throw new Exception ("When set to arrays, the length of `imageMean` ( " . count ($ imageMean ). ") and `imageStd` ( " . count ($ imageStd ). ") must match the number of channels in the image ( {$ image ->channels }). " );
387371 }
388372
389373 // Normalize pixel data
@@ -411,31 +395,26 @@ public function preprocess(
411395 * Calls the feature extraction process on an array of images,
412396 * preprocesses each image, and concatenates the resulting
413397 * features into a single Tensor.
398+ *
414399 * @param Image|Image[] $images The image(s) to extract features from.
415400 * @param mixed ...$args Additional arguments.
401+ *
416402 * @return array An object containing the concatenated pixel values (and other metadata) of the preprocessed images.
417403 */
418404 public function __invoke (Image |array $ images , ...$ args ): array
419405 {
420- // Ensure $images is an array
421406 if (!is_array ($ images )) {
422407 $ images = [$ images ];
423408 }
424409
425- // Preprocess each image
426- $ imageData = [];
427- foreach ($ images as $ image ) {
428- $ imageData [] = $ this ->preprocess ($ image );
429- }
410+ $ imageData = array_map ([$ this , 'preprocess ' ], $ images );
430411
431412 $ pixelValues = array_column ($ imageData , 'pixel_values ' );
432413 $ originalSizes = array_column ($ imageData , 'original_size ' );
433414 $ reshapedInputSizes = array_column ($ imageData , 'reshaped_input_size ' );
434415
435- $ stackedPixelValues = Tensor::stack ($ pixelValues , 0 );
436-
437416 return [
438- 'pixel_values ' => $ stackedPixelValues ,
417+ 'pixel_values ' => Tensor:: stack ( $ pixelValues ) ,
439418 'original_sizes ' => $ originalSizes ,
440419 'reshaped_input_sizes ' => $ reshapedInputSizes
441420 ];
0 commit comments