|
1 | | -from typing import List |
| 1 | +from typing import List, Union |
2 | 2 |
|
3 | 3 | from mindee.error import MindeeError |
4 | 4 | from mindee.extraction import ExtractedImage, extract_multiple_images_from_source |
5 | | -from mindee.geometry import Polygon |
| 5 | +from mindee.geometry import Point, Polygon |
6 | 6 | from mindee.input.sources.local_input_source import LocalInputSource |
7 | 7 | from mindee.parsing.v2.field import FieldLocation |
| 8 | +from mindee.v2.file_operations.crop_files import CropFiles |
8 | 9 | from mindee.v2.product.crop.crop_box import CropBox |
9 | 10 |
|
10 | 11 |
|
11 | | -class Crop: |
12 | | - """Crop operations for V2.""" |
13 | | - |
14 | | - @classmethod |
15 | | - def extract_single_crop( |
16 | | - cls, input_source: LocalInputSource, crop: FieldLocation |
17 | | - ) -> ExtractedImage: |
18 | | - """ |
19 | | - Extracts a single crop as complete PDFs from the document. |
20 | | -
|
21 | | - :param input_source: Local Input Source to extract sub-receipts from. |
22 | | - :param crop: Crop to extract. |
23 | | - :return: ExtractedImage. |
24 | | - """ |
25 | | - |
26 | | - return extract_multiple_images_from_source( |
27 | | - input_source, crop.page, [crop.polygon] |
28 | | - )[0] |
29 | | - |
30 | | - @classmethod |
31 | | - def extract_crops( |
32 | | - cls, input_source: LocalInputSource, crops: List[CropBox] |
33 | | - ) -> List[ExtractedImage]: |
34 | | - """ |
35 | | - Extracts individual receipts from multi-receipts documents. |
36 | | -
|
37 | | - :param input_source: Local Input Source to extract sub-receipts from. |
38 | | - :param crops: List of crops. |
39 | | - :return: Individual extracted receipts as an array of ExtractedImage. |
40 | | - """ |
41 | | - images: List[ExtractedImage] = [] |
42 | | - if not crops: |
43 | | - raise MindeeError("No possible candidates found for Crop extraction.") |
44 | | - polygons: List[List[Polygon]] = [[] for _ in range(input_source.page_count)] |
45 | | - for i, crop in enumerate(crops): |
46 | | - polygons[crop.location.page].append(crop.location.polygon) |
47 | | - for i, polygon in enumerate(polygons): |
48 | | - images.extend( |
49 | | - extract_multiple_images_from_source( |
50 | | - input_source, |
51 | | - i, |
52 | | - polygon, |
53 | | - ) |
| 12 | +def extract_single_crop( |
| 13 | + input_source: LocalInputSource, crop: FieldLocation |
| 14 | +) -> ExtractedImage: |
| 15 | + """ |
| 16 | + Extracts a single crop from the document as an ExtractedImage. |
| 17 | +
|
| 18 | + :param input_source: Local Input Source to extract sub-receipts from. |
| 19 | + :param crop: Crop to extract. |
| 20 | + :return: ExtractedImage. |
| 21 | + """ |
| 22 | + |
| 23 | + polygons: List[Union[Polygon, List[Point]]] = [crop.polygon] |
| 24 | + return extract_multiple_images_from_source(input_source, crop.page, polygons)[0] |
| 25 | + |
| 26 | + |
| 27 | +def extract_crops(input_source: LocalInputSource, crops: List[CropBox]) -> CropFiles: |
| 28 | + """ |
| 29 | + Extracts individual receipts from multi-receipts documents. |
| 30 | +
|
| 31 | + :param input_source: Local Input Source to extract sub-receipts from. |
| 32 | + :param crops: List of crops. |
| 33 | + :return: Individual extracted receipts, wrapped in a CropFiles object. |
| 34 | + """ |
| 35 | + images: List[ExtractedImage] = [] |
| 36 | + if not crops: |
| 37 | + raise MindeeError("No possible candidates found for Crop extraction.") |
| 38 | + polygons: List[List[Union[Polygon, List[Point]]]] = [ |
| 39 | + [] for _ in range(input_source.page_count) |
| 40 | + ] |
| 41 | + for i, crop in enumerate(crops): |
| 42 | + polygons[crop.location.page].append(crop.location.polygon) |
| 43 | + for i, polygon in enumerate(polygons): |
| 44 | + images.extend( |
| 45 | + extract_multiple_images_from_source( |
| 46 | + input_source, |
| 47 | + i, |
| 48 | + polygon, |
54 | 49 | ) |
55 | | - return images |
56 | | - |
57 | | - @classmethod |
58 | | - def apply( |
59 | | - cls, |
60 | | - input_source: LocalInputSource, |
61 | | - crops: List[CropBox], |
62 | | - ) -> List[ExtractedImage]: |
63 | | - """Crop a document into multiple pages. |
64 | | -
|
65 | | - :param input_source: Input source to crop. |
66 | | - :param crops: List of crops. |
67 | | - """ |
68 | | - |
69 | | - return cls.extract_crops(input_source, crops) |
| 50 | + ) |
| 51 | + return CropFiles(images) |
0 commit comments