|
| 1 | +from datetime import datetime, timezone |
| 2 | +import logging |
| 3 | +import re |
| 4 | +from solesearch_api.models.enums import Audience, Platform |
| 5 | +from solesearch_api.models.sneaker import Image, Link, Sneaker |
| 6 | +from solesearch_api.tasks.db.base import create_or_update_sneaker |
| 7 | +from solesearch_api.tasks.scraping.retail.adidas.base import AdidasScrapingTask |
| 8 | +from solesearch_api.tasks.scraping.task_registry import register_scraping_task |
| 9 | +from solesearch_api.utils.extractors import next_json_extractor |
| 10 | + |
| 11 | +logger = logging.getLogger(__name__) |
| 12 | + |
| 13 | + |
class AdidasYouthBaseScrapingTask(AdidasScrapingTask):
    """Shared scraper for the Adidas kids/youth shoe listing pages.

    The task is always constructed with ``next_json_extractor`` and turns each
    entry of the extracted ``products`` array into ``Sneaker`` records.
    Subclasses are expected to provide ``audience``; ``brand`` is read from
    the instance as well (presumably supplied by ``AdidasScrapingTask`` --
    confirm against the base class).
    """

    _ADIDAS_BASE_URL = "https://www.adidas.com"
    # Matches "images/" plus the single path segment that follows it, so the
    # segment can be dropped from image URLs.
    _IMAGE_SEGMENT_RE = re.compile(r"images/[^/]+/")

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base task, forcing the JSON extractor."""
        # Positional arguments are unpacked first so the call reads in the
        # order Python actually binds them.
        super().__init__(*args, extractor=next_json_extractor, **kwargs)

    def scrape(self, session, *args, **kwargs):
        """Fetch the listing JSON and upsert every sneaker it describes.

        Args:
            session: Database session passed to ``create_or_update_sneaker``
                and committed after each product that produced sneakers.
            *args: Not used by this implementation.
            **kwargs: Not used by this implementation.
        """
        json_data = self.get_json()

        for product in json_data.get("products") or []:
            # json_to_sneakers returns None for products without a SKU; treat
            # that as "nothing to store" rather than iterating over None.
            sneakers = self.json_to_sneakers(product)
            if not sneakers:
                continue

            for sneaker in sneakers:
                create_or_update_sneaker(session, sneaker)
            # Commit per product so earlier products stay persisted even if a
            # later product fails.
            session.commit()

    def json_to_sneakers(self, data: dict) -> list[Sneaker] | None:
        """Convert one product entry into its main sneaker plus colour variants.

        Args:
            data: A single product dict from the listing's ``products`` array.

        Returns:
            A list whose first item is the sneaker for the product's own SKU
            (with link and images), followed by one sneaker per non-empty entry
            in ``colourVariations`` (with a derived link, no images). Returns
            ``None`` and logs a warning when the product has no SKU.
        """
        sku = str(data.get("id") or "").strip()
        if not sku:
            logger.warning("No SKU found for %s product: %s", self.brand, data)
            return None

        price = (data.get("priceData") or {}).get("price")
        if price is not None:
            # Store integer minor units. round() removes binary floating-point
            # artefacts (e.g. 19.99 * 100 evaluates to 1998.9999999999998).
            price = round(price * 100)

        slug = str(data.get("url") or "").strip()

        sneaker = self._build_sneaker(data, sku, price)
        if slug:
            sneaker.links.append(self._retail_link(slug))

        for index, image in enumerate(data.get("images") or []):
            src = image.get("src")
            if not src:
                # Nothing to store; the regex substitution needs a string.
                continue
            sort_order = (image.get("metadata") or {}).get("sortOrder")
            # Fall back to the list index only when sortOrder is absent, so an
            # explicit sortOrder of 0 is kept.
            position = index if sort_order in (None, "") else int(sort_order)
            sneaker.images.append(
                Image(
                    platform=Platform.RETAIL,
                    position=position,
                    url=self._IMAGE_SEGMENT_RE.sub("images/", src),
                )
            )

        sneakers = [sneaker]

        # Everything before the last "/" of the product URL is shared by the
        # variant URLs; compute it once instead of once per variant.
        slug_parent = slug.rpartition("/")[0]

        # NOTE(review): if colourVariations can contain the product's own id,
        # a second record for the same SKU is produced -- confirm upstream.
        for variant_sku in data.get("colourVariations") or []:
            variant_sku = variant_sku.strip()
            if not variant_sku:
                continue

            variant_sneaker = self._build_sneaker(data, variant_sku, price)

            if slug:
                # Replace the final path segment with "<variant_sku>.html".
                variant_slug = f"{slug_parent}/{variant_sku}.html"
                if not variant_slug.startswith("/"):
                    variant_slug = f"/{variant_slug}"
                variant_sneaker.links.append(self._retail_link(variant_slug))

            sneakers.append(variant_sneaker)

        return sneakers

    def _build_sneaker(self, data: dict, sku: str, price: int | None) -> Sneaker:
        """Create a retail ``Sneaker`` for *sku* from the shared product fields."""
        return Sneaker(
            source=Platform.RETAIL,
            brand=self.brand,
            name=data.get("title"),
            sku=sku,
            parent_sku=data.get("modelNumber"),
            audience=self.audience,
            retail_price=price,
        )

    def _retail_link(self, slug: str) -> Link:
        """Create a retail ``Link`` for a site-relative *slug*."""
        return Link(
            url=f"{self._ADIDAS_BASE_URL}{slug}",
            platform=Platform.RETAIL,
        )
| 106 | + |
| 107 | + |
class AdidasBabyAndToddlerScrapingTask(AdidasYouthBaseScrapingTask):
    """Scraping task for the Adidas US infant and toddler shoes listing."""

    audience = Audience.TODDLER

    def __init__(self):
        """Point the task at the infant/toddler shoes listing URL."""
        listing_url = "https://www.adidas.com/us/kids-infant_toddler-shoes"
        super().__init__(download_url=listing_url)
| 115 | + |
| 116 | + |
class AdidasChildrenAndLittleKidsScrapingTask(AdidasYouthBaseScrapingTask):
    """Scraping task for the Adidas US children's shoes listing."""

    audience = Audience.PRESCHOOL

    def __init__(self):
        """Point the task at the children's shoes listing URL."""
        listing_url = "https://www.adidas.com/us/children-shoes"
        super().__init__(download_url=listing_url)
| 122 | + |
| 123 | + |
class AdidasYouthAndBigKidsScrapingTask(AdidasYouthBaseScrapingTask):
    """Scraping task for the Adidas US youth shoes listing."""

    audience = Audience.GRADE_SCHOOL

    def __init__(self):
        """Point the task at the youth shoes listing URL."""
        listing_url = "https://www.adidas.com/us/youth-shoes"
        super().__init__(download_url=listing_url)
| 129 | + |
| 130 | + |
# Pass each youth-category task class to register_scraping_task at import time
# and bind whatever it returns to a module-level name.
adidas_baby_and_toddler_task = register_scraping_task(
    AdidasBabyAndToddlerScrapingTask,
)
adidas_children_and_little_kids_task = register_scraping_task(
    AdidasChildrenAndLittleKidsScrapingTask,
)
adidas_youth_and_big_kids_task = register_scraping_task(
    AdidasYouthAndBigKidsScrapingTask,
)
0 commit comments