Skip to content

Commit af13ae9

Browse files
Adidas paginated
1 parent 41cc64b commit af13ae9

12 files changed

Lines changed: 337 additions & 176 deletions

File tree

src/solesearch_api/main.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,67 @@ async def swagger_ui_html():
8080
@app.get("/", include_in_schema=False)
8181
def redirect_to_docs():
8282
return RedirectResponse(url="/docs")
83+
84+
85+
@app.get("/investigate", include_in_schema=False)
async def investigate_url(url: str, extractor_type: str = "next_json_extractor"):
    """
    Endpoint to trigger an investigation download for any URL.

    This will download the HTML and JSON data and store it in the
    Investigation folder.

    Args:
        url: The URL to investigate.
        extractor_type: Name of a public attribute of
            ``solesearch_api.utils.extractors`` to use as the extractor
            (default: "next_json_extractor"). Names starting with an
            underscore are rejected.

    Returns:
        On success, a dict with ``status`` set to "success", a ``message``
        and ``json_size`` (the length of ``str()`` of the extracted data).
        On any failure, a dict with ``status`` set to "error" and a
        ``message``; failures are logged and reported in the response body
        rather than raised.
    """
    try:
        # ``extractor_type`` arrives as a query parameter and is resolved with
        # ``getattr`` below, so refuse private/dunder names (``__builtins__``,
        # ``__dict__``, ...) that are never legitimate extractors.
        # NOTE(review): an explicit allow-list of extractor names would be
        # stricter still -- confirm the set of supported extractors.
        if extractor_type.startswith("_"):
            raise ValueError(f"Invalid extractor type: {extractor_type!r}")

        # Resolve the extractor by name. For the default value this should be
        # equivalent to:
        # from solesearch_api.utils.extractors import next_json_extractor as extractor
        import solesearch_api.utils.extractors as extractor_module

        extractor = getattr(extractor_module, extractor_type)

        # Create an instance of the investigation task
        from solesearch_api.tasks.scraping.investigation import GenericInvestigationTask

        # Browser-like request headers sent with the investigation request.
        # NOTE(review): ``url`` is caller-controlled; if this endpoint is
        # reachable by untrusted clients, restrict the allowed hosts (SSRF).
        browser_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "accept-language": "en-US,en;q=0.9",
            "cache-control": "max-age=0",
            "priority": "u=0, i",
            "sec-ch-ua": '"Chromium";v="134", "Not:A-Brand";v="24", "Brave";v="134"',
            "sec-ch-ua-mobile": "?1",
            "sec-ch-ua-platform": '"Android"',
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "sec-gpc": "1",
            "upgrade-insecure-requests": "1",
            "Referer": "https://www.adidas.com/us/shoes",
            "Referrer-Policy": "strict-origin-when-cross-origin",
        }
        task_instance = GenericInvestigationTask(
            url=url,
            extractor=extractor,
            headers=browser_headers,
        )

        # Force download of HTML and JSON
        # NOTE(review): this is a synchronous call inside an ``async def``
        # handler; presumably it performs blocking I/O -- confirm, and
        # consider running it in a worker thread.
        json_data = task_instance.get_json()

        return {
            "status": "success",
            "message": f"Investigation data downloaded for URL: {url}",
            "json_size": len(str(json_data)),
        }
    except Exception as e:
        # Top-level boundary for this diagnostic endpoint: log the failure and
        # report it in the response body instead of propagating.
        logfire.error(
            "Failed to download investigation data",
            url=url,
            extractor_type=extractor_type,
            error=str(e),
        )
        return {
            "status": "error",
            "message": f"Failed to download investigation data: {e}",
        }

src/solesearch_api/models/sneaker.py

Lines changed: 47 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -12,73 +12,52 @@
1212

1313

1414
class SneakerBase(SQLModel):
15-
brand: str | None = None
15+
id: int | None = Field(default=None, primary_key=True)
1616
sku: str | None = None
17+
brand: str | None = None
1718
parent_sku: str | None = None
1819
name: str | None = None
1920
colorway: str | None = None
2021
audience: Audience | None = None
2122
release_date: datetime | None = None
2223
description: str | None = None
23-
retail_price: int | None = None # Monetary values stored as US cents
24+
retail_price: int | None = None
2425

2526

2627
class Sneaker(SneakerBase, TimestampedModel, table=True):
2728
__table_args__ = (
2829
UniqueConstraint("sku", "brand", name="uix_sneaker_sku_brand"),
2930
Index("ix_sneaker_sku_brand", "sku", "brand"),
3031
)
31-
id: int | None = Field(default=None, primary_key=True)
3232
last_observed: datetime | None = None
3333
stockx_id: str | None = None
3434
stadium_goods_id: str | None = None
3535
source: Platform | None = None
3636
meta: dict = Field(sa_type=JSONB, default_factory=dict)
3737

3838
# Relationships
39-
links: list["Link"] = Relationship(back_populates="sneaker", cascade_delete=True)
40-
images: list["Image"] = Relationship(back_populates="sneaker", cascade_delete=True)
41-
sizes: list["SneakerSize"] = Relationship(
39+
sneaker_links: list["Link"] = Relationship(
40+
back_populates="sneaker", cascade_delete=True
41+
)
42+
sneaker_images: list["Image"] = Relationship(
43+
back_populates="sneaker", cascade_delete=True
44+
)
45+
sneaker_sizes: list["SneakerSize"] = Relationship(
4246
back_populates="sneaker",
4347
cascade_delete=True,
4448
)
4549
nike_launches: list["NikeLaunch"] = Relationship(
4650
back_populates="sneaker", cascade_delete=True
4751
)
4852

49-
def get_links(self) -> list[str]:
50-
return [link.url for link in self.links]
51-
52-
def get_images(self) -> list[str]:
53-
return [image.url for image in sorted(self.images, key=lambda i: i.position)]
54-
55-
def get_sizes(
56-
self,
57-
size_standard: SizeStandard = SizeStandard.MENS_US,
58-
) -> list[str]:
59-
return [size.get_standardized(size_standard) for size in self.sizes]
60-
61-
@property
62-
def prices(self) -> list["Price"]:
63-
return list(reduce(lambda x, y: x.prices + y.prices, self.sizes, []))
64-
65-
def get_prices(self) -> list[int]:
66-
return [price.amount for price in self.prices]
67-
68-
def merge(self, other=None):
69-
if other:
70-
stockx_images = [
71-
img for img in other.images if img.platform == Platform.stockx
72-
]
73-
if stockx_images:
74-
self.images = stockx_images
75-
76-
if len(other.colorway) > len(self.colorway):
77-
self.colorway = other.colorway
78-
7953

8054
class SneakerPublic(SneakerBase):
81-
id: int
55+
sneaker_links: list["Link"] = Field(default_factory=list, exclude=True)
56+
sneaker_images: list["Image"] = Field(default_factory=list, exclude=True)
57+
58+
sneaker_sizes: list["SneakerSizePublic"] = Field(
59+
default_factory=list, alias="sizes"
60+
)
8261

8362
@computed_field
8463
@property
@@ -88,20 +67,17 @@ def retail_price_formatted(self) -> str | None:
8867
retail_price_in_dollars = Decimal(self.retail_price) / 100
8968
return f"${retail_price_in_dollars:.2f}"
9069

91-
@computed_field
92-
@property
93-
def links(self) -> list[str]:
94-
return self.get_links() if hasattr(self, "get_links") else []
95-
9670
@computed_field
9771
@property
9872
def images(self) -> list[str]:
99-
return self.get_images() if hasattr(self, "get_images") else []
73+
return [
74+
img.url for img in sorted(self.sneaker_images, key=lambda x: x.position)
75+
]
10076

10177
@computed_field
10278
@property
103-
def sizes(self) -> list[str]:
104-
return self.get_sizes() if hasattr(self, "get_sizes") else []
79+
def links(self) -> dict[Platform, str]:
80+
return {link.platform: link.url for link in self.sneaker_links}
10581

10682

10783
class PaginatedSneakersPublic(SQLModel):
@@ -129,9 +105,9 @@ class SneakerSize(TimestampedModel, table=True):
129105
meta: dict = Field(sa_type=JSONB, default_factory=dict)
130106

131107
sneaker_id: int | None = Field(default=None, foreign_key="sneaker.id")
132-
sneaker: Sneaker | None = Relationship(back_populates="sizes")
108+
sneaker: Sneaker | None = Relationship(back_populates="sneaker_sizes")
133109

134-
prices: list["Price"] = Relationship(back_populates="sneaker_size")
110+
sneaker_size_prices: list["Price"] = Relationship(back_populates="sneaker_size")
135111

136112

137113
class Price(SQLModel, table=True):
@@ -149,7 +125,9 @@ class Price(SQLModel, table=True):
149125
last_observed: datetime | None = None
150126

151127
sneaker_size_id: int | None = Field(default=None, foreign_key="sneakersize.id")
152-
sneaker_size: SneakerSize | None = Relationship(back_populates="prices")
128+
sneaker_size: SneakerSize | None = Relationship(
129+
back_populates="sneaker_size_prices"
130+
)
153131

154132
@property
155133
def in_dollars(self) -> Decimal:
@@ -167,7 +145,7 @@ class Link(TimestampedModel, table=True):
167145
url: str
168146

169147
sneaker_id: int | None = Field(default=None, foreign_key="sneaker.id")
170-
sneaker: Sneaker | None = Relationship(back_populates="links")
148+
sneaker: Sneaker | None = Relationship(back_populates="sneaker_links")
171149

172150

173151
class Image(TimestampedModel, table=True):
@@ -181,7 +159,7 @@ class Image(TimestampedModel, table=True):
181159
url: str
182160

183161
sneaker_id: int | None = Field(default=None, foreign_key="sneaker.id")
184-
sneaker: Sneaker | None = Relationship(back_populates="images")
162+
sneaker: Sneaker | None = Relationship(back_populates="sneaker_images")
185163

186164

187165
class NikeLaunch(SQLModel, table=True):
@@ -205,3 +183,22 @@ class NikeLaunch(SQLModel, table=True):
205183
merch_product_status: str | None = None
206184
is_launch_product: bool | None = None
207185
last_observed: datetime | None = None
186+
187+
188+
class SneakerSizePublic(SQLModel):
    """Public (API response) shape of a single sneaker size and its prices."""

    value: str
    size_standard: SizeStandard
    # Named after the ``SneakerSize.sneaker_size_prices`` relationship.
    # Uses ``default_factory`` rather than a bare ``[]`` literal, matching the
    # list fields declared on ``SneakerPublic``.
    sneaker_size_prices: list["PricePublic"] = Field(default_factory=list)
192+
193+
194+
class PricePublic(SQLModel):
    """Public (API response) shape of one observed price.

    ``amount`` is divided by 100 to produce the dollar string exposed as
    ``amount_formatted``.
    """

    platform: Platform | None = None
    amount: int
    last_observed: datetime | None = None
    first_observed: datetime | None = None

    @computed_field
    @property
    def amount_formatted(self) -> str:
        """Return ``amount`` as a dollar string with two decimals, e.g. ``$129.99``."""
        amount_in_dollars = Decimal(self.amount) / 100
        return f"${amount_in_dollars:.2f}"

src/solesearch_api/routes/scrape.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
adidas_children_and_little_kids_task,
77
adidas_youth_and_big_kids_task,
88
adidas_new_releases_task,
9+
adidas_mens_sneakers_task,
10+
adidas_womens_sneakers_task,
911
nike_in_stock_scraping_task,
1012
nike_new_releases_task,
1113
)
@@ -43,6 +45,8 @@ async def scrape_retailer_new_releases(
4345
"adidas/baby": adidas_baby_and_toddler_task,
4446
"adidas/children": adidas_children_and_little_kids_task,
4547
"adidas/youth": adidas_youth_and_big_kids_task,
48+
"adidas/womens": adidas_womens_sneakers_task,
49+
"adidas/mens": adidas_mens_sneakers_task,
4650
}
4751
if task_name not in task_mapping:
4852
raise HTTPException(status_code=404, detail="Task not found")

src/solesearch_api/routes/sneakers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ async def get_sneakers(
7979
description="The order to sort in based on the sort key.",
8080
),
8181
] = SortOrder.DESCENDING,
82-
) -> Page[Sneaker]:
82+
) -> Page[SneakerPublic]:
8383
# set_items_transformer(sneaker_to_public)
8484
query = select(Sneaker)
8585

@@ -125,7 +125,7 @@ async def get_sneakers(
125125

126126
@router.get("/{product_id}", response_model=SneakerPublic)
127127
async def get_sneaker_by_id(*, db: Session = Depends(get_session), product_id: int):
128-
sneaker = db.get(Sneaker, product_id)
128+
sneaker = db.exec(select(Sneaker).where(Sneaker.id == product_id)).first()
129129
if not sneaker:
130130
raise HTTPException(status_code=404, detail="Sneaker not found")
131131
return sneaker

src/solesearch_api/tasks/db/base.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logfire
22
from sqlmodel import Session, select
3+
from sqlalchemy.orm import joinedload
34

45
from solesearch_api.models.enums import Platform
56
from solesearch_api.models.sneaker import Sneaker
@@ -26,14 +27,22 @@ def create_or_update_sneaker(
2627
The created or updated sneaker object
2728
"""
2829
# Check if sneaker already exists with eager loading of relationships
29-
statement = select(Sneaker).where(
30-
Sneaker.sku == sneaker.sku,
31-
Sneaker.brand == sneaker.brand,
30+
statement = (
31+
select(Sneaker)
32+
.where(
33+
Sneaker.sku == sneaker.sku,
34+
Sneaker.brand == sneaker.brand,
35+
)
36+
.options(
37+
joinedload(Sneaker.sneaker_links),
38+
joinedload(Sneaker.sneaker_images),
39+
joinedload(Sneaker.sneaker_sizes),
40+
)
3241
)
3342
existing_sneaker = session.exec(statement).first()
3443

3544
if existing_sneaker:
36-
logfire.info(
45+
logfire.trace(
3746
f"{sneaker.brand} SKU '{sneaker.sku}' found, updating existing sneaker",
3847
sneaker=sneaker,
3948
existing_sneaker=existing_sneaker,
@@ -65,9 +74,9 @@ def create_or_update_sneaker(
6574

6675
# Add any new links
6776
existing_links = {
68-
link.platform: link.url for link in existing_sneaker.links
77+
link.platform: link.url for link in existing_sneaker.sneaker_links
6978
}
70-
for new_link in sneaker.links:
79+
for new_link in sneaker.sneaker_links:
7180
if existing_links.get(new_link.platform) == new_link.url:
7281
# Skip if we already have this exact link
7382
continue
@@ -84,14 +93,14 @@ def create_or_update_sneaker(
8493
else:
8594
# Only add if we don't have a link for this platform yet
8695
new_link.sneaker = existing_sneaker
87-
existing_sneaker.links.append(new_link)
96+
existing_sneaker.sneaker_links.append(new_link)
8897

8998
# Add any new images
9099
existing_images = {
91100
(image.platform, image.position): image.url
92-
for image in existing_sneaker.images
101+
for image in existing_sneaker.sneaker_images
93102
}
94-
for image in sneaker.images:
103+
for image in sneaker.sneaker_images:
95104
key = (image.platform, image.position)
96105
if existing_images.get(key) == image.url:
97106
# Skip if we already have this exact image
@@ -109,22 +118,22 @@ def create_or_update_sneaker(
109118
else:
110119
# Only add if we don't have an image for this platform/position yet
111120
image.sneaker = existing_sneaker
112-
existing_sneaker.images.append(image)
121+
existing_sneaker.sneaker_images.append(image)
113122

114123
# Add any new sizes
115124
existing_sizes = {
116125
(size.value, size.size_standard): size
117-
for size in existing_sneaker.sizes
126+
for size in existing_sneaker.sneaker_sizes
118127
}
119-
for size in sneaker.sizes:
128+
for size in sneaker.sneaker_sizes:
120129
key = (size.value, size.size_standard)
121130
if key in existing_sizes:
122131
# Skip if we already have this size
123132
continue
124133
else:
125134
# Only add if we don't have this size yet
126135
size.sneaker = existing_sneaker
127-
existing_sneaker.sizes.append(size)
136+
existing_sneaker.sneaker_sizes.append(size)
128137

129138
session.add(existing_sneaker)
130139
result = existing_sneaker

src/solesearch_api/tasks/scraping/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
adidas_children_and_little_kids_task,
1010
adidas_youth_and_big_kids_task,
1111
)
12+
from solesearch_api.tasks.scraping.retail.adidas.mens_sneakers import (
13+
adidas_mens_sneakers_task,
14+
)
15+
from solesearch_api.tasks.scraping.retail.adidas.womens_sneakers import (
16+
adidas_womens_sneakers_task,
17+
)
1218

1319
# Nike Tasks
1420
from solesearch_api.tasks.scraping.retail.nike.new_releases import (

0 commit comments

Comments
 (0)