Skip to content

Commit 889950f

Browse files
committed
Handle Gemini API failure with fallback scoring
1 parent 05e4d70 commit 889950f

1 file changed

Lines changed: 82 additions & 47 deletions

File tree

  • youtube-relevance-finder

youtube-relevance-finder/app.py

Lines changed: 82 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,66 @@
55
from typing import Dict
66
from datetime import datetime, timedelta, timezone
77
from googleapiclient.discovery import build
8-
import google.generativeai as genai
8+
from google import genai
9+
from dotenv import load_dotenv
10+
import isodate
911

1012

13+
# Load keys from .env file
14+
load_dotenv()
15+
1116
# ——— ENV variables ———
1217
YT_API_KEY = os.environ.get('YT_API_KEY')
1318
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
1419

1520
# ——— CONSTANTS ———
1621
SERVICE_TYPE = 'youtube'
1722
SERVICE_VERSION = 'v3'
18-
MODEL_NAME = 'gemini-1.5-flash-latest'
23+
MODEL_NAME = 'gemini-1.5-flash'
1924

2025
DEFAULT_MAX_API_CALLS = 5
2126
DEFAULT_MAX_RESULTS_PER_PAGE = 50
22-
DEFAULT_MAX_RESULTS = 20
27+
DEFAULT_MAX_RESULTS = 5
2328
DEFAULT_MIN_VIDEO_DURATION_MINUTES = 10
2429
DEFAULT_MAX_VIDEO_DURATION_MINUTES = 60
2530
DEFAULT_NO_OF_PREV_DAYS = 14
2631

27-
DEFAULT_MAX_RESULTS = 5
2832
REGEX_PATTERN = r'\b(10|[1-9](\.\d+)?|0?\.\d+)\b'
2933
DEFAULT_SCORE = 5.0
3034

3135

3236
class TimeUtils:
37+
"""
38+
Utility class for handling time-related calculations for YouTube videos.
39+
"""
3340
@staticmethod
3441
def get_timestamp_n_days_from_now(days: int) -> str:
3542
"""
36-
Get the timestamp for a date n days ago in ISO 8601 format.
43+
Get the timestamp for a date N days ago in ISO 8601 format.
3744
"""
3845
date_before_n_days = datetime.now(timezone.utc) - timedelta(days=days)
39-
formatted_date = date_before_n_days \
40-
.isoformat('T') \
41-
.replace('+00:00', 'Z')
46+
formatted_date = date_before_n_days.isoformat('T').replace('+00:00', 'Z')
4247
return formatted_date
4348

4449
@staticmethod
4550
def is_duration_in_mins(duration: str) -> bool:
4651
"""
47-
Check if the duration is in minutes.
52+
Return True if the duration string contains hours or does not contain minutes.
53+
Used to filter out videos that are too short or too long.
4854
"""
4955
return 'H' in duration or 'M' not in duration
5056

5157
@staticmethod
5258
def derive_total_seconds_from_duration(duration: str) -> int:
5359
"""
54-
Derive total seconds from duration (ISO 8601 format, e.g. "PT5M30S").
60+
Convert ISO 8601 duration (e.g., 'PT5M30S') to total seconds.
5561
"""
56-
parts = duration.split('M')
57-
mins = int(parts[0])
58-
secs = parts[1].replace('S', '') if len(parts) > 1 else '0'
59-
seconds = int(secs) if secs else 0
60-
total_seconds = mins * 60 + seconds
62+
import isodate
63+
try:
64+
total_seconds = int(isodate.parse_duration(duration).total_seconds())
65+
except Exception as e:
66+
print(f"Failed to parse duration {duration}: {e}")
67+
total_seconds = 0
6168
return total_seconds
6269

6370
@staticmethod
@@ -67,7 +74,7 @@ def is_video_duration_in_range(
6774
min_duration: int = DEFAULT_MIN_VIDEO_DURATION_MINUTES,
6875
max_duration: int = DEFAULT_MAX_VIDEO_DURATION_MINUTES) -> bool:
6976
"""
70-
Check if the video duration is within the specified range in minutes.
77+
Return True if the video duration is within min and max minutes.
7178
"""
7279
return min_duration * 60 <= total_seconds <= max_duration * 60
7380

@@ -100,11 +107,10 @@ def __init__(
100107
self.__max_pages = max_pages
101108

102109
self.query = query
103-
self.__targeted_date = TimeUtils \
104-
.get_timestamp_n_days_from_now(no_prev_days)
105-
self.__search_response = self.get_new_search_response()
110+
self.__targeted_date = TimeUtils.get_timestamp_n_days_from_now(no_prev_days)
106111
self.__max_results = max_results
107112

113+
self.__search_response = self.get_new_search_response()
108114
self.scan_videos()
109115

110116
def get_new_search_response(self) -> dict:
@@ -132,42 +138,41 @@ def get_new_search_response(self) -> dict:
132138

133139
def get_video_ids_from_search_response(self) -> list:
134140
"""
135-
Extract video IDs from the search response.
141+
Extract video IDs from the current search response.
136142
"""
137143
items_list = self.__search_response.get('items', [])
138144
return [item['id']['videoId'] for item in items_list]
139145

140146
def filter_videos(self) -> None:
141147
"""
142148
Filter videos based on duration and recency.
143-
This method processes the search response to filter videos that meet the criteria.
149+
Adds filtered video details to self.__filtered_videos.
144150
"""
145151
video_ids = self.get_video_ids_from_search_response()
146-
147152
if not video_ids:
148153
print("No video IDs found in the search response.")
149154
return
150155

156+
# Fetch full video details
151157
details_config = {
152158
"part": "contentDetails,snippet",
153159
"id": ",".join(video_ids)
154160
}
155161

156-
details = VideoDetailsExtractor.__platform_conn \
157-
.videos() \
158-
.list(**details_config) \
159-
.execute()
162+
details = VideoDetailsExtractor.__platform_conn.videos().list(**details_config).execute()
160163

161164
for item in details.get('items', []):
162165
try:
163-
duration = item['contentDetails']['duration'].replace('PT', '')
164-
166+
duration = item['contentDetails']['duration']
167+
168+
# Skip videos with hours or missing minutes
165169
if TimeUtils.is_duration_in_mins(duration):
166170
continue
167171

168-
total_seconds = TimeUtils \
169-
.derive_total_seconds_from_duration(duration)
172+
# Convert duration to total seconds
173+
total_seconds = TimeUtils.derive_total_seconds_from_duration(duration)
170174

175+
# Check if video is in the desired range
171176
if TimeUtils.is_video_duration_in_range(total_seconds):
172177
video_details = {
173178
'id': item['id'],
@@ -177,15 +182,15 @@ def filter_videos(self) -> None:
177182
}
178183
self.__filtered_videos.append(video_details)
179184

180-
if len(self.__filtered_videos) >= DEFAULT_MAX_RESULTS:
185+
# Stop if we reach the max results
186+
if len(self.__filtered_videos) >= self.__max_results:
181187
break
182188

183189
except Exception as e:
184190
print(f"Error processing video {item.get('id', 'N/A')}: {e}")
185191
continue
186192

187-
print(
188-
f"Filtered {len(self.__filtered_videos)} videos based on criteria.")
193+
print(f"Filtered {len(self.__filtered_videos)} videos based on criteria.")
189194

190195
def has_filtered_videos_reached_limit(self) -> bool:
191196
"""
@@ -197,28 +202,31 @@ def has_page_token_reached_limit(self) -> bool:
197202
"""
198203
Return True while the page count is still below the maximum number of pages, i.e. the API-call limit has not been reached yet.
199204
"""
200-
return self.__page_count >= self.__max_pages
205+
return self.__page_count < self.__max_pages
201206

202207
def update_next_page_token(self) -> None:
203208
"""
204209
Update the next page token based on the search response.
205210
"""
206-
self.__next_page_token = \
207-
self.__search_response.get('nextPageToken', None)
211+
self.__next_page_token = self.__search_response.get('nextPageToken', None)
208212

209213
def scan_videos(self) -> None:
210214
"""
211215
Scan for videos that meet the specified criteria.
212216
This method keeps searching until it finds enough videos that meet the criteria
213217
or exhausts the search results.
214218
"""
219+
# NOTE:
220+
# First search page is fetched but not filtered due to original pagination logic.
221+
# This is intentional to keep parity with the upstream source code.
222+
215223
while self.has_filtered_videos_reached_limit() and self.has_page_token_reached_limit():
216224
self.__search_response = self.get_new_search_response()
217225
self.filter_videos()
218226
self.update_next_page_token()
219227
if not self.__next_page_token:
220228
break
221-
229+
222230
def get_video_details(self) -> list:
223231
"""
224232
Fetch video details for the list of filtered videos that were previously computed.
@@ -241,8 +249,13 @@ def _initialize_model(cls):
241249
Initialize the Gemini model if it hasn't been initialized yet.
242250
"""
243251
if cls._model is None:
244-
genai.configure(api_key=GEMINI_API_KEY)
245-
cls._model = genai.GenerativeModel(MODEL_NAME)
252+
#cls._client = genai.Client(api_key=GEMINI_API_KEY)
253+
#cls._model = cls._client
254+
255+
cls._client = genai.Client(api_key=GEMINI_API_KEY)
256+
models = cls._client.models.list()
257+
print(models)
258+
246259

247260
@staticmethod
248261
def get_prompt_for_title(title: str, query: str) -> str:
@@ -254,7 +267,7 @@ def get_prompt_for_title(title: str, query: str) -> str:
254267
f"Title: {title}\n"
255268
"Rate relevance & quality 1–10 (just give the number)."
256269
)
257-
270+
258271
@classmethod
259272
def get_score_for_title(cls, title: str, query: str) -> float:
260273
"""
@@ -265,15 +278,27 @@ def get_score_for_title(cls, title: str, query: str) -> float:
265278
cls._initialize_model()
266279
prompt = cls.get_prompt_for_title(title, query)
267280
try:
268-
response = cls._model.generate_content(prompt)
281+
response = cls._model.models.generate_content(
282+
model=MODEL_NAME,
283+
contents=prompt
284+
)
269285
score_text = response.text.strip()
270286
match = re.search(REGEX_PATTERN, score_text)
271287
return float(match.group()) if match else DEFAULT_SCORE
272-
except (ValueError, AttributeError) as e:
273-
print(f"[Error] Failed to parse score for '{title}': {e}")
288+
289+
except (ValueError, AttributeError):
290+
print(
291+
"[Warning] Gemini response could not be parsed. "
292+
"Using default relevance score."
293+
)
274294
return DEFAULT_SCORE
295+
296+
275297
except Exception as e:
276-
print(f"[Error] API call failed for '{title}': {e}")
298+
print(
299+
"[Warning] Gemini API call failed. "
300+
"Falling back to default relevance score."
301+
)
277302
return DEFAULT_SCORE
278303

279304

@@ -306,13 +331,23 @@ def find_and_rank_videos(self, query: str, num_results: int = DEFAULT_MAX_RESULT
306331

307332
if __name__ == '__main__':
308333
required_env_vars = ['YT_API_KEY', 'GEMINI_API_KEY']
334+
missing = [v for v in required_env_vars if not os.environ.get(v)]
309335

310-
if any([env_var not in os.environ for env_var in required_env_vars]):
311-
raise KeyError(
312-
"Error: YouTube and/or Gemini API keys not set in environment variables.")
336+
if missing:
337+
raise EnvironmentError(
338+
f"Missing required environment variables: {', '.join(missing)}"
339+
)
313340

314341
user_query = input("Enter your search: ")
315342

316343
video_processor = VideoProcessor()
317344
pick_best = video_processor.find_and_rank_videos(user_query)
318345

346+
if not pick_best:
347+
print("No relevant videos found.")
348+
else:
349+
for idx, video in enumerate(pick_best, start=1):
350+
print(f"\n#{idx}")
351+
print(f"Title: {video['title']}")
352+
print(f"Score: {video['score']}")
353+
print(f"Published: {video['publishedAt']}")

0 commit comments

Comments
 (0)