@@ -275,7 +275,7 @@ def main() -> int:
275275 )
276276 if not update_article_list .lower () in ["y" , "yes" ]:
277277 return 0
278- logging .info ("Starting article list update process." )
278+ logging .info ("Starting article list update process." )
279279 gc .collect () # 强制垃圾回收,释放内存
280280 success_count = 0
281281 fail_count = 0
@@ -304,38 +304,149 @@ def main() -> int:
304304 ):
305305 logging .error (f'Article list path is not a HTML file: "{ article_list_path } "' )
306306 return 1
307- title_list = []
308- path_list = []
309- for path in os .listdir (article_dir ):
307+
308+ # 收集所有文章信息(标题和路径)
309+ articles = [] # 列表元素为 (title, path)
310+ for filename in os .listdir (article_dir ):
310311 try :
311- if path .endswith (".html" ) or path .endswith (".htm" ):
312- article_path = os .path .join (article_dir , path )
313- path_list .append (article_path )
314- logging .info (f"Processing article: { article_path } " )
315- with open (article_path , "r" , encoding = "utf-8" ) as f :
316- article_content = f .read ()
317- title_els = html2 .fromstring (article_content ).xpath (".//h1" )
318- title_list .append (title_els [0 ].text_content () if title_els else "Untitled" )
312+ if not (filename .endswith (".html" ) or filename .endswith (".htm" )):
313+ continue
314+ article_path = os .path .join (article_dir , filename )
315+ logging .info (f"Processing article: { article_path } " )
316+ with open (article_path , "r" , encoding = "utf-8" ) as f :
317+ article_content = f .read ()
318+ title_els = html2 .fromstring (article_content ).xpath (".//h1" )
319+ title = title_els [0 ].text_content () if title_els else "Untitled"
320+ articles .append ((title , article_path ))
319321 except Exception as e :
320- logging .error (f"Failed to process article { path } : { e } " )
322+ logging .error (f"Failed to process article { filename } : { e } " )
321323 fail_count += 1
322- if not title_list :
324+
325+ if not articles :
323326 logging .error ("No valid articles found to update the list." )
324327 return 1
325- for title in title_list :
326- try :
327- title_list .append (f"<div class=\" card\" >\n <a href={ os .path .relpath (path_list [title_list .index (title )], os .path .dirname (article_list_path ))} >{ title } </a></div>" )
328- title_list .remove (title )
329- except Exception as e :
330- logging .error (f"Failed to create card for title { title } : { e } " )
331- fail_count += 1
332- else :
333- success_count += 1
334- title_list .sort ()
335- with open (article_list_path , "r" , encoding = "utf-8" ) as f :
336- list_content = f .read ()
337- with open (article_list_path , "w" , encoding = "utf-8" ) as f :
338- f .write (pretty_print_html (list_content .replace ("%%cards%%" , "\n " .join (title_list ))))
328+
329+ # 按标题排序
330+ articles .sort (key = lambda x : x [0 ])
331+
332+ # 生成卡片列表(每个卡片是一个div)
333+ cards = []
334+ base_dir = os .path .dirname (article_list_path )
335+ for title , path in articles :
336+ rel_path = os .path .relpath (path , base_dir )
337+ card = f'<div class="card"><a href="{ rel_path } ">{ title } </a></div>'
338+ cards .append (card )
339+ card_html = "\n " .join (cards )
340+
341+ # 读取文章列表文件,解析为HTML树
342+ try :
343+ with open (article_list_path , "r" , encoding = "utf-8" ) as f :
344+ list_content = f .read ()
345+ tree = html2 .document_fromstring (list_content )
346+ except Exception as e :
347+ logging .error (f"Failed to parse article list file: { e } " )
348+ return 1
349+
350+ # 删除所有 class 包含 "card" 的 div 元素
351+ for card_div in tree .xpath ('//div[contains(@class, "card")]' ):
352+ parent = card_div .getparent ()
353+ if parent is not None :
354+ parent .remove (card_div )
355+ logging .debug ("Removed an existing card div." )
356+
357+ # 查找占位符 %%card%% 所在的文本节点
358+ placeholder_found = False
359+ for element in tree .iter ():
360+ if element .text and "%%card%%" in element .text :
361+ # 将文本节点中的占位符替换为生成的卡片HTML(解析为元素后插入)
362+ before , after = element .text .split ("%%card%%" , 1 )
363+ element .text = before or None # 前半部分保留为text
364+ # 将卡片字符串解析为元素列表
365+ card_fragments = html2 .fragments_fromstring (card_html )
366+ # 在当前位置插入卡片元素
367+ pos = 0
368+ for frag in card_fragments :
369+ if isinstance (frag , str ):
370+ # 文本节点不能直接插入,需作为tail或新元素处理
371+ # 简单起见,将卡片整体作为HTML插入一个占位注释,然后替换
372+ # 但更好的方法是直接使用后续的replace逻辑
373+ pass
374+ else :
375+ element .insert (pos , frag )
376+ pos += 1
377+ # 处理剩余部分
378+ if after :
379+ # 如果after非空,作为tail添加到最后一个卡片元素,或创建新文本节点
380+ if card_fragments :
381+ last = card_fragments [- 1 ]
382+ if isinstance (last , str ):
383+ # 理论上不会出现
384+ pass
385+ else :
386+ if last .tail :
387+ last .tail = after + last .tail
388+ else :
389+ last .tail = after
390+ else :
391+ # 如果没有卡片,直接设置element的tail
392+ element .tail = after
393+ placeholder_found = True
394+ logging .debug ("Replaced placeholder %%card%% with cards." )
395+ break
396+ if element .tail and "%%card%%" in element .tail :
397+ # 处理tail中的占位符
398+ parent = element .getparent ()
399+ if parent is None :
400+ continue
401+ before , after = element .tail .split ("%%card%%" , 1 )
402+ element .tail = before or None
403+ # 创建卡片元素列表
404+ card_fragments = html2 .fragments_fromstring (card_html )
405+ # 插入到element之后
406+ idx = list (parent ).index (element )
407+ for i , frag in enumerate (card_fragments ):
408+ if isinstance (frag , str ):
409+ # 文本节点作为新的元素插入?实际上fragments_fromstring返回的字符串通常是空白
410+ # 忽略纯文本片段
411+ pass
412+ else :
413+ parent .insert (idx + 1 + i , frag )
414+ # 处理剩余部分
415+ if after :
416+ if card_fragments :
417+ last = card_fragments [- 1 ]
418+ if isinstance (last , str ):
419+ pass
420+ else :
421+ if last .tail :
422+ last .tail = after + last .tail
423+ else :
424+ last .tail = after
425+ else :
426+ # 如果没有卡片,将after设为某个元素的tail或父元素的text
427+ # 简单处理:创建一个注释节点?
428+ pass
429+ placeholder_found = True
430+ logging .debug ("Replaced placeholder %%card%% in tail." )
431+ break
432+
433+ if not placeholder_found :
434+ logging .error ('Placeholder "%%card%%" not found in the article list file.' )
435+ return 1
436+
437+ # 将修改后的树写回文件(使用pretty_print_html格式化)
438+ try :
439+ updated_html = etree .tostring (tree , encoding = "unicode" , method = "html" )
440+ # 使用pretty_print_html进行最终格式化(确保缩进统一)
441+ final_html = pretty_print_html (updated_html )
442+ with open (article_list_path , "w" , encoding = "utf-8" ) as f :
443+ f .write (final_html )
444+ success_count = len (articles )
445+ logging .info (f"Successfully updated { success_count } cards." )
446+ except Exception as e :
447+ logging .error (f"Failed to write updated article list: { e } " )
448+ return 1
449+
339450 logging .info (
340451 f"Finished update process. Success: { success_count } , Failed: { fail_count } "
341452 )
0 commit comments