11# converts some HTML tags to BBCode
22# pass --debug to save the output to readme.finalpass
33# may be better off replacing this with html to markdown (and then to bbcode). Lepture recommends a JS html to markdown converter: sundown
4- from bs4 import BeautifulSoup
4+ from bs4 import BeautifulSoup , NavigableString
55import argparse
66
77def handle_font_tag (tag , replacements ):
@@ -54,44 +54,33 @@ def handle_style_tag(tag, replacements):
5454 inner_content = f"[{ attr } ]{ inner_content } [/{ attr } ]"
5555 return inner_content
5656
def recursive_html_to_bbcode(element):
    """Recursively convert a parsed HTML node to BBCode text.

    Text nodes are emitted verbatim. A <details> element becomes
    ``[SPOILER]...[/SPOILER]``; when it has a <summary> as a direct child,
    the converted summary content is used as the spoiler title
    (``[SPOILER=title]``). Every other tag is unwrapped: only the converted
    content of its children is kept.

    The parsed tree is not modified.

    Args:
        element: A BeautifulSoup node (the soup itself, a tag, or a
            NavigableString).

    Returns:
        The BBCode string for ``element`` and all of its descendants.
    """
    if isinstance(element, NavigableString):
        return str(element)

    if element.name == 'summary':
        # A summary is only ever rendered as the title of its parent
        # <details> (below), so it contributes nothing in place.
        return ''

    if element.name == 'details':
        # Look at direct children only: a recursive search would pick up the
        # <summary> of a nested <details> and use it as this element's title.
        summary = element.find('summary', recursive=False)
        spoiler_title = ''
        if summary is not None:
            spoiler_title = '=' + ''.join(
                recursive_html_to_bbcode(child) for child in summary.contents
            )

        # The summary child yields '' via the branch above, so there is no
        # need to remove it from the tree before converting the body.
        content = ''.join(
            recursive_html_to_bbcode(child) for child in element.contents
        )
        return f'[SPOILER{spoiler_title}]{content}[/SPOILER]'

    # Any other tag: drop the tag itself and keep its converted children.
    return ''.join(
        recursive_html_to_bbcode(child) for child in element.contents
    )
9584
9685def html_to_bbcode (html ):
9786 replacements = {
@@ -116,7 +105,7 @@ def html_to_bbcode(html):
116105 }
117106
118107 soup = BeautifulSoup (html , 'html.parser' )
119- return recursive_html_to_bbcode (soup , replacements )
108+ return recursive_html_to_bbcode (soup )
120109
121110def process_html (input_html , debug = False , output_file = None ):
122111 converted_bbcode = html_to_bbcode (input_html )
0 commit comments