11# -*- coding: utf-8 -*-
22
3- import markdown
3+ import markdown_it
44import platform
55import importlib .metadata
66from rich .logging import RichHandler
99from rich .text import Text
1010import logging
1111import os
12+ from lxml import html as html2
13+ from lxml import etree
1214
1315__version__ : str = "Beta 0.1"
1416
@@ -25,41 +27,140 @@ def main() -> int:
2527 logging .basicConfig (
2628 level = logging .DEBUG , format = "%(message)s" , handlers = [RichHandler ()]
2729 )
30+
2831 logging .info ("Starting main process." )
2932 logging .debug (f"Platform: { platform .platform ()} " )
3033 logging .debug (f"Python version: { platform .python_version ()} " )
31- logging .debug (f"markdown module version: { markdown .__version__ } " )
34+ logging .debug (f"markdown-it module version: { markdown_it .__version__ } " )
3235 logging .debug (f"rich module version: { importlib .metadata .version ('rich' )} " )
3336 logging .debug (f"PTools module version: { __version__ } " )
34- input_paths : set [str ] = set (console .input (
35- "Input your markdown file [bold]path[/bold] " '("|" to split): '
36- ).split ("|" ))
37+
38+ input_paths : set [str ] = set (
39+ console .input (
40+ "Input your markdown file [bold]path[/bold] " '("|" to split): '
41+ ).split ("|" )
42+ )
3743 logging .debug (f"Input paths: { input_paths } " )
3844 # Does the file exist? Is it a file?
3945 vinput_paths : list [str ] = []
4046 for path in input_paths :
4147 if not os .path .exists (path ):
42- logging .warning (f" File not found: { path } " )
48+ logging .warning (f' File not found: " { path } "' )
4349 elif not os .path .isfile (path ):
4450 logging .warning (f"Path is not a file: { path } " )
4551 elif not (path .endswith (".md" ) or path .endswith (".markdown" )):
46- logging .warning (f" Path is not a markdown file: { path } " )
52+ logging .warning (f' Path is not a markdown file: " { path } "' )
4753 else :
4854 vinput_paths .append (path )
4955 logging .debug (f"Valid paths: { vinput_paths } " )
5056 if not vinput_paths :
5157 logging .error ("No valid input files." )
5258 return 1
5359 del input_paths
60+
5461 output_dir : str = console .input (
55- "Input your output directory [bold]path[/bold]: "
62+ "Input your output directory [bold]path[/bold]: " # ignore
5663 )
5764 if not os .path .exists (output_dir ):
58- logging .error (f" Output directory not found: { output_dir } " )
65+ logging .error (f' Output directory not found: " { output_dir } "' )
5966 return 1
6067 elif not os .path .isdir (output_dir ):
61- logging .error (f" Output path is not a directory: { output_dir } " )
68+ logging .error (f' Output path is not a directory: " { output_dir } "' )
6269 return 1
70+ logging .debug (f'Output directory: "{ output_dir } "' )
71+
72+ template : str = console .input (
73+ "Input your HTML template file [bold]path[/bold] "
74+ "(optional, press Enter to skip): "
75+ )
76+ if not os .path .exists (template ):
77+ logging .error (f'Template file not found: "{ template } "' )
78+ template = ""
79+ elif not os .path .isfile (template ):
80+ logging .error (f'Template path is not a file: "{ template } "' )
81+ template = ""
82+ elif not (template .endswith (".html" ) or template .endswith (".htm" )):
83+ logging .error (f'Template path is not a HTML file: "{ template } "' )
84+ template = ""
85+ if template :
86+ logging .debug (f'Template file: "{ template } "' )
87+ with open (template , "r" , encoding = "utf-8" ) as f :
88+ template_content : str = f .read ()
89+
90+ logging .info ("Strarting markdown to HTML conversion." )
91+ md = markdown_it .MarkdownIt ("gfm-like" , {"typographer" : True })
92+ md .enable (["replacements" , "smartquotes" ])
93+ for path in vinput_paths :
94+ with open (path , "r" , encoding = "utf-8" ) as f :
95+ content : str = f .read ()
96+ html : str = md .render (content )
97+ output_path : str = os .path .join (
98+ output_dir , os .path .basename (path ).replace (".md" , ".html" )
99+ )
100+ if template :
101+ title = html2 .fromstring (html ).xpath (".//h1" )
102+ title = title [0 ].text_content () if title else "Untitled"
103+ template_content = template_content .replace ("%%title%%" , title )
104+ html = template_content .replace ("%%content%%" , html )
105+ with open (output_path , "w" , encoding = "utf-8" ) as f :
106+ f .write (html )
107+ # pretty print
108+ pretty_input : str = console .input (
109+ "Is it necessary to format the output HTML file?(Y/N): "
110+ )
111+ if pretty_input .lower () in ["y" , "yes" ]:
112+ with open (output_path , "r" , encoding = "utf-8" ) as f :
113+ raw_html = f .read ()
114+
def pretty_print_html(html_str: str) -> str:
    """Pretty-print an HTML string, keeping its DOCTYPE when one is present.

    The text is first parsed as a whole document with ``etree.HTMLParser``.
    If that attempt raises, the text is re-parsed as a list of fragments and
    each element fragment is serialized on its own. If the fragment attempt
    raises as well, the input is returned unchanged.

    Args:
        html_str: The HTML text to format.

    Returns:
        The formatted HTML, or ``html_str`` itself when both parsing
        strategies fail.
    """
    from io import StringIO

    def _serialize(node) -> str:
        # One place for the serialization settings shared by both paths.
        return etree.tostring(
            node,
            encoding='unicode',
            pretty_print=True,
            method='html',
        )

    try:
        # Whole-document parse; blank text is kept (remove_blank_text=False).
        html_parser = etree.HTMLParser(remove_blank_text=False)
        document = etree.parse(StringIO(html_str), html_parser)
        doctype = document.docinfo.doctype or ''
        formatted_root = _serialize(document.getroot())
        # Prepend the DOCTYPE only when the parsed document declared one.
        return doctype + '\n ' + formatted_root if doctype else formatted_root
    except Exception as e:
        # Whole-document handling failed: log it and fall through to
        # fragment mode below.
        logging.warning(f"完整文档解析失败,尝试片段模式: { e } ")

    try:
        # Fragment mode: string fragments pass through untouched, element
        # fragments are serialized individually, then everything is joined.
        pretty_parts = [
            frag if isinstance(frag, str) else _serialize(frag)
            for frag in html2.fragments_fromstring(html_str)
        ]
        return ''.join(pretty_parts)
    except Exception as e2:
        # Last resort: hand the caller back exactly what it passed in.
        logging.error(f"片段解析也失败,返回原始内容: { e2 } ")
        return html_str
158+
159+ pretty_html = pretty_print_html (raw_html )
160+ with open (output_path , "w" , encoding = "utf-8" ) as f :
161+ f .write (pretty_html )
162+ console .print (pretty_html )
163+ logging .info (f'Converted "{ path } " to "{ output_path } ". OK!' )
63164 logging .info ("Finished main process." )
64165 return 0
65166
0 commit comments