Skip to content

Commit 5a1f014

Browse files
committed
fix: replace unicode ellipsis, fix image paths in pageindex content, remove empty dirs on init
1 parent 4938cd7 commit 5a1f014

2 files changed

Lines changed: 10 additions & 8 deletions

File tree

openkb/cli.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,7 @@ def _add_single_file(file_path: Path, kb_dir: Path) -> None:
158158

159159
# 3/4. Index and compile
160160
if result.is_long_doc:
161-
click.echo(f" Long document detected — indexing with PageIndex…")
161+
click.echo(f" Long document detected — indexing with PageIndex...")
162162
try:
163163
from openkb.indexer import index_long_document
164164
index_result = index_long_document(result.raw_path, kb_dir)
@@ -168,7 +168,7 @@ def _add_single_file(file_path: Path, kb_dir: Path) -> None:
168168
return
169169

170170
summary_path = kb_dir / "wiki" / "summaries" / f"{doc_name}.md"
171-
click.echo(f" Compiling long doc (doc_id={index_result.doc_id})…")
171+
click.echo(f" Compiling long doc (doc_id={index_result.doc_id})...")
172172
for attempt in range(2):
173173
try:
174174
asyncio.run(
@@ -185,7 +185,7 @@ def _add_single_file(file_path: Path, kb_dir: Path) -> None:
185185
logger.debug("Compilation traceback:", exc_info=True)
186186
return
187187
else:
188-
click.echo(f" Compiling short doc…")
188+
click.echo(f" Compiling short doc...")
189189
for attempt in range(2):
190190
try:
191191
asyncio.run(compile_short_doc(doc_name, result.source_path, kb_dir, model))
@@ -277,8 +277,6 @@ def init():
277277
Path("wiki/sources/images").mkdir(parents=True, exist_ok=True)
278278
Path("wiki/summaries").mkdir(parents=True, exist_ok=True)
279279
Path("wiki/concepts").mkdir(parents=True, exist_ok=True)
280-
Path("wiki/explorations").mkdir(parents=True, exist_ok=True)
281-
Path("wiki/reports").mkdir(parents=True, exist_ok=True)
282280

283281
# Write wiki files
284282
Path("wiki/AGENTS.md").write_text(AGENTS_MD, encoding="utf-8")
@@ -430,12 +428,12 @@ def lint(ctx, fix):
430428
model: str = config.get("model", DEFAULT_CONFIG["model"])
431429

432430
# Structural lint
433-
click.echo("Running structural lint…")
431+
click.echo("Running structural lint...")
434432
structural_report = run_structural_lint(kb_dir)
435433
click.echo(structural_report)
436434

437435
# Knowledge lint (semantic)
438-
click.echo("Running knowledge lint…")
436+
click.echo("Running knowledge lint...")
439437
try:
440438
knowledge_report = asyncio.run(run_knowledge_lint(kb_dir, model))
441439
except Exception as exc:

openkb/indexer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,11 @@ def index_long_document(pdf_path: Path, kb_dir: Path) -> IndexResult:
106106
dest = dest_images_dir / filename
107107
if not dest.exists():
108108
shutil.copy2(src_path, dest)
109-
img["path"] = f"images/{pdf_path.stem}/{filename}"
109+
new_path = f"images/{pdf_path.stem}/{filename}"
110+
# Also fix image references in page content
111+
if "content" in page:
112+
page["content"] = page["content"].replace(str(src_path), new_path)
113+
img["path"] = new_path
110114

111115
(sources_dir / f"{pdf_path.stem}.json").write_text(
112116
json_mod.dumps(all_pages, ensure_ascii=False, indent=2), encoding="utf-8",

0 commit comments

Comments
 (0)