From d3bef4f096e707c759a99d3129c12ed6d1521d2d Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 12:11:53 +0800
Subject: [PATCH 1/7] fix: knowledgebase add method & list_chunks

---
 veadk/database/database_adapter.py       | 21 +++++---
 veadk/database/viking/viking_database.py |  5 +-
 veadk/knowledgebase/knowledgebase.py     | 65 ++++++++++++++++++++++--
 3 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/veadk/database/database_adapter.py b/veadk/database/database_adapter.py
index b789cf03..b12fbb52 100644
--- a/veadk/database/database_adapter.py
+++ b/veadk/database/database_adapter.py
@@ -28,7 +28,7 @@ def __init__(self, client):
 
         self.client: RedisDatabase = client
 
-    def add(self, data: list[str], index: str):
+    def add(self, data: list[str], index: str, **kwargs):
         logger.debug(f"Adding documents to Redis database: index={index}")
 
         try:
@@ -78,7 +78,7 @@ def delete_doc(self, index: str, id: str) -> bool:
             )
             return False
 
-    def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
+    def list_chunks(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
         logger.debug(f"Listing documents from Redis database: index={index}")
         try:
             # Get all documents from Redis
@@ -111,7 +111,7 @@ def create_table(self, table_name: str):
         """
         self.client.add(sql)
 
-    def add(self, data: list[str], index: str):
+    def add(self, data: list[str], index: str, **kwargs):
         logger.debug(
             f"Adding documents to SQL database: table_name={index} data_len={len(data)}"
         )
@@ -203,7 +203,7 @@ def _validate_index(self, index: str):
                 "The index name does not conform to the naming rules of OpenSearch"
             )
 
-    def add(self, data: list[str], index: str):
+    def add(self, data: list[str], index: str, **kwargs):
         self._validate_index(index)
 
         logger.debug(
@@ -247,7 +247,7 @@ def delete_doc(self, index: str, id: str) -> bool:
             )
             return False
 
-    def list_docs(self, index: str, offset: int = 0, limit: int = 1000) -> list[dict]:
+    def list_chunks(self, index: str, offset: int = 0, limit: int = 1000) -> list[dict]:
         self._validate_index(index)
         logger.debug(f"Listing documents from vector database: index={index}")
         return self.client.list_docs(collection_name=index, offset=offset, limit=limit)
@@ -322,6 +322,13 @@ def delete_doc(self, index: str, id: str) -> bool:
         logger.debug(f"Deleting documents from vector database: index={index} id={id}")
         return self.client.delete_by_id(collection_name=index, id=id)
 
+    def list_chunks(self, index: str, offset: int, limit: int) -> list[dict]:
+        self._validate_index(index)
+        logger.debug(f"Listing documents from vector database: index={index}")
+        return self.client.list_chunks(
+            collection_name=index, offset=offset, limit=limit
+        )
+
     def list_docs(self, index: str, offset: int, limit: int) -> list[dict]:
         self._validate_index(index)
         logger.debug(f"Listing documents from vector database: index={index}")
@@ -371,7 +378,7 @@ def delete(self, index: str) -> bool:
     def delete_docs(self, index: str, ids: list[int]):
         raise NotImplementedError("VikingMemoryDatabase does not support delete_docs")
 
-    def list_docs(self, index: str):
+    def list_chunks(self, index: str):
         raise NotImplementedError("VikingMemoryDatabase does not support list_docs")
 
 
@@ -393,7 +400,7 @@ def delete(self, index: str) -> bool:
     def delete_doc(self, index: str, id: str) -> bool:
         return self.client.delete_doc(id)
 
-    def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
+    def list_chunks(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
         return self.client.list_docs(offset=offset, limit=limit)
 
 
diff --git a/veadk/database/viking/viking_database.py b/veadk/database/viking/viking_database.py
index 18474768..e94210d0 100644
--- a/veadk/database/viking/viking_database.py
+++ b/veadk/database/viking/viking_database.py
@@ -403,7 +403,7 @@ def collection_exists(self, collection_name: str) -> bool:
         else:
             return False
 
-    def list_docs(
+    def list_chunks(
         self, collection_name: str, offset: int = 0, limit: int = -1
     ) -> list[dict]:
         request_params = {
@@ -431,6 +431,9 @@ def list_docs(
             logger.error(f"Error in list_docs: {result['message']}")
             raise ValueError(f"Error in list_docs: {result['message']}")
 
+        if not result["data"]["point_list"]:
+            return []
+
         data = [
             {
                 "id": res["point_id"],
diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py
index beab826f..9f2b12dd 100644
--- a/veadk/knowledgebase/knowledgebase.py
+++ b/veadk/knowledgebase/knowledgebase.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import io
+import os.path
 from typing import Any, BinaryIO, Literal, TextIO
 
 from pydantic import BaseModel
 
 from veadk.database.database_adapter import get_knowledgebase_database_adapter
 from veadk.database.database_factory import DatabaseFactory
+from veadk.utils.misc import formatted_timestamp
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -66,10 +68,65 @@ def add(
             )
 
         index = build_knowledgebase_index(app_name)
-
         logger.info(f"Adding documents to knowledgebase: index={index}")
 
-        self._adapter.add(data=data, index=index)
+        if self.backend == "viking":
+            # Case 1: Handling file paths or lists of file paths (str)
+            if isinstance(data, str) and os.path.isfile(data):
+                # 单个文件路径，直接调用client.add
+                # 获取文件名（包括后缀名）
+                if "file_name" not in kwargs or not kwargs["file_name"]:
+                    kwargs["file_name"] = os.path.basename(data)
+                return self._adapter.add(data=data, index=index, **kwargs)
+            # Case 2: Handling when list[str] is a full path  (list[str])
+            if isinstance(data, list):
+                if all(isinstance(item, str) for item in data):
+                    all_paths = all(os.path.isfile(item) for item in data)
+                    all_not_paths = all(not os.path.isfile(item) for item in data)
+                    if all_paths:
+                        if "file_name" not in kwargs or not kwargs["file_name"]:
+                            kwargs["file_name"] = [
+                                os.path.basename(item) for item in data
+                            ]
+                        return self._adapter.add(data=data, index=index, **kwargs)
+                    elif (
+                        not all_not_paths
+                    ):  # Prevent the occurrence of non-existent paths
+                        # There is a mixture of paths and non-paths
+                        raise ValueError(
+                            "Mixed file paths and content strings in list are not allowed"
+                        )
+            # Case 3: Handling strings or string arrays (content)  (str or list[str])
+            if isinstance(data, str) or (
+                isinstance(data, list) and all(isinstance(item, str) for item in data)
+            ):
+                if "file_name" not in kwargs or not kwargs["file_name"]:
+                    if isinstance(data, str):
+                        kwargs["file_name"] = f"{formatted_timestamp()}.txt"
+                    else:  # list[str] without file_names
+                        prefix_file_name = formatted_timestamp()
+                        kwargs["file_name"] = [
+                            f"{prefix_file_name}_{i}.txt" for i in range(len(data))
+                        ]
+                return self._adapter.add(data=data, index=index, **kwargs)
+
+            # Case 4: Handling binary data (bytes)
+            if isinstance(data, bytes):
+                # user must give file_name
+                if "file_name" not in kwargs:
+                    raise ValueError("file_name must be provided for binary data")
+                return self._adapter.add(data=data, index=index, **kwargs)
+
+            # Case 5: Handling file objects TextIO or BinaryIO
+            if isinstance(data, (io.TextIOWrapper, io.BufferedReader)):
+                if not kwargs.get("file_name") and hasattr(data, "name"):
+                    kwargs["file_name"] = os.path.basename(data.name)
+                return self._adapter.add(data=data, index=index, **kwargs)
+            # Case6: Unsupported data type
+            raise TypeError(f"Unsupported data type: {type(data)}")
+
+        # not viking
+        return self._adapter.add(data=data, index=index, **kwargs)
 
     def search(self, query: str, app_name: str, top_k: int | None = None) -> list[str]:
         top_k = self.top_k if top_k is None else top_k
@@ -93,4 +150,4 @@ def delete_doc(self, app_name: str, id: str) -> bool:
 
     def list_docs(self, app_name: str, offset: int = 0, limit: int = 100) -> list[dict]:
         index = build_knowledgebase_index(app_name)
-        return self._adapter.list_docs(index=index, offset=offset, limit=limit)
+        return self._adapter.list_chunks(index=index, offset=offset, limit=limit)

From 76e99ce6549e43fee3332016e138c171157940f0 Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 12:43:26 +0800
Subject: [PATCH 2/7] fix: knowledgebase add method

---
 veadk/database/viking/viking_database.py | 195 +++++++++++++++++++----
 veadk/knowledgebase/knowledgebase.py     |  20 ++-
 2 files changed, 180 insertions(+), 35 deletions(-)

diff --git a/veadk/database/viking/viking_database.py b/veadk/database/viking/viking_database.py
index e94210d0..ea88b84c 100644
--- a/veadk/database/viking/viking_database.py
+++ b/veadk/database/viking/viking_database.py
@@ -136,11 +136,25 @@ def _upload_to_tos(
         self,
         data: str | list[str] | TextIO | BinaryIO | bytes,
         **kwargs: Any,
-    ):
-        file_ext = kwargs.get(
-            "file_ext", ".pdf"
-        )  # when bytes data, file_ext is required
+    ) -> tuple[int, str]:
+        """
+        Upload data to TOS (Torch Object Storage).
 
+        Args:
+            data: The data to be uploaded. Can be one of the following types:
+                - str: File path or string data
+                - list[str]: List of strings
+                - TextIO: File object (text)
+                - BinaryIO: File object (binary)
+                - bytes: Binary data
+            **kwargs: Additional keyword arguments.
+                - file_name (str): The file name (including suffix).
+
+        Returns:
+            tuple: A tuple containing the status code and TOS URL.
+                - status_code (int): HTTP status code
+                - tos_url (str): The URL of the uploaded file in TOS
+        """
         ak = self.config.volcengine_ak
         sk = self.config.volcengine_sk
 
@@ -151,21 +165,31 @@ def _upload_to_tos(
 
         client = tos.TosClientV2(ak, sk, tos_endpoint, tos_region, max_connections=1024)
 
+        # Extract file_name from kwargs - this is now required and includes the extension
+        file_names = kwargs.get("file_name")
+
         if isinstance(data, str) and os.path.isfile(data):  # Process file path
-            file_ext = os.path.splitext(data)[1]
-            new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             with open(data, "rb") as f:
                 upload_data = f.read()
 
+        elif (
+            isinstance(data, list)
+            and all(isinstance(item, str) for item in data)
+            and all(os.path.isfile(item) for item in data)
+        ):
+            # Process list of file paths - this should be handled at a higher level
+            raise ValueError(
+                "Uploading multiple files through a list of file paths is not supported in _upload_to_tos directly. Please call this function for each file individually."
+            )
+
         elif isinstance(
             data,
             (io.TextIOWrapper, io.BufferedReader),  # file type: TextIO | BinaryIO
         ):  # Process file stream
-            # Try to get the file extension from the file name, and use the default value if there is none
-            file_ext = ".unknown"
-            if hasattr(data, "name"):
-                _, file_ext = os.path.splitext(data.name)
-            new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             if isinstance(data, TextIO):
                 # Encode the text stream content into bytes
                 upload_data = data.read().encode("utf-8")
@@ -174,16 +198,19 @@ def _upload_to_tos(
                 upload_data = data.read()
 
         elif isinstance(data, str):  # Process ordinary strings
-            new_key = f"{tos_key}/{str(uuid.uuid4())}.txt"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             upload_data = data.encode("utf-8")  # Encode as byte type
 
         elif isinstance(data, list):  # Process list of strings
-            new_key = f"{tos_key}/{str(uuid.uuid4())}.txt"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             # Join the strings in the list with newlines and encode as byte type
             upload_data = "\n".join(data).encode("utf-8")
 
         elif isinstance(data, bytes):  # Process bytes data
-            new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             upload_data = data
 
         else:
@@ -231,28 +258,136 @@ def add(
         **kwargs,
     ):
         """
+        Add documents to the Viking database.
         Args:
-            data: str, file path or file stream:  Both file or file.read() are acceptable.
-            **kwargs: collection_name(required)
+            data: The data to be added. Can be one of the following types:
+                - str: File path or string data
+                - list[str]: List of file paths or list of strings
+                - TextIO: File object (text)
+                - BinaryIO: File object (binary)
+                - bytes: Binary data
+            collection_name: The name of the collection to add documents to.
+            **kwargs: Additional keyword arguments.
+                - file_name (str | list[str]): The file name or a list of file names (including suffix).
+                - doc_id (str): The document ID. If not provided, a UUID will be generated.
         Returns:
-            {
+            dict or list: A dictionary containing the TOS URL and document ID, or a list of such dictionaries for multiple file uploads.
+            Format: {
                 "tos_url": "tos://<bucket>/<key>",
                 "doc_id": "<doc_id>",
             }
         """
-
-        status, tos_url = self._upload_to_tos(data=data, **kwargs)
-        if status != 200:
-            raise ValueError(f"Error in upload_to_tos: {status}")
-        doc_id = self._add_doc(
-            collection_name=collection_name,
-            tos_url=tos_url,
-            doc_id=str(uuid.uuid4()),
-        )
-        return {
-            "tos_url": f"tos://{tos_url}",
-            "doc_id": doc_id,
-        }
+        # Handle list of file paths (multiple file upload)
+        if (
+            isinstance(data, list)
+            and all(isinstance(item, str) for item in data)
+            and all(os.path.isfile(item) for item in data)
+        ):
+            # Handle multiple file upload
+            file_names = kwargs.get("file_name")
+            if (
+                not file_names
+                or not isinstance(file_names, list)
+                or len(file_names) != len(data)
+            ):
+                raise ValueError(
+                    "For multiple file upload, file_name must be provided as a list with the same length as data"
+                )
+
+            results = []
+            for i, file_path in enumerate(data):
+                # Create kwargs for this specific file
+                single_kwargs = kwargs.copy()
+                single_kwargs["file_name"] = file_names[i]
+
+                # Generate or use provided doc_id for this file
+                doc_id = single_kwargs.get("doc_id")
+                if not doc_id:
+                    doc_id = str(uuid.uuid4())
+                    single_kwargs["doc_id"] = doc_id
+
+                status, tos_url = self._upload_to_tos(data=file_path, **single_kwargs)
+                if status != 200:
+                    raise ValueError(
+                        f"Error in upload_to_tos for file {file_path}: {status}"
+                    )
+
+                doc_id = self._add_doc(
+                    collection_name=collection_name,
+                    tos_url=tos_url,
+                    doc_id=doc_id,
+                )
+
+                results.append(
+                    {
+                        "tos_url": f"tos://{tos_url}",
+                        "doc_id": doc_id,
+                    }
+                )
+
+            return results
+
+        # Handle list of strings (multiple string upload)
+        elif isinstance(data, list) and all(isinstance(item, str) for item in data):
+            # Handle multiple string upload
+            file_names = kwargs.get("file_name")
+            if (
+                not file_names
+                or not isinstance(file_names, list)
+                or len(file_names) != len(data)
+            ):
+                raise ValueError(
+                    "For multiple string upload, file_name must be provided as a list with the same length as data"
+                )
+
+            results = []
+            for i, content in enumerate(data):
+                # Create kwargs for this specific string
+                single_kwargs = kwargs.copy()
+                single_kwargs["file_name"] = file_names[i]
+
+                # Generate or use provided doc_id for this string
+                doc_id = single_kwargs.get("doc_id")
+                if not doc_id:
+                    doc_id = str(uuid.uuid4())
+                    single_kwargs["doc_id"] = doc_id
+
+                status, tos_url = self._upload_to_tos(data=content, **single_kwargs)
+                if status != 200:
+                    raise ValueError(f"Error in upload_to_tos for string {i}: {status}")
+
+                doc_id = self._add_doc(
+                    collection_name=collection_name,
+                    tos_url=tos_url,
+                    doc_id=doc_id,
+                )
+
+                results.append(
+                    {
+                        "tos_url": f"tos://{tos_url}",
+                        "doc_id": doc_id,
+                    }
+                )
+
+            return results
+
+        # Handle single file upload or other data types
+        else:
+            # Handle doc_id from kwargs or generate a new one
+            doc_id = kwargs.get("doc_id", str(uuid.uuid4()))
+
+            status, tos_url = self._upload_to_tos(data=data, **kwargs)
+            if status != 200:
+                raise ValueError(f"Error in upload_to_tos: {status}")
+            doc_id = self._add_doc(
+                collection_name=collection_name,
+                tos_url=tos_url,
+                doc_id=doc_id,
+            )
+            return {
+                "tos_url": f"tos://{tos_url}",
+                "doc_id": doc_id,
+            }
 
     def delete(self, **kwargs: Any):
         name = kwargs.get("name")
diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py
index 9f2b12dd..b2ed7b3c 100644
--- a/veadk/knowledgebase/knowledgebase.py
+++ b/veadk/knowledgebase/knowledgebase.py
@@ -56,9 +56,16 @@ def add(
     ):
         """
         Add documents to the vector database.
-        You can only upload files or file characters when the adapter type used is vikingdb.
-        In addition, if you upload data of the bytes type,
-            for example, if you read the file stream of a pdf, then you need to pass an additional parameter file_ext = '.pdf'.
+        Args:
+            data (str | list[str] | TextIO | BinaryIO | bytes): The data to be added.
+                - str: A single file path or a content string. (viking only)
+                - list[str]: A list of content strings, or a list of file paths. (file paths are viking only)
+                - TextIO: An open text-mode file object. (viking only)
+                - BinaryIO: An open binary-mode file object. (viking only)
+                - bytes: Raw binary data, e.g. the result of f.read(); file_name must be provided. (viking only)
+            app_name: index name
+            **kwargs: Additional keyword arguments.
+                - file_name (str | list[str]): The file name or a list of file names (including suffix). (viking only)
         """
         if self.backend != "viking" and not (
             isinstance(data, str) or isinstance(data, list)
@@ -73,8 +80,7 @@ def add(
         if self.backend == "viking":
             # Case 1: Handling file paths or lists of file paths (str)
             if isinstance(data, str) and os.path.isfile(data):
-                # 单个文件路径，直接调用client.add
-                # 获取文件名（包括后缀名）
+                # Get the file name (including the suffix)
                 if "file_name" not in kwargs or not kwargs["file_name"]:
                     kwargs["file_name"] = os.path.basename(data)
                 return self._adapter.add(data=data, index=index, **kwargs)
@@ -125,6 +131,10 @@ def add(
             # Case6: Unsupported data type
             raise TypeError(f"Unsupported data type: {type(data)}")
 
+        if isinstance(data, list):
+            raise TypeError(
+                f"Unsupported data type: {type(data)}, Only viking support file_path and file bytes"
+            )
         # not viking
         return self._adapter.add(data=data, index=index, **kwargs)
 

From e312b5460fb3fd59475d55a428d8f8ce62ad8a0b Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 13:13:42 +0800
Subject: [PATCH 3/7] feat: knowledge exists and list_docs

---
 veadk/database/database_adapter.py       | 111 +++++++++++++++++++++++
 veadk/database/viking/viking_database.py |  37 +++++++-
 veadk/knowledgebase/knowledgebase.py     |  17 +++-
 3 files changed, 162 insertions(+), 3 deletions(-)

diff --git a/veadk/database/database_adapter.py b/veadk/database/database_adapter.py
index b12fbb52..55081b0d 100644
--- a/veadk/database/database_adapter.py
+++ b/veadk/database/database_adapter.py
@@ -28,6 +28,25 @@ def __init__(self, client):
 
         self.client: RedisDatabase = client
 
+    def index_exists(self, index: str) -> bool:
+        """
+        Check if the index (key) exists in Redis.
+
+        Args:
+            index: The Redis key to check
+
+        Returns:
+            bool: True if the key exists, False otherwise
+        """
+        try:
+            # Use Redis EXISTS command to check if key exists
+            return bool(self.client._client.exists(index))
+        except Exception as e:
+            logger.error(
+                f"Failed to check if key exists in Redis: index={index} error={e}"
+            )
+            return False
+
     def add(self, data: list[str], index: str, **kwargs):
         logger.debug(f"Adding documents to Redis database: index={index}")
 
@@ -99,6 +118,24 @@ def __init__(self, client):
 
         self.client: MysqlDatabase = client
 
+    def index_exists(self, index: str) -> bool:
+        """
+        Check if the table (index) exists in MySQL database.
+
+        Args:
+            index: The table name to check
+
+        Returns:
+            bool: True if the table exists, False otherwise
+        """
+        try:
+            return self.client.table_exists(index)
+        except Exception as e:
+            logger.error(
+                f"Failed to check if table exists in MySQL: index={index} error={e}"
+            )
+            return False
+
     def create_table(self, table_name: str):
         logger.debug(f"Creating table for SQL database: table_name={table_name}")
 
@@ -188,6 +225,25 @@ def __init__(self, client):
 
         self.client: OpenSearchVectorDatabase = client
 
+    def index_exists(self, index: str) -> bool:
+        """
+        Check if the collection (index) exists in OpenSearch.
+
+        Args:
+            index: The collection name to check
+
+        Returns:
+            bool: True if the collection exists, False otherwise
+        """
+        try:
+            self._validate_index(index)
+            return self.client.collection_exists(index)
+        except Exception as e:
+            logger.error(
+                f"Failed to check if collection exists in OpenSearch: index={index} error={e}"
+            )
+            return False
+
     def _validate_index(self, index: str):
         """
         Verify whether the string conforms to the naming rules of index_name in OpenSearch.
@@ -259,6 +315,25 @@ def __init__(self, client):
 
         self.client: VikingDatabase = client
 
+    def index_exists(self, index: str) -> bool:
+        """
+        Check if the collection (index) exists in VikingDB.
+
+        Args:
+            index: The collection name to check
+
+        Returns:
+            bool: True if the collection exists, False otherwise
+        """
+        try:
+            self._validate_index(index)
+            return self.client.collection_exists(index)
+        except Exception as e:
+            logger.error(
+                f"Failed to check if collection exists in VikingDB: index={index} error={e}"
+            )
+            return False
+
     def _validate_index(self, index: str):
         """
         Only English letters, numbers, and underscores (_) are allowed.
@@ -341,6 +416,25 @@ def __init__(self, client):
 
         self.client: VikingMemoryDatabase = client
 
+    def index_exists(self, index: str) -> bool:
+        """
+        Check if the collection (index) exists in VikingMemoryDB.
+
+        Note:
+            VikingMemoryDatabase does not support checking if a collection exists.
+            This method always raises NotImplementedError.
+
+        Args:
+            index: The collection name to check
+
+        Raises:
+            NotImplementedError: Always, as VikingMemoryDatabase does not support this functionality
+        """
+        logger.warning(
+            "VikingMemoryDatabase does not support checking if a collection exists"
+        )
+        raise NotImplementedError("VikingMemoryDatabase does not support index_exists")
+
     def _validate_index(self, index: str):
         if not (
             isinstance(index, str)
@@ -388,6 +482,23 @@ def __init__(self, client):
 
         self.client: LocalDataBase = client
 
+    def index_exists(self, index: str) -> bool:
+        """
+        Check if the index exists in LocalDataBase.
+
+        Note:
+            LocalDataBase does not support checking if an index exists.
+            This method always returns True.
+
+        Args:
+            index: The index name to check (not used in LocalDataBase)
+
+        Returns:
+            bool: Always returns True as LocalDataBase does not support this functionality
+        """
+        logger.warning("LocalDataBase does not support checking if an index exists")
+        return True
+
     def add(self, data: list[str], **kwargs):
         self.client.add(data)
 
diff --git a/veadk/database/viking/viking_database.py b/veadk/database/viking/viking_database.py
index ea88b84c..50510923 100644
--- a/veadk/database/viking/viking_database.py
+++ b/veadk/database/viking/viking_database.py
@@ -41,7 +41,8 @@
 doc_del_path = "/api/knowledge/collection/delete"
 doc_add_path = "/api/knowledge/doc/add"
 doc_info_path = "/api/knowledge/doc/info"
-list_docs_path = "/api/knowledge/point/list"
+list_point_path = "/api/knowledge/point/list"
+list_docs_path = "/api/knowledge/doc/list"
 delete_docs_path = "/api/knowledge/point/delete"
 
 
@@ -550,7 +551,7 @@ def list_chunks(
 
         list_doc_req = prepare_request(
             method="POST",
-            path=list_docs_path,
+            path=list_point_path,
             config=self.config,
             data=request_params,
         )
@@ -579,6 +580,38 @@ def list_chunks(
         ]
         return data
 
+    def list_docs(
+        self, collection_name: str, offset: int = 0, limit: int = -1
+    ) -> list[dict]:
+        request_params = {
+            "collection_name": collection_name,
+            "project": self.config.project,
+            "offset": offset,
+            "limit": limit,
+        }
+
+        list_doc_req = prepare_request(
+            method="POST",
+            path=list_docs_path,
+            config=self.config,
+            data=request_params,
+        )
+        resp = requests.request(
+            method=list_doc_req.method,
+            url="https://{}{}".format(g_knowledge_base_domain, list_doc_req.path),
+            headers=list_doc_req.headers,
+            data=list_doc_req.body,
+        )
+
+        result = resp.json()
+        if result["code"] != 0:
+            logger.error(f"Error in list_docs: {result['message']}")
+            raise ValueError(f"Error in list_docs: {result['message']}")
+
+        if not result["data"]["doc_list"]:
+            return []
+        return result["data"]["doc_list"]
+
     def delete_by_id(self, collection_name: str, id: str) -> bool:
         request_params = {
             "collection_name": collection_name,
diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py
index b2ed7b3c..fce1bb75 100644
--- a/veadk/knowledgebase/knowledgebase.py
+++ b/veadk/knowledgebase/knowledgebase.py
@@ -158,6 +158,21 @@ def delete_doc(self, app_name: str, id: str) -> bool:
         index = build_knowledgebase_index(app_name)
         return self._adapter.delete_doc(index=index, id=id)
 
-    def list_docs(self, app_name: str, offset: int = 0, limit: int = 100) -> list[dict]:
+    def list_chunks(
+        self, app_name: str, offset: int = 0, limit: int = 100
+    ) -> list[dict]:
         index = build_knowledgebase_index(app_name)
         return self._adapter.list_chunks(index=index, offset=offset, limit=limit)
+
+    def list_docs(self, app_name: str, offset: int = 0, limit: int = 100) -> list[dict]:
+        if self.backend == "viking":
+            index = build_knowledgebase_index(app_name)
+            return self._adapter.list_docs(index=index, offset=offset, limit=limit)
+        else:
+            raise NotImplementedError(
+                f"list_docs not supported for {self.backend}, only viking support list_docs"
+            )
+
+    def exists(self, app_name: str) -> bool:
+        index = build_knowledgebase_index(app_name)
+        return self._adapter.index_exists(index=index)

From 5a0e2056da9221efd226a2b2e0b3018a2fd1e22a Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 13:18:18 +0800
Subject: [PATCH 4/7] fix: fix list_docs empty

---
 veadk/database/viking/viking_database.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/veadk/database/viking/viking_database.py b/veadk/database/viking/viking_database.py
index 50510923..fa1ac173 100644
--- a/veadk/database/viking/viking_database.py
+++ b/veadk/database/viking/viking_database.py
@@ -567,7 +567,7 @@ def list_chunks(
             logger.error(f"Error in list_docs: {result['message']}")
             raise ValueError(f"Error in list_docs: {result['message']}")
 
-        if not result["data"]["point_list"]:
+        if not result["data"].get("point_list", []):
             return []
 
         data = [
@@ -608,7 +608,7 @@ def list_docs(
             logger.error(f"Error in list_docs: {result['message']}")
             raise ValueError(f"Error in list_docs: {result['message']}")
 
-        if not result["data"]["doc_list"]:
+        if not result["data"].get("doc_list", []):
             return []
         return result["data"]["doc_list"]
 

From 237541f1e27bfc6d52bb6d7b260bfd07ea4f7318 Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 13:37:12 +0800
Subject: [PATCH 5/7] fix: invert list type check in knowledgebase add

---
 veadk/knowledgebase/knowledgebase.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py
index fce1bb75..dacaf391 100644
--- a/veadk/knowledgebase/knowledgebase.py
+++ b/veadk/knowledgebase/knowledgebase.py
@@ -131,9 +131,9 @@ def add(
             # Case6: Unsupported data type
             raise TypeError(f"Unsupported data type: {type(data)}")
 
-        if isinstance(data, list):
+        if not isinstance(data, list):
             raise TypeError(
-                f"Unsupported data type: {type(data)}, Only viking support file_path and file bytes"
+                f"Unsupported data type: {type(data)}. Only viking support file_path and file bytes"
             )
         # not viking
         return self._adapter.add(data=data, index=index, **kwargs)

From 396e9c3c54cbdb9678d84f8b2746af3b679778fa Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 15:02:38 +0800
Subject: [PATCH 6/7] fix: delete viking entries at doc level instead of point level

---
 veadk/database/viking/viking_database.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/veadk/database/viking/viking_database.py b/veadk/database/viking/viking_database.py
index fa1ac173..3127b0e2 100644
--- a/veadk/database/viking/viking_database.py
+++ b/veadk/database/viking/viking_database.py
@@ -43,7 +43,7 @@
 doc_info_path = "/api/knowledge/doc/info"
 list_point_path = "/api/knowledge/point/list"
 list_docs_path = "/api/knowledge/doc/list"
-delete_docs_path = "/api/knowledge/point/delete"
+delete_docs_path = "/api/knowledge/doc/delete"
 
 
 class VolcengineTOSConfig(BaseModel):
@@ -616,7 +616,7 @@ def delete_by_id(self, collection_name: str, id: str) -> bool:
         request_params = {
             "collection_name": collection_name,
             "project": self.config.project,
-            "point_id": id,
+            "doc_id": id,
         }
 
         delete_by_id_req = prepare_request(

From 676f99c4cc5d32bb6b93e3bb90d3c7d84474d784 Mon Sep 17 00:00:00 2001
From: "hanzhi.421" <hanzhi.421@bytedance.com>
Date: Wed, 10 Sep 2025 15:07:03 +0800
Subject: [PATCH 7/7] fix: use self._adapter in knowledgebase delete

---
 veadk/knowledgebase/knowledgebase.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py
index dacaf391..2fa9e833 100644
--- a/veadk/knowledgebase/knowledgebase.py
+++ b/veadk/knowledgebase/knowledgebase.py
@@ -152,7 +152,7 @@ def search(self, query: str, app_name: str, top_k: int | None = None) -> list[st
 
     def delete(self, app_name: str) -> bool:
         index = build_knowledgebase_index(app_name)
-        return self.adapter.delete(index=index)
+        return self._adapter.delete(index=index)
 
     def delete_doc(self, app_name: str, id: str) -> bool:
         index = build_knowledgebase_index(app_name)