We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 5bb6deb + 87b9d1c commit 6778cd3Copy full SHA for 6778cd3
1 file changed
graphgen/models/reader/pickle_reader.py
@@ -0,0 +1,25 @@
1
+import pickle
2
+from typing import Any, Dict, List
3
+
4
+from graphgen.bases.base_reader import BaseReader
5
6
7
class PickleReader(BaseReader):
    """
    Read pickle files, requiring the top-level object to be List[Dict[str, Any]].

    Every item of the list must be a dict. A dict whose ``"type"`` value is
    ``"text"`` must additionally contain the key named by ``self.text_column``.

    NOTE(review): ``self.text_column`` and ``self.filter`` are not defined in
    this class; presumably they are provided by ``BaseReader`` -- confirm.
    """

    def read(self, file_path: str) -> List[Dict[str, Any]]:
        """
        Load a pickle file, validate its structure and return the documents.

        :param file_path: Path of the pickle file to read.
        :return: The value returned by ``self.filter`` for the loaded list.
        :raises ValueError: If the unpickled object is not a list, if any
            item of the list is not a dict, or if a document of type
            ``"text"`` lacks the ``self.text_column`` key.
        """
        # SECURITY: pickle.load can execute arbitrary code while
        # deserializing. Only call this on files from a trusted source;
        # prefer a data-only format (e.g. JSON) for untrusted input.
        with open(file_path, "rb") as f:
            data = pickle.load(f)

        if not isinstance(data, list):
            raise ValueError("Pickle file must contain a list of documents.")

        # Validate the whole list before filtering so that a malformed file
        # is rejected outright instead of being partially processed.
        for doc in data:
            if not isinstance(doc, dict):
                raise ValueError("Every item in the list must be a dict.")
            # Only "text" documents are required to carry the text column.
            if doc.get("type") == "text" and self.text_column not in doc:
                raise ValueError(f"Missing '{self.text_column}' in document: {doc}")

        return self.filter(data)
0 commit comments