|
| 1 | +from graphgen.bases import BaseExtractor, BaseLLMWrapper |
| 2 | + |
| 3 | + |
class SchemaGuidedExtractor(BaseExtractor):
    """Schema-guided extractor of structured information (work in progress).

    The aim is to let an LLM pull structured fields out of free text, steered
    by a schema (JSON/YAML Schema or a Pydantic model). For now the
    constructor accepts the schema as a ``dict`` and simply stores it; both
    ``build_prompt`` and ``extract`` are unimplemented placeholders that
    return ``None``.

    Usage example::

        schema = {
            "type": "legal contract",
            "description": "A legal contract for leasing property.",
            "properties": {
                "end_date": {
                    "type": "string",
                    "description": "The end date of the lease.",
                },
                "leased_space": {
                    "type": "string",
                    "description": "Description of the space that is being leased.",
                },
                "lessee": {
                    "type": "string",
                    "description": "The lessee's name (and possibly address).",
                },
                "lessor": {
                    "type": "string",
                    "description": "The lessor's name (and possibly address).",
                },
                "signing_date": {
                    "type": "string",
                    "description": "The date the contract was signed.",
                },
                "start_date": {
                    "type": "string",
                    "description": "The start date of the lease.",
                },
                "term_of_payment": {
                    "type": "string",
                    "description": "Description of the payment terms.",
                },
                "designated_use": {
                    "type": "string",
                    "description": "Description of the designated use of the property being leased.",
                },
                "extension_period": {
                    "type": "string",
                    "description": "Description of the extension options for the lease.",
                },
                "expiration_date_of_lease": {
                    "type": "string",
                    "description": "The expiration date of the lease.",
                },
            },
            "required": ["lessee", "lessor", "start_date", "end_date"],
        }
        extractor = SchemaGuidedExtractor(llm_client, schema)
        result = extractor.extract(text)
    """

    def __init__(self, llm_client: BaseLLMWrapper, schema: dict):
        """Initialise the base extractor and remember the schema.

        Args:
            llm_client: LLM wrapper handed on to the base-class initialiser.
            schema: Schema describing the fields to extract; kept unchanged
                on ``self.schema``.
        """
        # The base class takes care of the client; this subclass only adds
        # the schema on top of it.
        super().__init__(llm_client)
        self.schema = schema

    def build_prompt(self, text: str) -> str:
        """Placeholder for prompt construction; currently returns ``None``.

        Args:
            text: Text the prompt is meant to be built for.

        NOTE(review): not implemented yet - presumably this should combine
        ``self.schema`` with ``text``; confirm the intended prompt format.
        """

    def extract(self, text_or_documents: str) -> dict:
        """Placeholder for the extraction step; currently returns ``None``.

        Args:
            text_or_documents: Input to extract structured data from.

        NOTE(review): not implemented yet - the annotated ``dict`` result is
        the intended contract, not what the stub produces today.
        """
0 commit comments