Skip to content

Commit c9c0133

Browse files
author
Adam Soos
committed
WS-3151: add record similarity top level object and example
1 parent 0c93c5b commit c9c0133

2 files changed

Lines changed: 144 additions & 1 deletion

File tree

examples/record_similarity.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Example code to call the Rosette API to get similarity scores between two lists of records
4+
"""
5+
6+
import argparse
7+
import json
8+
import os
9+
10+
from rosette.api import API, RecordSimilarityParameters, RosetteException
11+
12+
13+
def run(key, alt_url='https://api.rosette.com/rest/v1/'):
    """ Submit a sample record-similarity request and return the response.

    The request is assembled from three sections: C{fields}, C{properties}
    and C{records}. If the call raises a L{RosetteException}, the exception
    is printed and None is returned.
    """
    client = API(user_key=key, service_url=alt_url)

    # Declaration of every field used by the records: its type and weight.
    field_definitions = {
        "primaryName": {"type": "rni_name", "weight": 0.5},
        "dob": {"type": "rni_date", "weight": 0.2},
        "addr": {"type": "rni_address", "weight": 0.5},
        "dob2": {"type": "rni_date", "weight": 0.1}
    }

    # Request-level settings.
    request_properties = {"threshold": 0.1, "includeExplainInfo": False}

    # The two record lists. Field values are given either as plain strings
    # or as small objects, and not every record fills in every field.
    left_records = [
        {
            "primaryName": {
                "text": "Ethan R",
                "entityType": "PERSON",
                "language": "eng",
                "languageOfOrigin": "eng",
                "script": "Latn"
            },
            "dob": "1993-04-16",
            "addr": "123 Roadlane Ave",
            "dob2": {"date": "1993/04/16"}
        },
        {
            "dob": {"date": "1993-04-16"},
            "primaryName": {"text": "Evan R"}
        }
    ]
    right_records = [
        {
            "dob": {"date": "1993-04-16"},
            "primaryName": {"text": "Seth R", "language": "eng"}
        },
        {
            "primaryName": "Ivan R",
            "dob": {"date": "1993-04-16"},
            "addr": {"address": "123 Roadlane Ave"},
            "dob2": {"date": "1993/04/16"}
        }
    ]
    record_sets = {"left": left_records, "right": right_records}

    request = RecordSimilarityParameters()
    for section, content in (("fields", field_definitions),
                             ("properties", request_properties),
                             ("records", record_sets)):
        request[section] = content

    # Left disabled in this example:
    # request["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.2"}

    try:
        return client.record_similarity(request)
    except RosetteException as error:
        print(error)
        return None
100+
101+
102+
# Command-line interface of the example: the API key is mandatory, the
# service URL falls back to the public Rosette endpoint.
PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                 description='Calls the ' +
                                 os.path.splitext(os.path.basename(__file__))[0] + ' endpoint')
PARSER.add_argument('-k', '--key', help='Rosette API Key', required=True)
PARSER.add_argument('-u', '--url', help="Alternative API URL",
                    default='https://api.rosette.com/rest/v1/')

if __name__ == '__main__':
    # Use the key and URL given on the command line instead of hard-coded
    # debugging values, so the declared arguments actually take effect.
    ARGS = PARSER.parse_args()
    RESULT = run(ARGS.key, ARGS.url)
    print(RESULT)

rosette/api.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,27 @@ def validate(self):
344344
"Required Name De-Duplication parameter is missing: names",
345345
repr("names"))
346346

347+
class RecordSimilarityParameters(_RequestParametersBase):
    """Parameter object for C{record-similarity} endpoint.
    Required:
    C{records} The C{record} objects to compare
    C{properties} A C{property} object
    C{fields} A dictionary of C{field} objects

    All three parameters must be set before the request is validated;
    L{validate} raises a L{RosetteException} for the first one still unset.
    """

    def __init__(self):
        # NOTE(review): presumably marks the request as a plain
        # (non-multipart) body -- confirm against the request-sending code.
        self.use_multipart = False
        _RequestParametersBase.__init__(self, ("fields", "properties", "records"))

    def validate(self):
        """Internal. Do not use.

        @raise RosetteException: with status C{missingParameter} when any of
        C{fields}, C{properties} or C{records} is None.
        """
        for option in "fields", "properties", "records":  # required
            if self[option] is None:
                raise RosetteException(
                    "missingParameter",
                    "Required Record Similarity parameter is missing: " + option,
                    repr(option))
367+
347368

348369
class EndpointCaller(object):
349370
"""L{EndpointCaller} objects are invoked via their instance methods to obtain results
@@ -592,7 +613,8 @@ def __init__(
592613
'TOKENS': 'tokens',
593614
'TOPICS': 'topics',
594615
'TRANSLITERATION': 'transliteration',
595-
'EVENTS': 'events'
616+
'EVENTS': 'events',
617+
'RECORD_SIMILARITY': 'record-similarity'
596618
}
597619

598620
def __del__(self):
@@ -966,6 +988,15 @@ def name_deduplication(self, parameters):
966988
@return: A python dictionary containing the results of de-duplication"""
967989
return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters, NameDeduplicationParameters)
968990

991+
def record_similarity(self, parameters):
    """
    Create an L{EndpointCaller} for the record-similarity endpoint and call it
    to get similarity scores between lists of records.
    @param parameters: An object specifying the data,
    and possible metadata, to be processed by the record matcher.
    @type parameters: L{RecordSimilarityParameters}
    @return: A python dictionary containing the results of record matching."""
    caller = EndpointCaller(self, self.endpoints['RECORD_SIMILARITY'])
    return caller.call(parameters, RecordSimilarityParameters)
999+
9691000
def text_embedding(self, parameters):
9701001
""" deprecated
9711002
Create an L{EndpointCaller} to identify text vectors found in the texts

0 commit comments

Comments
 (0)