11# -*- coding: utf-8 -*-
22
33"""Miscellaneous utils of the package."""
4-
4+ import itertools
55import json
66import logging
77import pickle
@@ -123,23 +123,30 @@ def get_idx_scores_mapping(scores):
123123 return {i : score for i , score in enumerate (scores )}
124124
125125
126- def print_dict_dimensions (entities_db , message = 'Total number of ' ):
def map_intersection_type_background(background_labels: Dict[str, list], input_labels: list):
    """Map each background label to the entities it shares with the input.

    :param background_labels: background label -> list of entities
    :param input_labels: entities to intersect every background list with
    :return: dictionary with the same keys as ``background_labels`` whose
        values are the sets of entities found both in the corresponding
        background list and in ``input_labels``
    """
    # Use the value yielded by ``items()`` directly rather than looking the
    # key up a second time.
    return {
        bck_label: set(bck_entities).intersection(input_labels)
        for bck_label, bck_entities in background_labels.items()
    }
134+
135+
def print_dict_dimensions(entities_db, title='Title', message=''):
    """Print a titled, human-readable dump of a (possibly nested) dictionary.

    One section is emitted per top-level key. When the value is itself a
    dictionary, each of its key/value pairs is listed; any other value is
    rendered as-is.

    :param entities_db: dictionary to report; values may be dictionaries or
        any other printable object
    :param title: heading placed at the start of the report
    :param message: text placed in front of every top-level key
    """
    # NOTE(review): the whitespace inside the string literals below is copied
    # from the diff rendering this function was recovered from -- confirm it
    # against the repository file before relying on the exact layout.
    chunks = [f'{title} \n ']

    for k1, v1 in entities_db.items():
        chunks.append(f' \n {message} {k1} : \n ')
        if isinstance(v1, dict):
            chunks.extend(f'{k2} ({v2} )\n ' for k2, v2 in v1.items())
        else:
            chunks.append(f'{v1} ')

    # Join once at the end instead of growing a string inside the loop.
    report = ''.join(chunks)
    print(f'{report} \n \n ')
143150
144151
145152def log_dict (dict_to_print : dict , message : str = '' ):
@@ -159,7 +166,12 @@ def get_random_value_from_dict(d: dict):
159166 return d [get_random_key_from_dict (d )]
160167
161168
162- """File loading utils."""
def lists_combinations(list_1, list_2):
    """Return every pairing of the two lists as space-joined strings.

    Pairs follow the order of ``itertools.product``: each item of ``list_1``
    is combined, in turn, with every item of ``list_2``.
    """
    combined = []
    for first, second in itertools.product(list_1, list_2):
        combined.append(first + ' ' + second)
    return combined
172+
173+
174+ """File loading/writing utils."""
163175
164176
165177def format_checker (fmt : str , fmt_list : list = GRAPH_FORMATS ) -> None :
@@ -188,6 +200,12 @@ def from_json(path: str):
188200 return json .load (f )
189201
190202
def to_json(data, path: str):
    """Serialize ``data`` as JSON into the file at ``path``.

    The file is opened in text write mode, so any existing content is
    replaced.
    """
    with open(path, mode='w') as json_file:
        json.dump(data, json_file)
207+
208+
191209def from_pickle (input_path ):
192210 """Read from pickle file."""
193211 with open (input_path , 'rb' ) as f :
@@ -277,8 +295,11 @@ def munge_cell(cell):
277295 elif isinstance (cell , float ) or isinstance (cell , int ):
278296 return cell
279297
298+ elif cell is None :
299+ return 'NA'
300+
280301 else :
281- raise TypeError ('The cell type could not be processed.' )
302+ raise TypeError (f 'The cell " { cell } " could not be processed.' )
282303
283304
284305def parse_xls_sheet_to_df (sheet : opxl .workbook ,
@@ -314,7 +335,7 @@ def parse_xls_to_df(path: str,
314335 return {sheets [ix ].lower (): parse_xls_sheet_to_df (sheet , min_row , relevant_cols , irrelevant_cols )
315336 for ix , sheet in enumerate (wb )
316337 if (relevant_sheets is not None and sheets [ix ] in relevant_sheets ) or (
317- irrelevant_sheets is not None and sheets [ix ] in irrelevant_sheets )
338+ irrelevant_sheets is not None and sheets [ix ] not in irrelevant_sheets )
318339 }
319340
320341 else :
0 commit comments