@@ -528,39 +528,103 @@ def map_labels_input(input_labels: Union[list, Dict[str, int], Dict[str, Dict[st
528528 f'{ EMOJI } The background mapping labels should be provided as a label list or as a type dict of label list.'
529529 )
530530
531- if show_statistics :
532- log_dict (mapping_statistics (mapped_labels , input_labels ) )
531+ if show_descriptive_stat :
532+ print_dict_dimensions (mapping_statistics (input_labels , mapped_labels ), title = 'Mapping descriptive statistics' )
533533
534534 return mapped_labels
535535
536536
def _count_and_ratio(count: int, total: int) -> tuple:
    """Return ``(count, count / total)``, or ``(0, 0)`` when ``total`` is zero."""
    if total == 0:
        return 0, 0
    return count, count / total


def mapping_statistics(
        input_labels: Union[list, Dict[str, Dict[str, list]], Dict[str, list]],
        mapped_labels: Union[list, Dict[str, Dict[str, list]], Dict[str, list]],
        subtotals: Union[Dict[str, int], None] = None
) -> Dict:
    """Calculate mapping descriptive statistics.

    Every statistic is a ``(count, ratio)`` tuple: the number of mapped labels
    and that number divided by the size of the matching input. The layout of
    the result follows the shape of ``mapped_labels``, which is detected with
    the module's ``*_data_struct_check`` predicates:

    * flat labels: only a ``'total'`` entry;
    * one level of types: one entry per type plus ``'total'``;
    * two levels of types: one nested statistics dict per outer type, plus a
      ``'total'`` dict holding one entry per inner type and an overall
      ``'total'``.

    Whenever no input label was collected, ``'total'`` is ``(0, 0)``.

    :param input_labels: labels before mapping.
    :param mapped_labels: labels that were mapped.
    :param subtotals: per-type denominators, supplied by the recursive call
        made for two-level mappings. When given (even if empty), the raw
        ``'total_mapping'`` and ``'total_input'`` sets are added to the result
        so the caller can merge them.
    :return: the statistics dictionary described above.
    :raises TypeError: if ``mapped_labels`` matches none of the supported
        shapes.
    """
    statistics_dict = {}

    total_mapping = set()
    total_input = set()

    if _label_list_data_struct_check(mapped_labels) or _label_scores_dict_data_struct_check(mapped_labels):
        total_mapping = mapped_labels
        total_input = input_labels

        if len(total_input) != 0:
            statistics_dict['total'] = _count_and_ratio(len(total_mapping), len(total_input))

    elif (_type_dict_label_list_data_struct_check(mapped_labels)
          or _type_dict_label_scores_dict_data_struct_check(mapped_labels)):
        # The shape of the input does not change while iterating, so test it once.
        input_is_type_dict = (_type_dict_label_list_data_struct_check(input_labels)
                              or _type_dict_label_scores_dict_data_struct_check(input_labels))

        for mapping_type, mapping in mapped_labels.items():
            if input_is_type_dict and mapping_type in input_labels:
                # The input is split by the same type: compare like with like.
                type_input = input_labels[mapping_type]
                statistics_dict[mapping_type] = _count_and_ratio(len(mapping), len(type_input))
                total_input.update(type_input)
            else:
                # No per-type input available: fall back to the caller-provided
                # subtotal, or to the size of the whole input.
                if subtotals is None:
                    subtotal_input = len(input_labels)
                else:
                    subtotal_input = subtotals[mapping_type]

                statistics_dict[mapping_type] = _count_and_ratio(len(mapping), subtotal_input)
                total_input.update(input_labels)

            total_mapping.update(mapping)

        # `is not None` rather than truthiness: a recursive call may legitimately
        # pass an empty subtotals dict, and the caller still pops both keys.
        if subtotals is not None:
            statistics_dict['total_mapping'] = total_mapping
            statistics_dict['total_input'] = total_input

        if len(total_input) != 0:
            statistics_dict['total'] = _count_and_ratio(len(total_mapping), len(total_input))

    elif (_two_dimensional_type_dict_label_scores_dict_data_struct_check(mapped_labels)
          or _two_dimensional_type_dict_label_list_data_struct_check(mapped_labels)):
        # Gather the distinct mapped labels of every inner type across all outer types.
        labels_by_subtype = defaultdict(set)
        for typed_mapping in mapped_labels.values():
            for mapping_subtype, labels in typed_mapping.items():
                labels_by_subtype[mapping_subtype].update(labels)

        subtotals_dict = {
            mapping_subtype: len(labels)
            for mapping_subtype, labels in labels_by_subtype.items()
        }

        for mapping_type, typed_mapping in mapped_labels.items():
            type_statistics = mapping_statistics(input_labels, typed_mapping, subtotals=subtotals_dict)

            statistics_dict[mapping_type] = type_statistics

            # The raw sets are only transport for the grand total; remove them
            # from the per-type statistics that are reported.
            total_mapping.update(type_statistics.pop('total_mapping'))
            total_input.update(type_statistics.pop('total_input'))

        if len(total_input) != 0:
            input_size = len(total_input)
            totals = {
                mapping_subtype: (count, count / input_size)
                for mapping_subtype, count in subtotals_dict.items()
            }
            totals['total'] = (len(total_mapping), len(total_mapping) / input_size)

            statistics_dict['total'] = totals

    else:
        raise TypeError(
            f'{EMOJI} The input labels data structure can not be processed for label mapping'
        )

    if len(total_input) == 0:
        statistics_dict['total'] = (0, 0)

    return statistics_dict
564628
565629
566630def _map_labels (input_labels : Union [list , Dict [str , Dict [str , int ]], Dict [str , int ], Dict [str , list ]],
0 commit comments