Skip to content

Commit 4431de1

Browse files
committed
mapping_statistics recoding and fixed error
1 parent 0c29e52 commit 4431de1

1 file changed

Lines changed: 84 additions & 20 deletions

File tree

src/diffupy/process_input.py

Lines changed: 84 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -528,39 +528,103 @@ def map_labels_input(input_labels: Union[list, Dict[str, int], Dict[str, Dict[st
528528
f'{EMOJI} The background mapping labels should be provided as a label list or as a type dict of label list.'
529529
)
530530

531-
if show_statistics:
532-
log_dict(mapping_statistics(mapped_labels, input_labels))
531+
if show_descriptive_stat:
532+
print_dict_dimensions(mapping_statistics(input_labels, mapped_labels), title='Mapping descriptive statistics')
533533

534534
return mapped_labels
535535

536536

537-
def mapping_statistics(input_labels: Union[list, Dict[str, Dict[str, int]], Dict[str, int], Dict[str, list]],
538-
mapped_labels: Union[list, Dict[str, Dict[str, int]], Dict[str, int], Dict[str, list]]) -> Dict:
539-
"""Get the mapping statistics."""
540-
percentage_dict = {}
541-
total_mapping = 0
542-
total_labels = 0
537+
def mapping_statistics(
538+
input_labels: Union[list, Dict[str, Dict[str, list]], Dict[str, list]],
539+
mapped_labels: Union[list, Dict[str, Dict[str, list]], Dict[str, list]],
540+
subtotals: Dict[str, int] = None
541+
) -> Dict:
542+
"""Calculate mapping descriptive statistics."""
543+
statistics_dict = {}
543544

544-
if _label_list_data_struct_check(input_labels) or _label_scores_dict_data_struct_check(input_labels):
545-
total_mapping = len(input_labels)
546-
total_labels = len(mapped_labels)
545+
total_mapping = set()
546+
total_input = set()
547547

548-
elif _type_dict_label_list_data_struct_check(input_labels) or _type_dict_label_scores_dict_data_struct_check(
549-
input_labels):
550-
for input_type, mapping in input_labels.items():
551-
if input_type in mapped_labels:
552-
percentage_dict[input_type] = len(mapping) / len(mapped_labels[input_type])
553-
total_mapping += len(mapping)
554-
total_labels += len(mapped_labels[input_type])
548+
if _label_list_data_struct_check(mapped_labels) or _label_scores_dict_data_struct_check(mapped_labels):
549+
total_mapping = mapped_labels
550+
total_input = input_labels
551+
if len(total_input) != 0:
552+
statistics_dict['total'] = (len(total_mapping), len(total_mapping) / len(total_input))
553+
554+
elif _type_dict_label_list_data_struct_check(mapped_labels) or _type_dict_label_scores_dict_data_struct_check(
555+
mapped_labels):
556+
for mapping_type, mapping in mapped_labels.items():
557+
558+
if (_type_dict_label_list_data_struct_check(input_labels) or _type_dict_label_scores_dict_data_struct_check(
559+
input_labels)) and mapping_type in input_labels.keys():
560+
if len(input_labels[mapping_type]) != 0:
561+
statistics_dict[mapping_type] = (len(mapping), len(mapping) / len(input_labels[mapping_type]))
562+
else:
563+
statistics_dict[mapping_type] = (0, 0)
564+
565+
total_mapping.update(mapping)
566+
total_input.update(input_labels[mapping_type])
567+
else:
568+
if subtotals is None:
569+
subtotal_input = len(input_labels)
570+
else:
571+
subtotal_input = subtotals[mapping_type]
572+
573+
if subtotal_input != 0:
574+
statistics_dict[mapping_type] = (len(mapping), len(mapping) / subtotal_input)
575+
else:
576+
statistics_dict[mapping_type] = (0, 0)
577+
578+
total_input.update(input_labels)
579+
580+
total_mapping.update(mapping)
581+
582+
if subtotals:
583+
statistics_dict['total_mapping'] = total_mapping
584+
statistics_dict['total_input'] = total_input
585+
586+
if len(total_input) != 0:
587+
statistics_dict['total'] = (len(total_mapping), len(total_mapping) / len(total_input))
588+
589+
elif _two_dimensional_type_dict_label_scores_dict_data_struct_check(
590+
mapped_labels) or _two_dimensional_type_dict_label_list_data_struct_check(mapped_labels):
591+
592+
subtotals_dict = defaultdict(set)
593+
594+
for _, mapping_subdict in mapped_labels.items():
595+
for mapping_subtype, mapping_subdict in mapping_subdict.items():
596+
subtotals_dict[mapping_subtype].update(mapping_subdict)
597+
598+
subtotals_dict = {
599+
mapping_subtype: len(mapping_subdict)
600+
for mapping_subtype, mapping_subdict in
601+
subtotals_dict.items()
602+
}
603+
604+
for mapping_type, mapping_subdict in mapped_labels.items():
605+
percentage_dict_i = mapping_statistics(input_labels, mapping_subdict, subtotals=subtotals_dict)
606+
607+
statistics_dict[mapping_type] = percentage_dict_i
608+
609+
total_mapping.update(percentage_dict_i.pop('total_mapping'))
610+
total_input.update(percentage_dict_i.pop('total_input'))
611+
612+
if len(total_input) != 0:
613+
subtotals_dict = {mapping_type: (mapping, mapping / len(total_input)) for mapping_type, mapping in
614+
subtotals_dict.items()}
615+
subtotals_dict['total'] = (len(total_mapping), len(total_mapping) / len(total_input))
616+
617+
statistics_dict['total'] = subtotals_dict
555618

556619
else:
557620
raise TypeError(
558621
f'{EMOJI} The input labels data structure can not be processed for label mapping'
559622
)
560623

561-
percentage_dict['General mapping'] = total_mapping / total_labels
624+
if len(total_input) == 0:
625+
statistics_dict['total'] = (0, 0)
562626

563-
return percentage_dict
627+
return statistics_dict
564628

565629

566630
def _map_labels(input_labels: Union[list, Dict[str, Dict[str, int]], Dict[str, int], Dict[str, list]],

0 commit comments

Comments
 (0)