2020import numpy as np
2121import six
2222from runtime .feature .column import (CategoryIDColumn , EmbeddingColumn ,
23- FeatureColumn , IndicatorColumn ,
24- NumericColumn )
23+ IndicatorColumn , NumericColumn )
2524from runtime .feature .field_desc import DataFormat , DataType , FieldDesc
2625from runtime .verifier import fetch_samples
2726
@@ -38,8 +37,8 @@ def init_column_map(target_fc_map, fc):
3837 Returns:
3938 None.
4039 """
41- if isinstance (fc , (EmbeddingColumn , IndicatorColumn )) and \
42- len (fc .get_field_desc ()) == 0 :
40+ if isinstance (fc , (EmbeddingColumn , IndicatorColumn )) \
41+ and len (fc .get_field_desc ()) == 0 :
4342 if fc .name not in target_fc_map :
4443 target_fc_map [fc .name ] = []
4544
@@ -129,7 +128,7 @@ def new_default_field_desc(name):
129128BLANK_PATTERN = re .compile ("\\ s+" )
130129
131130# The Python 2/3 int64 type
132- INT64_TYPE = long if six .PY2 else int
131+ INT64_TYPE = long if six .PY2 else int # noqa: F821
133132
134133
135134def infer_string_data_format (str_data ):
@@ -165,7 +164,8 @@ def fill_csv_field_desc(cell, field_desc):
165164 """
166165 values = cell .split ("," )
167166 if field_desc .is_sparse :
168- assert field_desc .shape is not None , "the shape of CSV format data must be given"
167+ assert field_desc .shape is not None , \
168+ "the shape of CSV format data must be given"
169169 else :
170170 if field_desc .shape is None :
171171 field_desc .shape = [len (values )]
@@ -174,8 +174,8 @@ def fill_csv_field_desc(cell, field_desc):
174174 if np .prod (field_desc .shape ) != len (values ):
175175 if size > 1 :
176176 raise ValueError (
177- "column %s should be csv format dense tensor of %d element(s), but got %d element(s) "
178- %
177+ "column %s should be csv format dense tensor "
178+ "of %d element(s), but got %d element(s)" %
179179 (field_desc .name , np .prod (field_desc .shape ), len (values )))
180180
181181 field_desc .shape = [len (values )]
@@ -356,10 +356,13 @@ def update_feature_column(fc, fd_map):
356356 raise ValueError ("column not found or inferred: %s" % fc .name )
357357
358358 # FIXME(typhoonzero): when to use sequence_category_id_column?
359- # if column fieldDesc is SPARSE, the sparse shape should be in cs.Shape[0]
359+ # if column fieldDesc is SPARSE, the sparse shape should
360+ # be in cs.Shape[0]
360361 bucket_size = field_desc .shape [0 ]
361362 if not field_desc .is_sparse :
362- assert field_desc .max_id > 0 , "use dense column on embedding column but did not got a correct MaxID"
363+ assert field_desc .max_id > 0 , \
364+ "use dense column on embedding column " \
365+ "but did not got a correct MaxID"
363366 bucket_size = field_desc .max_id + 1
364367
365368 fc .category_column = CategoryIDColumn (field_desc , bucket_size )
@@ -370,8 +373,10 @@ def update_feature_column(fc, fd_map):
370373 if field_desc is None :
371374 raise ValueError ("column not found or inferred: %s" % fc .name )
372375
373- assert field_desc .is_sparse , "cannot use sparse column with indicator column"
374- assert field_desc .max_id > 0 , "use indicator column but did not got a correct MaxID"
376+ assert field_desc .is_sparse , \
377+ "cannot use sparse column with indicator column"
378+ assert field_desc .max_id > 0 , \
379+ "use indicator column but did not got a correct MaxID"
375380 bucket_size = field_desc .max_id + 1
376381 fc .category_column = CategoryIDColumn (field_desc , bucket_size )
377382
@@ -392,7 +397,8 @@ def new_feature_column(field_desc):
392397 else :
393398 category_column = CategoryIDColumn (field_desc ,
394399 len (field_desc .vocabulary ))
395- # NOTE(typhoonzero): a default embedding size of 128 is enough for most cases.
400+ # NOTE(typhoonzero): a default embedding size of 128 is enough
401+ # for most cases.
396402 embedding = EmbeddingColumn (category_column = category_column ,
397403 dimension = 128 ,
398404 combiner = "sum" )
@@ -406,7 +412,8 @@ def derive_feature_columns(targets, fc_map, fd_map, selected_field_names,
406412 Derive the FeatureColumn.
407413
408414 Args:
409- targets (list[str]): the feature column targets, e.g. "feature_columns".
415+ targets (list[str]): the feature column targets,
416+ e.g. "feature_columns".
410417 fc_map (dict[str -> dict[str -> list[FeatureColumn]]]): a FeatureColumn
411418 map, where the key of the outer dict is the target name, e.g.
412419 "feature_columns", and the key of the inner dict is the field name.
@@ -439,7 +446,8 @@ def derive_feature_columns(targets, fc_map, fd_map, selected_field_names,
439446 match_field_name = None
440447 for selected_field_name in selected_field_names :
441448 if field_pattern .fullmatch (selected_field_name ):
442- assert match_field_name is None , "%s matches duplicate fields" % field_name
449+ assert match_field_name is None , \
450+ "%s matches duplicate fields" % field_name
443451 match_field_name = selected_field_name
444452
445453 if match_field_name is None :
@@ -464,8 +472,8 @@ def derive_feature_columns(targets, fc_map, fd_map, selected_field_names,
464472 update_feature_column (fc , fd_map )
465473 else :
466474 if len (fc_map ) > 1 :
467- # if column clause have more than one target, each target should specify the
468- # full list of the columns to use.
475+ # if the column clause has more than one target, each target
476+ # should specify the full list of the columns to use.
469477 continue
470478
471479 field_desc = fd_map [selected_field_name ]
@@ -479,13 +487,13 @@ def derive_feature_columns(targets, fc_map, fd_map, selected_field_names,
479487 fc_target_map .update (new_fc_target_map )
480488
481489
482- def update_ir_feature_column_map_by_derived_feature_column_map (
483- features , fc_map , selected_field_names , label_name ):
490+ def update_ir_feature_columns ( features , fc_map , selected_field_names ,
491+ label_name ):
484492 """
485493 Update the IR FeatureColumn map `features` by the derived FeatureColumn map
486- `fc_map` . If any FeatureColumn inside `fc_map` does not exist in `features`,
487- it would be added to `features` . Notice that `features` is not updated
488- in-place, and we would return a new updated IR FeatureColumn map in
494+ `fc_map` . If any FeatureColumn inside `fc_map` does not exist in
495+ `features`, it would be added to `features` . Notice that `features` is not
496+ updated in-place, and we would return a new updated IR FeatureColumn map in
489497 this method.
490498
491499 Args:
@@ -542,9 +550,8 @@ def update_ir_feature_column_map_by_derived_feature_column_map(
542550 break
543551
544552 if not found :
545- raise ValueError (
546- "some feature column is missing in the derivation stage"
547- )
553+ raise ValueError ("some feature column is missing in the "
554+ "derivation stage" )
548555
549556 sorted_pos = sorted (range (len (indices )), key = lambda k : indices [k ])
550557 multi_fd_fcs = [multi_fd_fcs [i ] for i in sorted_pos ]
@@ -572,9 +579,11 @@ def derive_label(label, fd_map):
572579 return # NOTE: clustering model may not specify Label
573580
574581 label_field_desc = fd_map [label_name ]
575- assert label_field_desc is not None , "deriveLabel: LABEL COLUMN '%s' not found" % label_name
582+ assert label_field_desc is not None , \
583+ "deriveLabel: LABEL COLUMN '%s' not found" % label_name
576584
577- # use shape [] if label shape is [1] for Tensorflow scalar label shape should be [].
585+ # use shape [] if label shape is [1], because the TensorFlow scalar
586+ # label shape should be [].
578587 shape = label_field_desc .shape
579588 if shape is None or (len (shape ) == 1 and shape [0 ] == 1 ):
580589 label_field_desc .shape = []
@@ -626,7 +635,7 @@ def infer_feature_columns(conn, select, features, label, n=1000):
626635
627636 derive_feature_columns (targets , fc_map , fd_map , selected_field_names ,
628637 label_name )
629- features = update_ir_feature_column_map_by_derived_feature_column_map (
630- features , fc_map , selected_field_names , label_name )
638+ features = update_ir_feature_columns ( features , fc_map ,
639+ selected_field_names , label_name )
631640 label = derive_label (label , fd_map )
632641 return features , label
0 commit comments