Unify eval estimator code (#2757)

sneaxiy · web-flow · commit c5300f145248 · 2020-07-28T20:27:28.000+08:00
* unify eval estimator code

* update

* update

* polish import_model_module and doc

* polish doc

* fix unit tests
diff --git a/go/codegen/pai/template_tf.go b/go/codegen/pai/template_tf.go
@@ -66,7 +66,7 @@ type requirementsFiller struct {
 const tfImportsText = `
 import tensorflow as tf
 from runtime.tensorflow import is_tf_estimator
-from tensorflow.estimator import DNNClassifier, DNNRegressor, LinearClassifier, LinearRegressor, BoostedTreesClassifier, BoostedTreesRegressor, DNNLinearCombinedClassifier, DNNLinearCombinedRegressor
+from runtime.import_model import import_model
 try:
 	from runtime import oss
 	from runtime.pai.pai_distributed import define_tf_flags, set_oss_environs
@@ -79,7 +79,7 @@ const tfLoadModelTmplText = tfImportsText + `
 FLAGS = define_tf_flags()
 set_oss_environs(FLAGS)
 
-estimator = {{.Estimator}}
+estimator = import_model('''{{.Estimator}}''')
 is_estimator = is_tf_estimator(estimator)
 
 # Keras single node is using h5 format to save the model, no need to deal with export model format.
@@ -95,7 +95,7 @@ else:
 const tfSaveModelTmplText = tfImportsText + `
 import types
 
-estimator = {{.Estimator}}
+estimator = import_model('''{{.Estimator}}''')
 is_estimator = is_tf_estimator(estimator)
 
 # Keras single node is using h5 format to save the model, no need to deal with export model format.
@@ -173,7 +173,7 @@ feature_columns = eval(feature_columns_code)
 # NOTE(typhoonzero): No need to eval model_params["optimizer"] and model_params["loss"]
 # because predicting do not need these parameters.
 
-is_estimator = is_tf_estimator(eval(estimator))
+is_estimator = is_tf_estimator(import_model(estimator))
 
 # Keras single node is using h5 format to save the model, no need to deal with export model format.
 # Keras distributed mode will use estimator, so this is also needed.
@@ -233,7 +233,7 @@ feature_columns = eval(feature_columns_code)
 # NOTE(typhoonzero): No need to eval model_params["optimizer"] and model_params["loss"]
 # because predicting do not need these parameters.
 
-is_estimator = is_tf_estimator(eval(estimator))
+is_estimator = is_tf_estimator(import_model(estimator))
 
 # Keras single node is using h5 format to save the model, no need to deal with export model format.
 # Keras distributed mode will use estimator, so this is also needed.
@@ -296,7 +296,7 @@ feature_columns = eval(feature_columns_code)
 # NOTE(typhoonzero): No need to eval model_params["optimizer"] and model_params["loss"]
 # because predicting do not need these parameters.
 
-is_estimator = is_tf_estimator(eval(estimator))
+is_estimator = is_tf_estimator(import_model(estimator))
 
 # Keras single node is using h5 format to save the model, no need to deal with export model format.
 # Keras distributed mode will use estimator, so this is also needed.
diff --git a/python/runtime/__init__.py b/python/runtime/__init__.py
@@ -10,5 +10,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from runtime.import_custom_models import import_model_def
diff --git a/python/runtime/import_custom_models.py b/python/runtime/import_custom_models.py
diff --git a/python/runtime/import_model.py b/python/runtime/import_model.py
@@ -0,0 +1,65 @@
+# Copyright 2020 The SQLFlow Authors. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+    import sqlflow_models  # noqa: F401
+except:  # noqa: E722
+    pass
+
+from tensorflow.estimator import BoostedTreesClassifier  # noqa: F401
+from tensorflow.estimator import BoostedTreesRegressor  # noqa: F401
+from tensorflow.estimator import DNNClassifier  # noqa: F401
+from tensorflow.estimator import DNNLinearCombinedClassifier  # noqa: F401
+from tensorflow.estimator import DNNLinearCombinedRegressor  # noqa: F401
+from tensorflow.estimator import DNNRegressor  # noqa: F401
+from tensorflow.estimator import LinearClassifier  # noqa: F401
+from tensorflow.estimator import LinearRegressor  # noqa: F401
+
+
+def import_model_module(model, namespace):
+    """
+    Import the top-level module of a two-part dotted model name into
+    namespace; e.g. model = "my_model_module.my_model" binds
+    namespace["my_model_module"]. Any other name shape is a no-op.
+
+    Args:
+        model (str): the model name, e.g. "my_model_package.MyModel".
+        namespace (dict): the namespace the module is bound into.
+
+    Returns:
+        None. An import failure is printed, not raised.
+    """
+    # Only names of the exact form "package.Name" are handled; 'xgboost' and
+    # 'sqlflow_models' prefixes (compared case-insensitively) are skipped.
+    model_name_parts = model.split(".")
+    if len(model_name_parts) == 2:
+        module = model_name_parts[0]
+        if module and module.lower() not in ['xgboost', 'sqlflow_models']:
+            try:
+                namespace[module] = __import__(module)
+            except Exception as e:
+                print("failed to import %s: %s" % (module, e))
+
+
+def import_model(model):
+    """
+    Resolve a model name to an object by evaluating it in this module.
+
+    Args:
+        model (str): the model name; it is passed to eval() as-is.
+
+    Returns:
+        Whatever the name evaluates to (presumably a model class/function).
+    """
+    import_model_module(model, globals())  # bind custom package, if any
+    return eval(model)  # NOTE(review): eval — confirm model is trusted
diff --git a/python/runtime/pai/tensorflow/evaluate.py b/python/runtime/pai/tensorflow/evaluate.py
@@ -19,6 +19,7 @@
 import runtime
 import tensorflow as tf
 from runtime import oss
+from runtime.import_model import import_model
 from runtime.pai.pai_distributed import define_tf_flags
 from runtime.tensorflow import is_tf_estimator
 from runtime.tensorflow.evaluate import (estimator_evaluate, keras_evaluate,
@@ -27,11 +28,6 @@
 from runtime.tensorflow.keras_with_feature_column_input import \
     init_model_with_feature_column
 from runtime.tensorflow.set_log_level import set_log_level
-from tensorflow.estimator import (BoostedTreesClassifier,
-                                  BoostedTreesRegressor, DNNClassifier,
-                                  DNNLinearCombinedClassifier,
-                                  DNNLinearCombinedRegressor, DNNRegressor,
-                                  LinearClassifier, LinearRegressor)
 
 try:
     tf.enable_eager_execution()
@@ -66,7 +62,7 @@ def evaluate(datasource, select, data_table, result_table, oss_model_path,
     # NOTE(typhoonzero): No need to eval model_params["optimizer"] and model_params["loss"]
     # because predicting do not need these parameters.
 
-    is_estimator = is_tf_estimator(eval(estimator))
+    is_estimator = is_tf_estimator(import_model(estimator))
 
     # Keras single node is using h5 format to save the model, no need to deal with export model format.
     # Keras distributed mode will use estimator, so this is also needed.
@@ -110,8 +106,7 @@ def _evaluate(datasource,
               validation_steps=None,
               verbose=0,
               pai_table=""):
-    runtime.import_model_def(estimator_string, globals())
-    estimator_cls = eval(estimator_string)
+    estimator_cls = import_model(estimator_string)
     is_estimator = is_tf_estimator(estimator_cls)
     set_log_level(verbose, is_estimator)
     eval_dataset = get_dataset_fn(select,
diff --git a/python/runtime/pai/tensorflow/explain.py b/python/runtime/pai/tensorflow/explain.py
@@ -21,16 +21,12 @@
 import runtime
 import tensorflow as tf
 from runtime import oss
+from runtime.import_model import import_model
 from runtime.tensorflow import is_tf_estimator
 from runtime.tensorflow.explain import explain_boosted_trees, explain_dnns
 from runtime.tensorflow.input_fn import input_fn
 from runtime.tensorflow.keras_with_feature_column_input import \
     init_model_with_feature_column
-from tensorflow.estimator import (BoostedTreesClassifier,
-                                  BoostedTreesRegressor, DNNClassifier,
-                                  DNNLinearCombinedClassifier,
-                                  DNNLinearCombinedRegressor, DNNRegressor,
-                                  LinearClassifier, LinearRegressor)
 
 try:
     from runtime.pai.pai_distributed import define_tf_flags, set_oss_environs
@@ -59,7 +55,7 @@ def explain(datasource, select, data_table, result_table, label_column,
     # NOTE(typhoonzero): No need to eval model_params["optimizer"] and model_params["loss"]
     # because predicting do not need these parameters.
 
-    is_estimator = is_tf_estimator(eval(estimator))
+    is_estimator = is_tf_estimator(import_model(estimator))
 
     # Keras single node is using h5 format to save the model, no need to deal with export model format.
     # Keras distributed mode will use estimator, so this is also needed.
@@ -106,8 +102,7 @@ def _explain(datasource,
              oss_sk=None,
              oss_endpoint=None,
              oss_bucket_name=None):
-    runtime.import_model_def(estimator_string, globals())
-    estimator_cls = eval(estimator_string)
+    estimator_cls = import_model(estimator_string)
     FLAGS = tf.app.flags.FLAGS
     model_params["model_dir"] = FLAGS.checkpointDir
     model_params.update(feature_columns)
diff --git a/python/runtime/pai/tensorflow/predict.py b/python/runtime/pai/tensorflow/predict.py
@@ -19,14 +19,10 @@
 import tensorflow as tf
 from runtime import db, oss
 from runtime.diagnostics import SQLFlowDiagnostic
+from runtime.import_model import import_model
 from runtime.pai.pai_distributed import define_tf_flags
 from runtime.tensorflow import is_tf_estimator
 from runtime.tensorflow.predict import estimator_predict, keras_predict
-from tensorflow.estimator import (BoostedTreesClassifier,
-                                  BoostedTreesRegressor, DNNClassifier,
-                                  DNNLinearCombinedClassifier,
-                                  DNNLinearCombinedRegressor, DNNRegressor,
-                                  LinearClassifier, LinearRegressor)
 
 try:
     import sqlflow_models
@@ -65,7 +61,7 @@ def predict(datasource, select, data_table, result_table, label_column,
     # NOTE(typhoonzero): No need to eval model_params["optimizer"] and model_params["loss"]
     # because predicting do not need these parameters.
 
-    is_estimator = is_tf_estimator(eval(estimator))
+    is_estimator = is_tf_estimator(import_model(estimator))
 
     # Keras single node is using h5 format to save the model, no need to deal with export model format.
     # Keras distributed mode will use estimator, so this is also needed.
@@ -106,8 +102,7 @@ def _predict(datasource,
              save="",
              batch_size=1,
              pai_table=""):
-    runtime.import_model_def(estimator_string, globals())
-    estimator = eval(estimator_string)
+    estimator = import_model(estimator_string)
     model_params.update(feature_columns)
     is_estimator = is_tf_estimator(estimator)
 
diff --git a/python/runtime/pai/tensorflow/train.py b/python/runtime/pai/tensorflow/train.py
@@ -25,6 +25,7 @@
 from runtime import oss
 from runtime.db import (connect_with_data_source, db_generator,
                         parseMaxComputeDSN)
+from runtime.import_model import import_model
 from runtime.model_metadata import collect_model_metadata
 from runtime.pai.pai_distributed import define_tf_flags, set_oss_environs
 from runtime.pai.tensorflow.train_estimator import estimator_train_and_save
@@ -33,11 +34,6 @@
 from runtime.tensorflow.get_tf_version import tf_is_version2
 from runtime.tensorflow.input_fn import get_dataset_fn
 from runtime.tensorflow.set_log_level import set_log_level
-from tensorflow.estimator import (BoostedTreesClassifier,
-                                  BoostedTreesRegressor, DNNClassifier,
-                                  DNNLinearCombinedClassifier,
-                                  DNNLinearCombinedRegressor, DNNRegressor,
-                                  LinearClassifier, LinearRegressor)
 
 try:
     import sqlflow_models
@@ -81,8 +77,7 @@ def train(datasource,
                                         model_params, feature_columns_code,
                                         feature_metas, label_meta, None,
                                         model_repo_image)
-    runtime.import_model_def(estimator_string, globals())
-    estimator = eval(estimator_string)
+    estimator = import_model(estimator_string)
     is_estimator = is_tf_estimator(estimator)
 
     if verbose < 1:  # always use verbose == 1 when using PAI to get more logs
diff --git a/python/runtime/tensorflow/estimator_example.py b/python/runtime/tensorflow/estimator_example.py
@@ -82,7 +82,7 @@
 if __name__ == "__main__":
     # tf.python.training.basic_session_run_hooks.LoggingTensorHook = runtime.tensorflow.train.PrintTensorsHook
     train(datasource=datasource,
-          estimator_string="tf.estimator.DNNClassifier",
+          estimator_string="DNNClassifier",
           select=select,
           validation_select=validate_select,
           feature_columns=feature_columns,
@@ -98,7 +98,7 @@
           epoch=3,
           verbose=0)
     train(datasource=datasource,
-          estimator_string="tf.estimator.DNNClassifier",
+          estimator_string="DNNClassifier",
           select=select_binary,
           validation_select=validate_select_binary,
           feature_columns=feature_columns,
@@ -114,7 +114,7 @@
           epoch=3,
           verbose=1)
     pred(datasource=datasource,
-         estimator_string="tf.estimator.DNNClassifier",
+         estimator_string="DNNClassifier",
          select=select,
          result_table="iris.predict",
          feature_columns=feature_columns,
diff --git a/python/runtime/tensorflow/evaluate.py b/python/runtime/tensorflow/evaluate.py
@@ -18,17 +18,13 @@
 import runtime
 import tensorflow as tf
 from runtime.db import buffered_db_writer, connect_with_data_source
+from runtime.import_model import import_model
 from runtime.tensorflow import metrics
 from runtime.tensorflow.get_tf_model_type import is_tf_estimator
 from runtime.tensorflow.input_fn import get_dataset_fn
 from runtime.tensorflow.keras_with_feature_column_input import \
     init_model_with_feature_column
 from runtime.tensorflow.set_log_level import set_log_level
-from tensorflow.estimator import (BoostedTreesClassifier,
-                                  BoostedTreesRegressor, DNNClassifier,
-                                  DNNLinearCombinedClassifier,
-                                  DNNLinearCombinedRegressor, DNNRegressor,
-                                  LinearClassifier, LinearRegressor)
 
 try:
     import sqlflow_models
@@ -54,8 +50,7 @@ def evaluate(datasource,
              hive_location="",
              hdfs_user="",
              hdfs_pass=""):
-    runtime.import_model_def(estimator_string, globals())
-    estimator_cls = eval(estimator_string)
+    estimator_cls = import_model(estimator_string)
     is_estimator = is_tf_estimator(estimator_cls)
     set_log_level(verbose, is_estimator)
     eval_dataset = get_dataset_fn(select,
diff --git a/python/runtime/tensorflow/explain.py b/python/runtime/tensorflow/explain.py
@@ -23,15 +23,11 @@
 import tensorflow as tf
 from runtime import explainer
 from runtime.db import buffered_db_writer, connect_with_data_source
+from runtime.import_model import import_model
 from runtime.tensorflow.get_tf_version import tf_is_version2
 from runtime.tensorflow.input_fn import input_fn
 from runtime.tensorflow.keras_with_feature_column_input import \
     init_model_with_feature_column
-from tensorflow.estimator import (BoostedTreesClassifier,
-                                  BoostedTreesRegressor, DNNClassifier,
-                                  DNNLinearCombinedClassifier,
-                                  DNNLinearCombinedRegressor, DNNRegressor,
-                                  LinearClassifier, LinearRegressor)
 
 sns_colors = sns.color_palette('colorblind')
 # Disable Tensorflow INFO and WARNING logs
@@ -73,8 +69,7 @@ def explain(datasource,
             oss_sk=None,
             oss_endpoint=None,
             oss_bucket_name=None):
-    runtime.import_model_def(estimator_string, globals())
-    estimator_cls = eval(estimator_string)
+    estimator_cls = import_model(estimator_string)
     model_params['model_dir'] = save
     model_params.update(feature_columns)
 
diff --git a/python/runtime/tensorflow/explain_example.py b/python/runtime/tensorflow/explain_example.py
diff --git a/python/runtime/tensorflow/predict.py b/python/runtime/tensorflow/predict.py
diff --git a/python/runtime/tensorflow/train.py b/python/runtime/tensorflow/train.py
diff --git a/python/symbol_extractor.py b/python/symbol_extractor.py