Merge pull request #67 from APLA-Toolbox/add-data-analyst-docs

guilyx · web-flow · commit 5321d7876bde · 2020-12-24T17:15:42.000+01:00
Resolve issues 63 61 65
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
@@ -1,6 +1,6 @@
 name: format
 on:
-  pull_request:
+  push:
     branches: [main]
 jobs:
   format:
@@ -25,4 +25,4 @@ jobs:
         uses: stefanzweifel/git-auto-commit-action@v4.8.0
         with:
           commit_message: Apply formatting changes
-          branch: ${{ github.head_ref }}
+          branch: main
diff --git a/README.md b/README.md
@@ -51,7 +51,9 @@ $ python3 -m pip install jupyddl
 # REFL Mode
 
 - Run `python3` in the terminal.
-- Use the AutomatedPlanner class to do what you want:
+
+## [AutomatedPlanner]
+
 ```python
 from jupyddl import AutomatedPlanner # takes some time because it has to instantiate the Julia interface
 apl = AutomatedPlanner("data/domain.pddl", "data/problem.pddl)
@@ -77,6 +79,32 @@ print(apl.get_actions_from_path(path))
 [<PyCall.jlwrap flip_row(r1)>, <PyCall.jlwrap flip_row(r3)>, <PyCall.jlwrap flip_column(c2)>]
 ```
 
+## [Data Analyst]
+
+Make sure the directory you run your environment from contains a `data/` folder whose subfolders each hold one planning instance, with files named exactly `domain.pddl` and `problem.pddl`.
+
+```python
+from jupyddl import DataAnalyst
+
+da = DataAnalyst()
+da.plot_astar_data() # plots complexity statistics for all the problem.pddl/domain.pddl pairs in the data/ folder
+
+da.plot_astar_data(problem="data/flip/problem.pddl", domain="data/flip/domain.pddl") # scatter-plots complexity statistics for the provided problem/domain pair only
+
+da.plot_astar_data(heuristic_key="zero") # use h=0 instead of goal_count for your computation
+
+da.plot_dfs() # same usage as plot_astar_data, but without the heuristic_key argument
+
+da.comparative_data_plot() # runs all planners on the data folder and plots the results on the same figure; the data is stored in a data.json file
+
+da.comparative_data_plot(astar=False) # Exclude astar from the comparative plot
+
+da.comparative_data_plot(heuristic_key="zero") # use zero heuristic for h based planners
+
+da.comparative_data_plot(collect_new_data=False) # plots from the existing data.json instead of collecting new data
+```
+
+
 # Contribute
 
 Open an issue to state clearly the contribution you want to make. Upon aproval send in a PR with the Issue referenced. (Implement Issue #No / Fix Issue #No).
diff --git a/jupyddl/automated_planner.py b/jupyddl/automated_planner.py
@@ -31,6 +31,15 @@ def __init__(self, domain_path, problem_path, log_level="DEBUG"):
         self.logger = logging.getLogger("automated_planning")
         coloredlogs.install(level=log_level)
 
+        # Running external Julia functions once to create the routes
+        self.__run_julia_once()
+
+    def __run_julia_once(self):
+        self.satisfies(self.problem.goal, self.initial_state)
+        self.state_has_term(self.initial_state, self.goals[0])
+        actions = self.available_actions(self.initial_state)
+        self.transition(self.initial_state, actions[0])
+
     def __init_logger(self, log_level):
         import os
 
diff --git a/jupyddl/data_analyst.py b/jupyddl/data_analyst.py
@@ -37,22 +37,27 @@ def __get_all_pddl_from_data(self):
         return [("data/problem.pddl", "data/domain.pddl")]
 
     def __plot_data(self, times, total_nodes, plot_title):
-        plt.plot(total_nodes, times, "b:o")
+        data = dict()
+        for i, val in enumerate(total_nodes):
+            data[val] = times[i]
+        nodes_sorted = sorted(list(data.keys()))
+        times_y = []
+        for node_opened in nodes_sorted:
+            times_y.append(data[node_opened])
+        plt.plot(nodes_sorted, times_y, "r:o")
         plt.xlabel("Number of opened nodes")
         plt.ylabel("Planning computation time")
+        plt.xscale('symlog')
         plt.title(plot_title)
-        plt.xscale("symlog")
-        plt.yscale("log")
         plt.grid(True)
         plt.show(block=False)
 
     def __scatter_data(self, times, total_nodes, plot_title):
         plt.scatter(total_nodes, times)
         plt.xlabel("Number of opened nodes")
         plt.ylabel("Planning computation time")
+        plt.xscale('symlog')
         plt.title(plot_title)
-        plt.xscale("symlog")
-        plt.yscale("log")
         plt.grid(True)
         plt.show(block=False)
 
@@ -61,41 +66,46 @@ def __gather_data_astar(
     ):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with A*...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
                 if heuristic_key in apla.available_heuristics:
-                    _, total_time, opened_nodes = apla.astar_best_first_search(
+                    path, total_time, opened_nodes = apla.astar_best_first_search(
                         heuristic=apla.available_heuristics[heuristic_key]
                     )
                 else:
                     logging.critical(
                         "Heuristic is not implemented! (Key not found in registered heuristics dict)"
                     )
                     return [0], [0], has_multiple_files_tested
-                metrics[total_time] = opened_nodes
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
         if heuristic_key in apla.available_heuristics:
-            _, total_time, opened_nodes = apla.astar_best_first_search(
+            path, total_time, opened_nodes = apla.astar_best_first_search(
                 heuristic=apla.available_heuristics[heuristic_key]
             )
         else:
             logging.critical(
                 "Heuristic is not implemented! (Key not found in registered heuristics dict)"
             )
             return [0], [0], has_multiple_files_tested
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_astar_data(self, heuristic_key="goal_count", domain="", problem=""):
         if bool(not problem) != bool(not domain):
@@ -115,25 +125,30 @@ def plot_astar_data(self, heuristic_key="goal_count", domain="", problem=""):
     def __gather_data_bfs(self, domain_path="", problem_path=""):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with BFS...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
-                _, total_time, opened_nodes = apla.breadth_first_search()
-                metrics[total_time] = opened_nodes
+                path, total_time, opened_nodes = apla.breadth_first_search()
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
-        _, total_time, opened_nodes = apla.breadth_first_search()
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        path, total_time, opened_nodes = apla.breadth_first_search()
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_bfs(self, domain="", problem=""):
         title = "BFS Statistics"
@@ -153,25 +168,30 @@ def plot_bfs(self, domain="", problem=""):
     def __gather_data_dfs(self, domain_path="", problem_path=""):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with DFS...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
-                _, total_time, opened_nodes = apla.depth_first_search()
-                metrics[total_time] = opened_nodes
+                path, total_time, opened_nodes = apla.depth_first_search()
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
-        _, total_time, opened_nodes = apla.depth_first_search()
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        path, total_time, opened_nodes = apla.depth_first_search()
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_dfs(self, problem="", domain=""):
         title = "DFS Statistics"
@@ -191,25 +211,30 @@ def plot_dfs(self, problem="", domain=""):
     def __gather_data_dijkstra(self, domain_path="", problem_path=""):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with Dijkstra...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
-                _, total_time, opened_nodes = apla.dijktra_best_first_search()
-                metrics[total_time] = opened_nodes
+                path, total_time, opened_nodes = apla.dijktra_best_first_search()
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
-        _, total_time, opened_nodes = apla.dijktra_best_first_search()
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        path, total_time, opened_nodes = apla.dijktra_best_first_search()
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_dijkstra(self, problem="", domain=""):
         title = "Dijkstra Statistics"