Commit 080fea4

[refactor] Simplify and unify layer planners by centralizing inference plan creation logic in layerplanner package
1 parent dc76fde commit 080fea4

15 files changed

Lines changed: 113 additions & 246 deletions
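In short: the base class previously forced a template-method lifecycle (each subclass called validateQuantizationType() and setupTornadoForwardPlan() in its constructor and overrode initializeLayerComponents()), while the Mistral planners used a separate buildForwardPlan(). After this commit, every planner wires its activation, FFN, and logits layers directly in its constructor and calls the final createTornadoInferencePlan() inherited from QuantizedLayerPlanner, which assembles the task graphs and grid scheduler once and caches them behind getImmutableTaskGraphs() and getGridScheduler().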

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/base/QuantizedLayerPlanner.java

Lines changed: 62 additions & 23 deletions
@@ -4,21 +4,28 @@
 import org.beehive.gpullama3.inference.weights.Weights;
 import org.beehive.gpullama3.model.Configuration;
 import org.beehive.gpullama3.model.Model;
-import org.beehive.gpullama3.tornadovm.GenericLayerPlanner;
+import org.beehive.gpullama3.tornadovm.layerplanner.GenericLayerPlanner;
 import org.beehive.gpullama3.tornadovm.layerplanner.strategy.SchedulerDetectionService;
 import org.beehive.gpullama3.tornadovm.layerplanner.strategy.SchedulerType;
+import org.beehive.gpullama3.tornadovm.layers.AbstractFFNLayers;
+import org.beehive.gpullama3.tornadovm.layers.AbstractLogitsLayer;
+import org.beehive.gpullama3.tornadovm.layers.Activation;
+import uk.ac.manchester.tornado.api.GridScheduler;
+import uk.ac.manchester.tornado.api.ImmutableTaskGraph;
 import uk.ac.manchester.tornado.api.KernelContext;

+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Abstract base for all quantization-specific planners.
  *
- * Contains shared logic that works regardless of model type but depends on quantization. Subclasses: FP16LayerPlanner, Q8_0LayerPlanner, etc.
+ * Extracts common state from the model, detects the hardware scheduler type,
+ * and assembles the full execution plan via createTornadoInferencePlan().
+ * Subclasses (FP16LayerPlanner, Q8_0LayerPlanner) only provide quantization validation.
  */
-public abstract class QuantizedLayerPlanner<S extends State, C extends Configuration, W extends Weights> implements GenericLayerPlanner {
-
-    // Common state for all quantizations
-    protected static final int LOCAL_WORK_GROUP_SIZE_ALLOC = 32;
-    protected static final int THREAD_SCALE_FOR_LOGITS = 8;
+public abstract class QuantizedLayerPlanner<S extends State, C extends Configuration, W extends Weights>
+        implements GenericLayerPlanner {

     protected final S state;
     protected final C config;
@@ -27,9 +34,14 @@ public abstract class QuantizedLayerPlanner<S extends State, C extends Configura
     protected final Model model;
     protected final SchedulerType schedulerType;

-    /**
-     * Constructor: validate quantization type, extract model components
-     */
+    protected Activation activationLayer;
+    protected AbstractFFNLayers<W, C> ffnLayers;
+    protected AbstractLogitsLayer logitsLayer;
+
+    private List<ImmutableTaskGraph> immutableTaskGraphs;
+    private GridScheduler gridScheduler;
+
+    @SuppressWarnings("unchecked")
     protected QuantizedLayerPlanner(S state, Model model) {
         this.state = state;
         this.model = model;
@@ -40,26 +52,53 @@ protected QuantizedLayerPlanner(S state, Model model) {
         validateQuantizationType();
     }

-    /**
-     * Override in subclasses to validate correct quantization format. E.g., FP16LayerPlanner checks: weights instanceof FP16Weights
-     */
+    /** Validates that the model weights match the expected quantization type. */
     protected abstract void validateQuantizationType();

     /**
-     * Override in subclasses for model-specific initialization
+     * Creates the TornadoVM inference execution pipeline.
+     * It represents the entire Feed-Forward Network (FFN) and consists of:
+     * <ul>
+     * <li>Activation layer</li>
+     * <li>FFN layers (N transformer layers, model-specific)</li>
+     * <li>Logits layer</li>
+     * </ul>
+     * <p>
+     * Each component is represented as an {@link ImmutableTaskGraph}, along with a
+     * corresponding {@link GridScheduler} configuration that defines how tasks are
+     * mapped on the GPU.
+     * </p>
+     * This method assembles all components into a unified execution pipeline and
+     * caches the resulting task graphs and scheduler for reuse across inference runs.
      */
-    protected abstract void initializeLayerComponents();
+    protected final void createTornadoInferencePlan() {
+        List<ImmutableTaskGraph> allTaskGraphs = new ArrayList<>();
+        GridScheduler masterScheduler = new GridScheduler();
+
+        // 1. Activation layer (common to all models)
+        allTaskGraphs.add(activationLayer.getImmutableTaskGraph());
+        activationLayer.updateGridScheduler(masterScheduler);
+
+        // 2. FFN layers (N transformer layers - model-specific)
+        allTaskGraphs.addAll(ffnLayers.getFFNLayerImmutableTaskGraphs());
+        ffnLayers.updateGridScheduler(masterScheduler);
+
+        // 3. Logits layer (common to all models)
+        allTaskGraphs.add(logitsLayer.getImmutableTaskGraph());
+        logitsLayer.updateGridScheduler(masterScheduler);

-    // Common helper methods for all quantizations
-    protected C getConfig() {
-        return config;
+        // Cache for future retrievals
+        this.immutableTaskGraphs = allTaskGraphs;
+        this.gridScheduler = masterScheduler;
     }

-    protected W getWeights() {
-        return weights;
+    @Override
+    public final List<ImmutableTaskGraph> getImmutableTaskGraphs() {
+        return this.immutableTaskGraphs;
     }

-    protected S getState() {
-        return state;
+    @Override
+    public final GridScheduler getGridScheduler() {
+        return this.gridScheduler;
     }
-}
+}
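Since getImmutableTaskGraphs() and getGridScheduler() carry @Override, the GenericLayerPlanner interface evidently exposes the cached plan to callers. Below is a minimal consumption sketch assuming TornadoVM's standard TornadoExecutionPlan API; the driver class is hypothetical, and how the repository actually executes the plan is not shown in this commit.

import org.beehive.gpullama3.tornadovm.layerplanner.GenericLayerPlanner;
import uk.ac.manchester.tornado.api.GridScheduler;
import uk.ac.manchester.tornado.api.ImmutableTaskGraph;
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;

import java.util.List;

// Hypothetical driver; any concrete planner from this commit would work here.
class PlannerDriverSketch {

    static void run(GenericLayerPlanner planner) {
        // Both getters return the plan cached by createTornadoInferencePlan().
        List<ImmutableTaskGraph> graphs = planner.getImmutableTaskGraphs();
        GridScheduler scheduler = planner.getGridScheduler();

        // TornadoExecutionPlan takes the immutable task graphs as varargs;
        // withGridScheduler(...) attaches the worker grids assembled above,
        // and execute() launches the activation -> FFN -> logits pipeline.
        TornadoExecutionPlan executionPlan =
                new TornadoExecutionPlan(graphs.toArray(new ImmutableTaskGraph[0]));
        executionPlan.withGridScheduler(scheduler).execute();
    }
}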

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/fp16/GraniteFP16LayerPlanner.java

Lines changed: 5 additions & 10 deletions
@@ -10,17 +10,12 @@
 import org.beehive.gpullama3.tornadovm.layers.type.fp16.LogitsGraniteFP16Layer;

 public class GraniteFP16LayerPlanner extends FP16LayerPlanner<GraniteState, GraniteConfiguration, GraniteTornadoWeights> {
+
     public GraniteFP16LayerPlanner(GraniteState state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
-    }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new ActivationGranite("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new GraniteFP16FFNLayers("graniteFFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsGraniteFP16Layer("graniteLogits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(), this.schedulerType);
+        this.activationLayer = new ActivationGranite("activationUpdate", state, weights, config);
+        this.ffnLayers = new GraniteFP16FFNLayers("graniteFFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsGraniteFP16Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
-
 }

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/fp16/LlamaFP16LayerPlanner.java

Lines changed: 5 additions & 11 deletions
@@ -13,15 +13,9 @@ public class LlamaFP16LayerPlanner extends FP16LayerPlanner<LlamaState, LlamaCon

     public LlamaFP16LayerPlanner(LlamaState state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
+        this.activationLayer = new Activation("activationUpdate", state, weights, config);
+        this.ffnLayers = new LlamaFP16FFNLayers("llamaFFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsFP16Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new Activation("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new LlamaFP16FFNLayers("llamaFFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsFP16Layer("llamaLogits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(), this.schedulerType);
-    }
-
-}
+}

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/fp16/MistralFP16LayerPlanner.java

Lines changed: 1 addition & 1 deletion
@@ -16,6 +16,6 @@ public MistralFP16LayerPlanner(LlamaState state, Model model) {
         this.activationLayer = new Activation("activationUpdate", state, weights, config);
         this.ffnLayers = new MistralFP16FFNLayers("mistralFFN", state, weights, config, schedulerType);
         this.logitsLayer = new LogitsFP16Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
-        buildForwardPlan();
+        createTornadoInferencePlan();
     }
 }
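Note the much smaller Mistral diffs (here and in the Q8_0 variant below): those planners already wired their layers in the constructor, so their only change is swapping buildForwardPlan() for the shared createTornadoInferencePlan().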

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/fp16/Phi3FP16LayerPlanner.java

Lines changed: 4 additions & 10 deletions
@@ -20,15 +20,9 @@ public class Phi3FP16LayerPlanner extends FP16LayerPlanner<Phi3State, Phi3Config

     public Phi3FP16LayerPlanner(Phi3State state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
+        this.activationLayer = new Activation("activationUpdate", state, weights, config);
+        this.ffnLayers = new Phi3FP16FFNLayers("phi3FFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsFP16Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new Activation("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new Phi3FP16FFNLayers("phi3FFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsFP16Layer("phi3Logits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(),this.schedulerType);
-    }
-
 }

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/fp16/Qwen2FP16LayerPlanner.java

Lines changed: 4 additions & 9 deletions
@@ -20,14 +20,9 @@ public class Qwen2FP16LayerPlanner extends FP16LayerPlanner<Qwen2State, Qwen2Con

     public Qwen2FP16LayerPlanner(Qwen2State state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
-    }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new Activation("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new Qwen2FP16FFNLayers("qwen2FFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsFP16Layer("qwen2Logits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(), this.schedulerType);
+        this.activationLayer = new Activation("activationUpdate", state, weights, config);
+        this.ffnLayers = new Qwen2FP16FFNLayers("qwen2FFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsFP16Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
 }

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/fp16/Qwen3FP16LayerPlanner.java

Lines changed: 5 additions & 11 deletions
@@ -20,15 +20,9 @@ public class Qwen3FP16LayerPlanner extends FP16LayerPlanner<Qwen3State, Qwen3Con

     public Qwen3FP16LayerPlanner(Qwen3State state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
+        this.activationLayer = new Activation("activationUpdate", state, weights, config);
+        this.ffnLayers = new Qwen3FP16FFNLayers("qwen3FFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsFP16Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new Activation("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new Qwen3FP16FFNLayers("qwen3FFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsFP16Layer("qwen3Logits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(), this.schedulerType);
-    }
-
-}
+}

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/q8_0/GraniteQ8_0LayerPlanner.java

Lines changed: 4 additions & 9 deletions
@@ -13,14 +13,9 @@ public class GraniteQ8_0LayerPlanner extends Q8_0LayerPlanner<GraniteState, Gran

     public GraniteQ8_0LayerPlanner(GraniteState state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
-    }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new ActivationGranite("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new GraniteQ8_0FFNLayers("graniteFFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsGraniteQ8_0Layer("graniteLogits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(), this.schedulerType);
+        this.activationLayer = new ActivationGranite("activationUpdate", state, weights, config);
+        this.ffnLayers = new GraniteQ8_0FFNLayers("graniteFFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsGraniteQ8_0Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
 }

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/q8_0/LlamaQ8_0LayerPlanner.java

Lines changed: 5 additions & 11 deletions
@@ -13,15 +13,9 @@ public class LlamaQ8_0LayerPlanner extends Q8_0LayerPlanner<LlamaState, LlamaCon

     public LlamaQ8_0LayerPlanner(LlamaState state, Model model) {
         super(state, model);
-        validateQuantizationType();
-        setupTornadoForwardPlan();
+        this.activationLayer = new Activation("activationUpdate", state, weights, config);
+        this.ffnLayers = new LlamaQ8_0FFNLayers("llamaFFN", state, weights, config, schedulerType);
+        this.logitsLayer = new LogitsQ8_0Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
+        createTornadoInferencePlan();
     }
-
-    @Override
-    protected void initializeLayerComponents() {
-        this.activationLayer = new Activation("activationUpdate", this.state, this.weights, this.config);
-        this.ffnLayers = new LlamaQ8_0FFNLayers("llamaFFN", this.state, this.weights, this.config, this.schedulerType);
-        this.logitsLayer = new LogitsQ8_0Layer("llamaLogits", this.state, this.weights, this.config, ffnLayers.getLastFFNLayerTaskGraphID(), this.schedulerType);
-    }
-
-}
+}

src/main/java/org/beehive/gpullama3/tornadovm/layerplanner/model/q8_0/MistralQ8_0LayerPlanner.java

Lines changed: 1 addition & 1 deletion
@@ -16,6 +16,6 @@ public MistralQ8_0LayerPlanner(LlamaState state, Model model) {
         this.activationLayer = new Activation("activationUpdate", state, weights, config);
         this.ffnLayers = new MistralQ8_0FFNLayers("mistralFFN", state, weights, config, schedulerType);
         this.logitsLayer = new LogitsQ8_0Layer("logits", state, weights, config, ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
-        buildForwardPlan();
+        createTornadoInferencePlan();
     }
 }
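With plan assembly centralized, adding a planner for another model/quantization pair reduces to a single constructor. A sketch of the pattern, extrapolated from the planners above (imports omitted; the Qwen3 Q8_0 class names are assumptions, not code from this commit, though some of the five unshown changed files may well look like this):

// Illustrative only: Qwen3Q8_0FFNLayers and the generic parameters are
// assumptions extrapolated from the planners shown above.
public class Qwen3Q8_0LayerPlanner extends Q8_0LayerPlanner<Qwen3State, Qwen3Configuration, Qwen3TornadoWeights> {

    public Qwen3Q8_0LayerPlanner(Qwen3State state, Model model) {
        super(state, model); // extracts weights/config, validates Q8_0, detects scheduler
        this.activationLayer = new Activation("activationUpdate", state, weights, config);
        this.ffnLayers = new Qwen3Q8_0FFNLayers("qwen3FFN", state, weights, config, schedulerType);
        this.logitsLayer = new LogitsQ8_0Layer("logits", state, weights, config,
                ffnLayers.getLastFFNLayerTaskGraphID(), schedulerType);
        createTornadoInferencePlan(); // assembles and caches the task graphs + scheduler
    }
}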
