Merge pull request #24 from CodeWithKyrian/modify-inference-session-tensor

CodeWithKyrian · web-flow · commit 2200fa3a8a76 · 2024-04-23T08:49:30.000+01:00
Custom inference session for improved ONNX model handling
diff --git a/composer.json b/composer.json
@@ -21,7 +21,8 @@
     "codewithkyrian/onnxruntime-downloader-plugin": "^1.1",
     "symfony/console": "^6.4|^7.0",
     "imagine/imagine": "^1.3",
-    "rokka/imagine-vips": "^0.31.0"
+    "rokka/imagine-vips": "^0.31.0",
+    "spatie/fork": "^1.2"
   },
   "require-dev": {
     "pestphp/pest": "^2.31",
diff --git a/examples/composer.json b/examples/composer.json
@@ -1,29 +1,34 @@
 {
-    "name": "kyrian/examples",
-    "autoload": {
-        "psr-4": {
-            "Kyrian\\Examples\\": "/"
-        }
-    },
-    "authors": [
-        {
-            "name": "Kyrian Obikwelu",
-            "email": "koshnawaza@gmail.com"
-        }
-    ],
-    "require": {
-        "php": "^8.1",
-        "symfony/console": "^7.0",
-        "codewithkyrian/transformers": "dev-change-init-process"
-    },
-    "minimum-stability": "dev",
-    "require-dev": {
-        "symfony/var-dumper": "^7.0"
-    },
-    "repositories": [
-        {
-            "type" : "path",
-            "url": "../"
-        }
-    ]
+  "name": "kyrian/examples",
+  "autoload": {
+    "psr-4": {
+      "Kyrian\\Examples\\": "/"
+    }
+  },
+  "authors": [
+    {
+      "name": "Kyrian Obikwelu",
+      "email": "koshnawaza@gmail.com"
+    }
+  ],
+  "require": {
+    "php": "^8.1",
+    "symfony/console": "^7.0",
+    "codewithkyrian/transformers": "dev-main"
+  },
+  "minimum-stability": "dev",
+  "require-dev": {
+    "symfony/var-dumper": "^7.0"
+  },
+  "repositories": [
+    {
+      "type": "path",
+      "url": "../"
+    }
+  ],
+  "config": {
+    "allow-plugins": {
+      "codewithkyrian/onnxruntime-downloader-plugin": true
+    }
+  }
 }
diff --git a/examples/pipelines/text-generation.php b/examples/pipelines/text-generation.php
@@ -13,12 +13,12 @@
 //
 //$generator = pipeline('text-generation', 'Xenova/gpt2');
 $generator = pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat');
-
+//
 $streamer = StdOutStreamer::make();
 
 $messages = [
     ['role' => 'system', 'content' => 'You are a helpful assistant.'],
-    ['role' => 'user', 'content' => 'What is the product of 5 and 4'],
+    ['role' => 'user', 'content' => 'What is diffusion?'],
 ];
 
 $input = $generator->tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false);
diff --git a/src/Models/ModelArchitecture.php b/src/Models/ModelArchitecture.php
@@ -119,8 +119,6 @@ protected function decoderRunBeam(PretrainedModel $model, array &$beam): array
             'past_key_values' => $beam['prev_model_outputs']['past_key_values'] ?? null,
         ];
 
-
-        // 2. Run
         $output = $model->forward($modelInputs);
 
         // 3. Update
@@ -222,14 +220,6 @@ protected function decoderForward(PretrainedModel $model, array $modelInputs): a
         $model->preparePositionIds($inputNames, $decoderFeeds, $useCacheBranch);
         $model->addPastKeyValues($decoderFeeds, $pastKeyValues);
 
-        // The initial past key values should have a shape of 0 in one of the dimensions, which
-        // is the sequence length. However, I haven't found a way to pass a tensor with a shape of 0
-        // to the model, so I'm using a sequence length of 1 instead for the first step, and then
-        // offsetting the sequence length by 1 for the subsequent steps. This is a workaround for now.
-        $prevSequenceLength = $decoderFeeds['past_key_values.0.key']->shape()[2];
-        $attnMaskLength = $prevSequenceLength == 1 ? 1 : $prevSequenceLength + 1;
-        $decoderFeeds['attention_mask'] = Tensor::ones([1, $attnMaskLength], dtype: NDArray::int64);
-
         $decoderResults = $model->runSession($model->session, $decoderFeeds);
 
         $logits = $decoderResults['logits'];
@@ -372,7 +362,6 @@ protected function seq2seqForward(PretrainedModel $model, array $modelInputs): a
         $model->addPastKeyValues($decoderFeeds, $pastKeyValues);
 
         $decoderResults = $model->runSession($model->decoderMergedSession, $decoderFeeds);
-
         $logits = $decoderResults['logits'];
         $pastKeyValues = $model->getPastKeyValues($decoderResults, $pastKeyValues);
 
diff --git a/src/Models/Pretrained/BartForConditionalGeneration.php b/src/Models/Pretrained/BartForConditionalGeneration.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * The BART Model with a language modeling head. Can be used for summarization.
diff --git a/src/Models/Pretrained/GPT2PretrainedModel.php b/src/Models/Pretrained/GPT2PretrainedModel.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 class GPT2PretrainedModel extends PretrainedModel
 {
diff --git a/src/Models/Pretrained/M2M100ForConditionalGeneration.php b/src/Models/Pretrained/M2M100ForConditionalGeneration.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 class M2M100ForConditionalGeneration extends M2M100PretrainedModel
 {
diff --git a/src/Models/Pretrained/PretrainedModel.php b/src/Models/Pretrained/PretrainedModel.php
@@ -27,10 +27,10 @@
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
 use Codewithkyrian\Transformers\Utils\Hub;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 use Codewithkyrian\Transformers\Utils\Tensor;
 use Error;
 use Exception;
-use OnnxRuntime\InferenceSession;
 use Symfony\Component\Console\Output\OutputInterface;
 use function Codewithkyrian\Transformers\Utils\array_some;
 
@@ -281,9 +281,7 @@ public function runSession(InferenceSession $session, array $inputs): array
 
             $outputNames = array_column($session->outputs(), 'name');
 
-            $outputs = $session->run($outputNames, $inputs);
-
-            return array_combine($outputNames, array_map([Tensor::class, 'fromArray'], $outputs));
+            return $session->run($outputNames, $inputs);
         } catch (MissingModelInputException $e) {
             throw $e;
         } catch (Exception $e) {
@@ -331,7 +329,8 @@ public function validateInputs(array $inputNames, array $inputs): array
             The following inputs will be ignored: "' . implode(', ', $ignored) . '".';
         }
 
-        return array_map(fn($i) => $i->toArray(), $inputs);
+//        return array_map(fn($i) => $i->toArray(), $inputs);
+        return $inputs;
     }
 
     /**
@@ -468,50 +467,50 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v
             $decoderFeeds = array_merge($decoderFeeds, $pastKeyValues);
         } else {
             // TODO support batches (i.e., batch_size > 1)
-            $batch_size = 1;
+            $batchSize = 1;
 
             if ($this->config->isEncoderDecoder && ($this->addEncoderPkv ?? true)) {
-                $encoderShape = [$batch_size, $this->numEncoderHeads, 1, $this->encoderDimKv];
-                $decoderShape = [$batch_size, $this->numDecoderHeads, 1, $this->decoderDimKv];
+                $encoderShape = [$batchSize, $this->numEncoderHeads, 0, $this->encoderDimKv];
+                $decoderShape = [$batchSize, $this->numDecoderHeads, 0, $this->decoderDimKv];
 
 
                 for ($i = 0; $i < $this->numDecoderLayers; ++$i) {
                     $decoderFeeds["past_key_values.$i.encoder.key"]
                         = $decoderFeeds["past_key_values.$i.encoder.value"]
-                        = new Tensor(null, shape: $encoderShape);
+                        = new Tensor([], shape: $encoderShape);
                     $decoderFeeds["past_key_values.$i.decoder.key"]
                         = $decoderFeeds["past_key_values.$i.decoder.value"]
-                        = new Tensor(null, shape: $decoderShape);
+                        = new Tensor([], shape: $decoderShape);
                 }
             } else if ($this->config->modelType === 'falcon') {
                 // NOTE: Custom implementation for Falcon
-                $shape = [$batch_size * $this->numHeads, 1, $this->dimKv];
+                $shape = [$batchSize * $this->numHeads, 0, $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key"] = new Tensor(null, shape: $shape);
-                    $decoderFeeds["past_key_values.$i.value"] = new Tensor(null, shape: $shape);
+                    $decoderFeeds["past_key_values.$i.key"] = new Tensor([], shape: $shape);
+                    $decoderFeeds["past_key_values.$i.value"] = new Tensor([], shape: $shape);
                 }
             } else if ($this->config['multi_query'] ?? null) { // e.g., for `gpt_bigcode`
-                $shape = [$batch_size * $this->numHeads, 1, 2 * $this->dimKv];
+                $shape = [$batchSize * $this->numHeads, 0, 2 * $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key_value"] = new Tensor(null, shape: $shape);
+                    $decoderFeeds["past_key_values.$i.key_value"] = new Tensor([], shape: $shape);
                 }
             } else if ($this->config['model_type'] === 'bloom') {
                 // NOTE: Custom implementation for Bloom
-                $keyShape = [$batch_size * $this->numHeads, $this->dimKv, 1];
-                $valueShape = [$batch_size * $this->numHeads, 1, $this->dimKv];
+                $keyShape = [$batchSize * $this->numHeads, $this->dimKv, 0];
+                $valueShape = [$batchSize * $this->numHeads, 0, $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key"] = new Tensor(null, shape: $keyShape);
-                    $decoderFeeds["past_key_values.$i.value"] = new Tensor(null, shape: $valueShape);
+                    $decoderFeeds["past_key_values.$i.key"] = new Tensor([], shape: $keyShape);
+                    $decoderFeeds["past_key_values.$i.value"] = new Tensor([], shape: $valueShape);
                 }
             } else { // Decoder-only
-                $shape = [$batch_size, $this->numHeads, 1, $this->dimKv];
+                $shape = [$batchSize, $this->numHeads, 0, $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key"] = new Tensor(null, shape: $shape);
-                    $decoderFeeds["past_key_values.$i.value"] = new Tensor(null, shape: $shape);
+                    $decoderFeeds["past_key_values.$i.key"] = new Tensor([], shape: $shape);
+                    $decoderFeeds["past_key_values.$i.value"] = new Tensor([], shape: $shape);
                 }
             }
         }
@@ -521,8 +520,10 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v
      * @param Tensor $inputs The input token ids.
      * @param GenerationConfig|null $generationConfig The generation configuration to use. If null, default configuration will be used.
      * @param LogitsProcessorList|null $logitsProcessor An optional logits processor to use. If null, a new LogitsProcessorList instance will be created.
-     * @param array|null $inputsAttentionMask An optional attention mask for the inputs.
+     * @param Tensor|null $inputsAttentionMask An optional attention mask for the inputs.
+     * @param Streamer|null $streamer
      * @return array An array of generated output sequences, where each sequence is an array of token IDs.
+     * @throws Exception
      */
     public function generate(
         Tensor               $inputs,
@@ -609,6 +610,7 @@ public function generate(
 
                 $output = $this->runBeam($beam);
 
+
                 // add attentions/scores to beam only if user requested
                 if ($generationConfig->output_attentions) {
                     $this->addAttentionsToBeam($beam, $output);
@@ -626,6 +628,7 @@ public function generate(
                 $logits = $output['logits']->slice(null, -1, null);
 //                $logits = $output['logits'];
 
+
                 // Apply logits processor
                 $logitsProcessor($beam['output_token_ids'], $logits);
 
@@ -649,7 +652,6 @@ public function generate(
 
             }
 
-
             ++$numOutputTokens;
 
             // Group and select best beams
@@ -665,15 +667,13 @@ function ($group) use ($generationConfig) {
                 $this->groupBeams($newestBeams)
             ));
 
-
             // Flatten beams
             $beams = $newestBeams;
 
             // Stream the beams if a streamer is provided
             $streamer?->put($beams);
         }
 
-
         // TODO: Ensure that we can return non-batched outputs
 
         $groupedBeams = $this->groupBeams($beams);
diff --git a/src/Models/Pretrained/Qwen2PreTrainedModel.php b/src/Models/Pretrained/Qwen2PreTrainedModel.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
@@ -32,7 +32,7 @@ public function __construct(
         $this->config['pad_token_id'] = $this->config['eos_token_id'];
         $this->config->padTokenId = $this->config['eos_token_id'];
 
-        $this->numHeads = $this->config['num_key_value_heads'] ??  $this->config['num_attention_heads'];
+        $this->numHeads = $this->config['num_key_value_heads'] ?? $this->config['num_attention_heads'];
         $this->numLayers = $this->config['num_hidden_layers'];
         $this->dimKv = $this->config['hidden_size'] / $this->config['num_attention_heads'];
     }
diff --git a/src/Models/Pretrained/T5ForConditionalGeneration.php b/src/Models/Pretrained/T5ForConditionalGeneration.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * T5Model is a class representing a T5 model for conditional generation.
diff --git a/src/Models/Pretrained/TrOCRPretrainedModel.php b/src/Models/Pretrained/TrOCRPretrainedModel.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 class TrOCRPretrainedModel extends PretrainedModel
 {
@@ -29,8 +29,8 @@ public function __construct(
         parent::__construct($config, $session, $modelArchitecture);
 
 
-        $this->numEncoderLayers =  $this->numDecoderLayers = $this->config['decoder_layers'];
-        $this->numEncoderHeads =  $this->numDecoderHeads = $this->config['decoder_attention_heads'];
-        $this->encoderDimKv =  $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads;
+        $this->numEncoderLayers = $this->numDecoderLayers = $this->config['decoder_layers'];
+        $this->numEncoderHeads = $this->numDecoderHeads = $this->config['decoder_attention_heads'];
+        $this->encoderDimKv = $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads;
     }
 }
diff --git a/src/Models/Pretrained/VisionEncoderDecoderModel.php b/src/Models/Pretrained/VisionEncoderDecoderModel.php
@@ -11,7 +11,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
diff --git a/src/Utils/Helpers.php b/src/Utils/Helpers.php
@@ -21,7 +21,7 @@ function memoryPeak(): string
 }
 
 
-function timeUsage(bool $milliseconds = false, bool $sinceLastCall = true): string
+function timeUsage(bool $milliseconds = false, bool $sinceLastCall = true, bool $returnString = true): string|float
 {
     static $lastCallTime = 0;
 
@@ -35,7 +35,8 @@ function timeUsage(bool $milliseconds = false, bool $sinceLastCall = true): stri
 
     $timeDiff = $milliseconds ? $timeDiff * 1000 : $timeDiff;
 
-    return @round($timeDiff, 4) . ($milliseconds ? ' ms' : ' s');
+//    return @round($timeDiff, 4) . ($milliseconds ? ' ms' : ' s');
+    return $returnString ? @round($timeDiff, 4) . ($milliseconds ? ' ms' : ' s') : @round($timeDiff, 4);
 }
 
 function array_some(array $array, callable $callback): bool
diff --git a/src/Utils/InferenceSession.php b/src/Utils/InferenceSession.php
diff --git a/src/Utils/Tensor.php b/src/Utils/Tensor.php

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`use Codewithkyrian\Transformers\Models\ModelArchitecture;`
`9`	`9`	`use Codewithkyrian\Transformers\Utils\AutoConfig;`
`10`	`10`	`use Codewithkyrian\Transformers\Utils\GenerationConfig;`
`11`		`-use OnnxRuntime\InferenceSession;`
	`11`	`+use Codewithkyrian\Transformers\Utils\InferenceSession;`
`12`	`12`
`13`	`13`	`class GPT2PretrainedModel extends PretrainedModel`
`14`	`14`	`{`