Use a custom InferenceSession class to make Tensor a first-class output

CodeWithKyrian · CodeWithKyrian · commit e7f55a4cbd8b · 2024-04-22T17:37:45.000+01:00
diff --git a/composer.json b/composer.json
@@ -21,7 +21,8 @@
     "codewithkyrian/onnxruntime-downloader-plugin": "^1.1",
     "symfony/console": "^6.4|^7.0",
     "imagine/imagine": "^1.3",
-    "rokka/imagine-vips": "^0.31.0"
+    "rokka/imagine-vips": "^0.31.0",
+    "spatie/fork": "^1.2"
   },
   "require-dev": {
     "pestphp/pest": "^2.31",
diff --git a/examples/composer.json b/examples/composer.json
@@ -1,29 +1,34 @@
 {
-    "name": "kyrian/examples",
-    "autoload": {
-        "psr-4": {
-            "Kyrian\\Examples\\": "/"
-        }
-    },
-    "authors": [
-        {
-            "name": "Kyrian Obikwelu",
-            "email": "koshnawaza@gmail.com"
-        }
-    ],
-    "require": {
-        "php": "^8.1",
-        "symfony/console": "^7.0",
-        "codewithkyrian/transformers": "dev-change-init-process"
-    },
-    "minimum-stability": "dev",
-    "require-dev": {
-        "symfony/var-dumper": "^7.0"
-    },
-    "repositories": [
-        {
-            "type" : "path",
-            "url": "../"
-        }
-    ]
+  "name": "kyrian/examples",
+  "autoload": {
+    "psr-4": {
+      "Kyrian\\Examples\\": "/"
+    }
+  },
+  "authors": [
+    {
+      "name": "Kyrian Obikwelu",
+      "email": "koshnawaza@gmail.com"
+    }
+  ],
+  "require": {
+    "php": "^8.1",
+    "symfony/console": "^7.0",
+    "codewithkyrian/transformers": "dev-main"
+  },
+  "minimum-stability": "dev",
+  "require-dev": {
+    "symfony/var-dumper": "^7.0"
+  },
+  "repositories": [
+    {
+      "type": "path",
+      "url": "../"
+    }
+  ],
+  "config": {
+    "allow-plugins": {
+      "codewithkyrian/onnxruntime-downloader-plugin": true
+    }
+  }
 }
diff --git a/examples/pipelines/text-generation.php b/examples/pipelines/text-generation.php
@@ -18,13 +18,13 @@
 
 $messages = [
     ['role' => 'system', 'content' => 'You are a helpful assistant.'],
-    ['role' => 'user', 'content' => 'What is the product of 5 and 4'],
+    ['role' => 'user', 'content' => 'What is diffusion?'],
 ];
 
 $input = $generator->tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false);
 
 $output = $generator($input,
-    streamer: $streamer,
+//    streamer: $streamer,
     maxNewTokens: 128,
     doSample: true,
     returnFullText: false,
diff --git a/src/Models/ModelArchitecture.php b/src/Models/ModelArchitecture.php
@@ -119,8 +119,6 @@ protected function decoderRunBeam(PretrainedModel $model, array &$beam): array
             'past_key_values' => $beam['prev_model_outputs']['past_key_values'] ?? null,
         ];
 
-
-        // 2. Run
         $output = $model->forward($modelInputs);
 
         // 3. Update
@@ -372,7 +370,6 @@ protected function seq2seqForward(PretrainedModel $model, array $modelInputs): a
         $model->addPastKeyValues($decoderFeeds, $pastKeyValues);
 
         $decoderResults = $model->runSession($model->decoderMergedSession, $decoderFeeds);
-
         $logits = $decoderResults['logits'];
         $pastKeyValues = $model->getPastKeyValues($decoderResults, $pastKeyValues);
 
diff --git a/src/Models/Pretrained/BartForConditionalGeneration.php b/src/Models/Pretrained/BartForConditionalGeneration.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * The BART Model with a language modeling head. Can be used for summarization.
diff --git a/src/Models/Pretrained/GPT2PretrainedModel.php b/src/Models/Pretrained/GPT2PretrainedModel.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 class GPT2PretrainedModel extends PretrainedModel
 {
diff --git a/src/Models/Pretrained/M2M100ForConditionalGeneration.php b/src/Models/Pretrained/M2M100ForConditionalGeneration.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 class M2M100ForConditionalGeneration extends M2M100PretrainedModel
 {
diff --git a/src/Models/Pretrained/PretrainedModel.php b/src/Models/Pretrained/PretrainedModel.php
@@ -27,12 +27,13 @@
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
 use Codewithkyrian\Transformers\Utils\Hub;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 use Codewithkyrian\Transformers\Utils\Tensor;
 use Error;
 use Exception;
-use OnnxRuntime\InferenceSession;
 use Symfony\Component\Console\Output\OutputInterface;
 use function Codewithkyrian\Transformers\Utils\array_some;
+use function Codewithkyrian\Transformers\Utils\timeUsage;
 
 /**
  * A base class for pre-trained models that provides the model configuration and an ONNX session.
@@ -281,9 +282,10 @@ public function runSession(InferenceSession $session, array $inputs): array
 
             $outputNames = array_column($session->outputs(), 'name');
 
-            $outputs = $session->run($outputNames, $inputs);
-
-            return array_combine($outputNames, array_map([Tensor::class, 'fromArray'], $outputs));
+            timeUsage();
+            $out = $session->run($outputNames, $inputs);
+            dump(timeUsage(true));
+            return $out;
         } catch (MissingModelInputException $e) {
             throw $e;
         } catch (Exception $e) {
@@ -331,7 +333,8 @@ public function validateInputs(array $inputNames, array $inputs): array
             The following inputs will be ignored: "' . implode(', ', $ignored) . '".';
         }
 
-        return array_map(fn($i) => $i->toArray(), $inputs);
+//        return array_map(fn($i) => $i->toArray(), $inputs);
+        return $inputs;
     }
 
     /**
@@ -521,8 +524,10 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v
      * @param Tensor $inputs The input token ids.
      * @param GenerationConfig|null $generationConfig The generation configuration to use. If null, default configuration will be used.
      * @param LogitsProcessorList|null $logitsProcessor An optional logits processor to use. If null, a new LogitsProcessorList instance will be created.
-     * @param array|null $inputsAttentionMask An optional attention mask for the inputs.
+     * @param Tensor|null $inputsAttentionMask An optional attention mask for the inputs.
+     * @param Streamer|null $streamer
      * @return array An array of generated output sequences, where each sequence is an array of token IDs.
+     * @throws Exception
      */
     public function generate(
         Tensor               $inputs,
@@ -609,6 +614,7 @@ public function generate(
 
                 $output = $this->runBeam($beam);
 
+
                 // add attentions/scores to beam only if user requested
                 if ($generationConfig->output_attentions) {
                     $this->addAttentionsToBeam($beam, $output);
@@ -626,6 +632,7 @@ public function generate(
                 $logits = $output['logits']->slice(null, -1, null);
 //                $logits = $output['logits'];
 
+
                 // Apply logits processor
                 $logitsProcessor($beam['output_token_ids'], $logits);
 
@@ -649,7 +656,6 @@ public function generate(
 
             }
 
-
             ++$numOutputTokens;
 
             // Group and select best beams
@@ -665,15 +671,13 @@ function ($group) use ($generationConfig) {
                 $this->groupBeams($newestBeams)
             ));
 
-
             // Flatten beams
             $beams = $newestBeams;
 
             // Stream the beams if a streamer is provided
             $streamer?->put($beams);
         }
 
-
         // TODO: Ensure that we can return non-batched outputs
 
         $groupedBeams = $this->groupBeams($beams);
diff --git a/src/Models/Pretrained/Qwen2PreTrainedModel.php b/src/Models/Pretrained/Qwen2PreTrainedModel.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
@@ -32,7 +32,7 @@ public function __construct(
         $this->config['pad_token_id'] = $this->config['eos_token_id'];
         $this->config->padTokenId = $this->config['eos_token_id'];
 
-        $this->numHeads = $this->config['num_key_value_heads'] ??  $this->config['num_attention_heads'];
+        $this->numHeads = $this->config['num_key_value_heads'] ?? $this->config['num_attention_heads'];
         $this->numLayers = $this->config['num_hidden_layers'];
         $this->dimKv = $this->config['hidden_size'] / $this->config['num_attention_heads'];
     }
diff --git a/src/Models/Pretrained/T5ForConditionalGeneration.php b/src/Models/Pretrained/T5ForConditionalGeneration.php
@@ -8,7 +8,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * T5Model is a class representing a T5 model for conditional generation.
diff --git a/src/Models/Pretrained/VisionEncoderDecoderModel.php b/src/Models/Pretrained/VisionEncoderDecoderModel.php
@@ -11,7 +11,7 @@
 use Codewithkyrian\Transformers\Models\ModelArchitecture;
 use Codewithkyrian\Transformers\Utils\AutoConfig;
 use Codewithkyrian\Transformers\Utils\GenerationConfig;
-use OnnxRuntime\InferenceSession;
+use Codewithkyrian\Transformers\Utils\InferenceSession;
 
 /**
  * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
diff --git a/src/Pipelines/TextGenerationPipeline.php b/src/Pipelines/TextGenerationPipeline.php
@@ -114,7 +114,7 @@ public function __invoke(array|string $inputs, ...$args): array
         );
 
         // Streamer can only handle one input at a time for now, so we only pass the first input
-        $streamer->init($this->tokenizer, $inputIds[0]->toArray(), true);
+        $streamer?->init($this->tokenizer, $inputIds[0]->toArray(), true);
 
         $outputTokenIds = $this->model->generate($inputIds,
             generationConfig: $generationConfig,
diff --git a/src/Utils/Helpers.php b/src/Utils/Helpers.php
@@ -21,7 +21,7 @@ function memoryPeak(): string
 }
 
 
-function timeUsage(bool $milliseconds = false, bool $sinceLastCall = true): string
+function timeUsage(bool $milliseconds = false, bool $sinceLastCall = true, bool $returnString = true): string|float
 {
     static $lastCallTime = 0;
 
@@ -35,7 +35,8 @@ function timeUsage(bool $milliseconds = false, bool $sinceLastCall = true): stri
 
     $timeDiff = $milliseconds ? $timeDiff * 1000 : $timeDiff;
 
-    return @round($timeDiff, 4) . ($milliseconds ? ' ms' : ' s');
+//    return @round($timeDiff, 4) . ($milliseconds ? ' ms' : ' s');
+    return $returnString ? @round($timeDiff, 4) . ($milliseconds ? ' ms' : ' s') : @round($timeDiff, 4);
 }
 
 function array_some(array $array, callable $callback): bool
diff --git a/src/Utils/InferenceSession.php b/src/Utils/InferenceSession.php
diff --git a/src/Utils/Tensor.php b/src/Utils/Tensor.php

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`use Codewithkyrian\Transformers\Models\ModelArchitecture;`
`9`	`9`	`use Codewithkyrian\Transformers\Utils\AutoConfig;`
`10`	`10`	`use Codewithkyrian\Transformers\Utils\GenerationConfig;`
`11`		`-use OnnxRuntime\InferenceSession;`
	`11`	`+use Codewithkyrian\Transformers\Utils\InferenceSession;`
`12`	`12`
`13`	`13`	`class GPT2PretrainedModel extends PretrainedModel`
`14`	`14`	`{`