Accept zero in tensor dimensions

CodeWithKyrian · CodeWithKyrian · commit 5e310f92ec9d · 2024-04-23T01:00:27.000+01:00
diff --git a/src/Models/ModelArchitecture.php b/src/Models/ModelArchitecture.php
@@ -220,14 +220,6 @@ protected function decoderForward(PretrainedModel $model, array $modelInputs): a
         $model->preparePositionIds($inputNames, $decoderFeeds, $useCacheBranch);
         $model->addPastKeyValues($decoderFeeds, $pastKeyValues);
 
-        // The initial past key values should have a shape of 0 in one of the dimensions, which
-        // is the sequence length. However, I haven't found a way to pass a tensor with a shape of 0
-        // to the model, so I'm using a sequence length of 1 instead for the first step, and then
-        // offsetting the sequence length by 1 for the subsequent steps. This is a workaround for now.
-        $prevSequenceLength = $decoderFeeds['past_key_values.0.key']->shape()[2];
-        $attnMaskLength = $prevSequenceLength == 1 ? 1 : $prevSequenceLength + 1;
-        $decoderFeeds['attention_mask'] = Tensor::ones([1, $attnMaskLength], dtype: NDArray::int64);
-
         $decoderResults = $model->runSession($model->session, $decoderFeeds);
 
         $logits = $decoderResults['logits'];
diff --git a/src/Models/Pretrained/PretrainedModel.php b/src/Models/Pretrained/PretrainedModel.php
@@ -467,50 +467,50 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v
             $decoderFeeds = array_merge($decoderFeeds, $pastKeyValues);
         } else {
             // TODO support batches (i.e., batch_size > 1)
-            $batch_size = 1;
+            $batchSize = 1;
 
             if ($this->config->isEncoderDecoder && ($this->addEncoderPkv ?? true)) {
-                $encoderShape = [$batch_size, $this->numEncoderHeads, 1, $this->encoderDimKv];
-                $decoderShape = [$batch_size, $this->numDecoderHeads, 1, $this->decoderDimKv];
+                $encoderShape = [$batchSize, $this->numEncoderHeads, 0, $this->encoderDimKv];
+                $decoderShape = [$batchSize, $this->numDecoderHeads, 0, $this->decoderDimKv];
 
 
                 for ($i = 0; $i < $this->numDecoderLayers; ++$i) {
                     $decoderFeeds["past_key_values.$i.encoder.key"]
                         = $decoderFeeds["past_key_values.$i.encoder.value"]
-                        = new Tensor(null, shape: $encoderShape);
+                        = new Tensor([], shape: $encoderShape);
                     $decoderFeeds["past_key_values.$i.decoder.key"]
                         = $decoderFeeds["past_key_values.$i.decoder.value"]
-                        = new Tensor(null, shape: $decoderShape);
+                        = new Tensor([], shape: $decoderShape);
                 }
             } else if ($this->config->modelType === 'falcon') {
                 // NOTE: Custom implementation for Falcon
-                $shape = [$batch_size * $this->numHeads, 1, $this->dimKv];
+                $shape = [$batchSize * $this->numHeads, 0, $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key"] = new Tensor(null, shape: $shape);
-                    $decoderFeeds["past_key_values.$i.value"] = new Tensor(null, shape: $shape);
+                    $decoderFeeds["past_key_values.$i.key"] = new Tensor([], shape: $shape);
+                    $decoderFeeds["past_key_values.$i.value"] = new Tensor([], shape: $shape);
                 }
             } else if ($this->config['multi_query'] ?? null) { // e.g., for `gpt_bigcode`
-                $shape = [$batch_size * $this->numHeads, 1, 2 * $this->dimKv];
+                $shape = [$batchSize * $this->numHeads, 0, 2 * $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key_value"] = new Tensor(null, shape: $shape);
+                    $decoderFeeds["past_key_values.$i.key_value"] = new Tensor([], shape: $shape);
                 }
             } else if ($this->config['model_type'] === 'bloom') {
                 // NOTE: Custom implementation for Bloom
-                $keyShape = [$batch_size * $this->numHeads, $this->dimKv, 1];
-                $valueShape = [$batch_size * $this->numHeads, 1, $this->dimKv];
+                $keyShape = [$batchSize * $this->numHeads, $this->dimKv, 0];
+                $valueShape = [$batchSize * $this->numHeads, 0, $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key"] = new Tensor(null, shape: $keyShape);
-                    $decoderFeeds["past_key_values.$i.value"] = new Tensor(null, shape: $valueShape);
+                    $decoderFeeds["past_key_values.$i.key"] = new Tensor([], shape: $keyShape);
+                    $decoderFeeds["past_key_values.$i.value"] = new Tensor([], shape: $valueShape);
                 }
             } else { // Decoder-only
-                $shape = [$batch_size, $this->numHeads, 1, $this->dimKv];
+                $shape = [$batchSize, $this->numHeads, 0, $this->dimKv];
 
                 for ($i = 0; $i < $this->numLayers; ++$i) {
-                    $decoderFeeds["past_key_values.$i.key"] = new Tensor(null, shape: $shape);
-                    $decoderFeeds["past_key_values.$i.value"] = new Tensor(null, shape: $shape);
+                    $decoderFeeds["past_key_values.$i.key"] = new Tensor([], shape: $shape);
+                    $decoderFeeds["past_key_values.$i.value"] = new Tensor([], shape: $shape);
                 }
             }
         }
diff --git a/src/Utils/InferenceSession.php b/src/Utils/InferenceSession.php
@@ -372,7 +372,11 @@ private function createInputTensor($inputFeed, &$refs)
                 if (isset($inputTypes[$inp['type']])) {
                     $typeEnum = $inputTypes[$inp['type']];
                     $castType = $this->castTypes()[$typeEnum];
-                    $inputTensorValues = $this->ffi->new("{$castType}[$size]");
+                    if ($size == 0) {
+                        $inputTensorValues = $this->ffi->new("void *");
+                    } else {
+                        $inputTensorValues = $this->ffi->new("{$castType}[$size]");
+                    }
                 } else {
                     $this->unsupportedType('input', $inp['type']);
                 }
@@ -494,7 +498,7 @@ private function createFromOnnxValue($outPtr)
                     $values = $this->createFromOnnxValue($mapValues);
                     return array_combine($keys, $values);
                 } else {
-                    $this->unsupported_type('element', $elemType);
+                    $this->unsupportedType('element', $elemType);
                 }
             } else {
                 $this->unsupportedType('ONNX', $outType->cdata);
diff --git a/src/Utils/Tensor.php b/src/Utils/Tensor.php
@@ -201,7 +201,7 @@ protected function assertShape(array $shape): void
                 throw new InvalidArgumentException(
                     "Invalid shape numbers. It gives " . gettype($num));
             }
-            if ($num <= 0) {
+            if ($num < 0) {
                 throw new InvalidArgumentException(
                     "Invalid shape numbers. It gives " . $num);
             }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -201,7 +201,7 @@ protected function assertShape(array $shape): void` |
| `201` | `201` | `throw new InvalidArgumentException(` |
| `202` | `202` | `"Invalid shape numbers. It gives " . gettype($num));` |
| `203` | `203` | `}` |
| `204` | | `- if ($num <= 0) {` |
| | `204` | `+ if ($num < 0) {` |
| `205` | `205` | `throw new InvalidArgumentException(` |
| `206` | `206` | `"Invalid shape numbers. It gives " . $num);` |
| `207` | `207` | `}` |