Skip to content

Commit bb3d065

Browse files
Add Qwen2 model classes
1 parent c2c42fb commit bb3d065

10 files changed

Lines changed: 86 additions & 9 deletions

File tree

docs/getting-started.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Before installing TransformersPHP, ensure your system meets the following requir
1212
- Composer
1313
- PHP FFI extension
1414
- JIT compilation (optional)
15+
- Increased memory limit (for advanced tasks like text generation)
1516

1617
## Installation
1718

examples/pipelines/text-generation.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
ini_set('memory_limit', -1);
1313
//
14-
$generator = pipeline('text-generation', 'Xenova/gpt2');
14+
$generator = pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat');
1515

1616
$streamer = StdOutStreamer::make($generator->tokenizer);
1717

@@ -21,7 +21,9 @@
2121
['role' => 'user', 'content' => 'I am doing great. What about you?'],
2222
];
2323

24-
$output = $generator("I love going to school but I don't",
24+
$input = $generator->tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false);
25+
26+
$output = $generator($messages,
2527
streamer: $streamer,
2628
maxNewTokens: 128,
2729
doSample: true,

src/Models/Auto/AutoModel.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class AutoModel extends PretrainedMixin
3838
"gptj" => \Codewithkyrian\Transformers\Models\Pretrained\GPTJModel::class,
3939
"gpt_bigcode" => \Codewithkyrian\Transformers\Models\Pretrained\GPTBigCodeModel::class,
4040
"codegen" => \Codewithkyrian\Transformers\Models\Pretrained\CodeGenModel::class,
41+
"qwen2" => \Codewithkyrian\Transformers\Models\Pretrained\Qwen2Model::class,
4142
];
4243

4344
const MODEL_CLASS_MAPPINGS = [

src/Models/Auto/AutoModelForCausalLM.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ class AutoModelForCausalLM extends PretrainedMixin
1212
'gptj' => \Codewithkyrian\Transformers\Models\Pretrained\GPTJForCausalLM::class,
1313
'gpt_bigcode' => \Codewithkyrian\Transformers\Models\Pretrained\GPTBigCodeForCausalLM::class,
1414
'codegen' => \Codewithkyrian\Transformers\Models\Pretrained\CodeGenForCausalLM::class,
15-
'trocr' => \Codewithkyrian\Transformers\Models\Pretrained\TrOCRForCausalLM::class,
16-
15+
'qwen2' => \Codewithkyrian\Transformers\Models\Pretrained\Qwen2ForCausalLM::class
1716
];
1817

1918
const MODEL_CLASS_MAPPINGS = [
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php

declare(strict_types=1);

namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * Qwen2 model with a causal language-modeling head, for text generation.
 *
 * All behavior (session handling, KV-cache sizing, generation) is inherited
 * from Qwen2PreTrainedModel; this class exists so the AutoModelForCausalLM
 * "qwen2" mapping resolves to a distinct, named architecture class.
 */
class Qwen2ForCausalLM extends Qwen2PreTrainedModel
{

}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php

declare(strict_types=1);

namespace Codewithkyrian\Transformers\Models\Pretrained;

/**
 * The bare Qwen2 model outputting raw hidden states, without any task head.
 *
 * All behavior is inherited from Qwen2PreTrainedModel; this class exists so
 * the AutoModel "qwen2" mapping resolves to a distinct, named architecture
 * class.
 */
class Qwen2Model extends Qwen2PreTrainedModel
{

}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<?php

declare(strict_types=1);

namespace Codewithkyrian\Transformers\Models\Pretrained;

use Codewithkyrian\Transformers\Models\ModelArchitecture;
use Codewithkyrian\Transformers\Utils\AutoConfig;
use Codewithkyrian\Transformers\Utils\GenerationConfig;
use OnnxRuntime\InferenceSession;

/**
 * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
 *
 * Base class for the Qwen2 family: reads the attention/layer geometry out of the
 * model config so subclasses can size the past-key-values cache.
 */
class Qwen2PreTrainedModel extends PreTrainedModel
{
    /** Number of key/value heads (grouped-query attention aware). */
    protected int $numHeads;

    /** Number of transformer decoder layers. */
    protected int $numLayers;

    /** Per-head dimension: hidden_size / num_attention_heads. */
    protected int $dimKv;

    public function __construct(
        AutoConfig $config,
        InferenceSession $session,
        public ModelArchitecture $modelArchitecture,
        public GenerationConfig $generationConfig
    )
    {
        parent::__construct($config, $session, $modelArchitecture);

        // The Qwen2 config doesn't contain pad_token_id, so we assume it is the
        // eos_token_id. Both the array offset and the typed property are written —
        // presumably two access paths into the same config; TODO confirm these
        // aren't redundant with each other.
        $this->config['pad_token_id'] = $this->config['eos_token_id'];
        $this->config->padTokenId = $this->config['eos_token_id'];

        // Grouped-query attention: the KV cache uses num_key_value_heads when the
        // config provides it, falling back to the full attention-head count.
        $this->numHeads = $this->config['num_key_value_heads'] ?? $this->config['num_attention_heads'];
        $this->numLayers = $this->config['num_hidden_layers'];

        // intdiv() guarantees an int result. A bare `/` returns float whenever the
        // division is inexact, and assigning a float to this int-typed property
        // would throw a TypeError under strict_types. For valid configs hidden_size
        // divides evenly, so the computed value is unchanged.
        $this->dimKv = intdiv($this->config['hidden_size'], $this->config['num_attention_heads']);
    }
}

src/PreTokenizers/SplitPreTokenizer.php

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,15 @@ public function preTokenizeText(string|array $text, array $options): array
2323
return explode($this->pattern, $text);
2424
}
2525

26-
$regex = $this->pattern['Regex'] ?? $this->pattern['String'] ?? null;
26+
$regex = $this->pattern['Regex'] ?? null;
2727

28-
if ($regex != null) {
29-
preg_match_all("/$regex/u", $text, $matches, PREG_SPLIT_NO_EMPTY);
30-
31-
$text = array_map(fn($match) => $match, $matches[0]);
28+
if($regex != null)
29+
{
30+
$split = preg_split($regex, $text, -1, PREG_SPLIT_NO_EMPTY);
31+
dd($split);
3232
}
3333

34+
// TODO: Handle all types of Regex
3435
return $text;
3536
}
3637
}

src/PretrainedTokenizers/AutoTokenizer.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class AutoTokenizer
5555
'SpeechT5Tokenizer' => 'SpeechT5Tokenizer',
5656
'NougatTokenizer' => 'NougatTokenizer',
5757
'VitsTokenizer' => 'VitsTokenizer',
58+
'Qwen2Tokenizer' => 'Qwen2Tokenizer',
5859
// Base case:
5960
'PreTrainedTokenizer' => 'PreTrainedTokenizer',
6061
];
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?php

declare(strict_types=1);

namespace Codewithkyrian\Transformers\PretrainedTokenizers;

/**
 * Tokenizer for Qwen2 models.
 *
 * All behavior is inherited from PretrainedTokenizer; this class exists so the
 * AutoTokenizer 'Qwen2Tokenizer' mapping resolves to a distinct, named class.
 */
class Qwen2Tokenizer extends PretrainedTokenizer
{

}

0 commit comments

Comments
 (0)