-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathexport.h
More file actions
executable file
·113 lines (89 loc) · 5.86 KB
/
export.h
File metadata and controls
executable file
·113 lines (89 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#ifndef EXPORT_H
#define EXPORT_H
#include <fstream>
#include <iostream>
#include <memory>
#include <string>

#include "sam_utils.h"
// Build a TensorRT engine from the SAM image-encoder ONNX model and write the
// serialized engine to disk.
//
// f:      path of the input ONNX model (default: vit_l_embedding.onnx)
// output: path of the serialized .engine file to write
//
// On any failure (parse error, build failure, unwritable output file) an error
// is reported on stderr and the function returns without writing an engine.
// `inline` is required: this function is defined in a header, and without it
// every translation unit that includes the header emits its own definition
// (ODR violation / duplicate-symbol link error).
inline void export_engine_image_encoder(std::string f="vit_l_embedding.onnx",std::string output="vit_l_embedding.engine")
{
    // Create the builder instance; `logger` is the global ILogger provided by
    // sam_utils.h.
    std::unique_ptr<nvinfer1::IBuilder> builder(createInferBuilder(logger));
    // The kEXPLICIT_BATCH flag is required in order to import models using the
    // ONNX parser.
    const uint32_t flag = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    std::unique_ptr<nvinfer1::INetworkDefinition> network(builder->createNetworkV2(flag));
    // Import the model through the ONNX parser.
    std::unique_ptr<nvonnxparser::IParser> parser(createParser(*network, logger));
    // Parse the model file; report every recorded error, then bail out on
    // failure — building from a partially parsed network would crash.
    const bool parsed = parser->parseFromFile(f.c_str(),
        static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING));
    for (int32_t i = 0; i < parser->getNbErrors(); ++i)
    {
        std::cerr << parser->getError(i)->desc() << std::endl;
    }
    if (!parsed)
    {
        std::cerr << "Failed to parse ONNX model: " << f << std::endl;
        return;
    }
    // Build configuration: let unsupported layers fall back to GPU and enable
    // FP16 kernels where available.
    std::unique_ptr<nvinfer1::IBuilderConfig> config(builder->createBuilderConfig());
    // int workspace = 4; // GB
    // config->setMaxWorkspaceSize(workspace * 1U << 30);
    config->setFlag(BuilderFlag::kGPU_FALLBACK);
    config->setFlag(BuilderFlag::kFP16);
    // Build and serialize the engine; buildSerializedNetwork returns nullptr
    // on failure, so check before dereferencing.
    std::unique_ptr<nvinfer1::IHostMemory> serializedModel(builder->buildSerializedNetwork(*network, *config));
    if (!serializedModel)
    {
        std::cerr << "Engine build failed for: " << f << std::endl;
        return;
    }
    std::cout << "serializedModel->size()" << serializedModel->size() << std::endl;
    // Write the serialized engine; verify the file actually opened.
    std::ofstream outfile(output, std::ofstream::out | std::ofstream::binary);
    if (!outfile)
    {
        std::cerr << "Cannot open output file: " << output << std::endl;
        return;
    }
    outfile.write(static_cast<const char*>(serializedModel->data()),
                  static_cast<std::streamsize>(serializedModel->size()));
}
// Build a TensorRT engine from the SAM prompt-encoder / mask-decoder ONNX
// model and write the serialized engine to disk. An optimization profile is
// registered so the engine accepts a variable number of prompt points.
//
// f:      path of the input ONNX model (default: sam_onnx_example.onnx)
// output: path of the serialized .engine file to write
//
// On any failure (parse error, build failure, unwritable output file) an error
// is reported on stderr and the function returns without writing an engine.
// `inline` is required: this function is defined in a header, and without it
// every translation unit that includes the header emits its own definition
// (ODR violation / duplicate-symbol link error).
inline void export_engine_prompt_encoder_and_mask_decoder(std::string f="sam_onnx_example.onnx",std::string output="sam_onnx_example.engine")
{
    // Create the builder instance; `logger` is the global ILogger provided by
    // sam_utils.h.
    std::unique_ptr<nvinfer1::IBuilder> builder(createInferBuilder(logger));
    // The kEXPLICIT_BATCH flag is required in order to import models using the
    // ONNX parser.
    const uint32_t flag = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    std::unique_ptr<nvinfer1::INetworkDefinition> network(builder->createNetworkV2(flag));
    // Import the model through the ONNX parser.
    std::unique_ptr<nvonnxparser::IParser> parser(createParser(*network, logger));
    // Parse the model file; report every recorded error, then bail out on
    // failure — building from a partially parsed network would crash.
    const bool parsed = parser->parseFromFile(f.c_str(),
        static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING));
    for (int32_t i = 0; i < parser->getNbErrors(); ++i)
    {
        std::cerr << parser->getError(i)->desc() << std::endl;
    }
    if (!parsed)
    {
        std::cerr << "Failed to parse ONNX model: " << f << std::endl;
        return;
    }
    // Build configuration: let unsupported layers fall back to GPU and enable
    // FP16 kernels where available.
    std::unique_ptr<nvinfer1::IBuilderConfig> config(builder->createBuilderConfig());
    // int workspace = 8; // GB
    // config->setMaxWorkspaceSize(workspace * 1U << 30);
    config->setFlag(BuilderFlag::kGPU_FALLBACK);
    config->setFlag(BuilderFlag::kFP16);
    // Optimization profile for the dynamic prompt inputs. NOTE: in the
    // nvinfer1::Dims aggregate the FIRST value is nbDims, the rest are the
    // dimensions — so {3, 1, 5, 2} means a rank-3 tensor of shape [1, 5, 2]
    // (presumably [batch, num_points, xy]; confirm against the ONNX export).
    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    // image_embeddings is static [1, 256, 64, 64]; mask_input is static
    // [1, 1, 256, 256] and has_mask_input is [1], so no profile entries are
    // needed for them.
    profile->setDimensions("point_coords", nvinfer1::OptProfileSelector::kMIN, {3, 1, 2, 2});
    profile->setDimensions("point_coords", nvinfer1::OptProfileSelector::kOPT, {3, 1, 5, 2});
    profile->setDimensions("point_coords", nvinfer1::OptProfileSelector::kMAX, {3, 1, 10, 2});
    // point_labels is rank-2 [batch, num_points]; its num_points range must
    // match point_coords above.
    profile->setDimensions("point_labels", nvinfer1::OptProfileSelector::kMIN, {2, 1, 2});
    profile->setDimensions("point_labels", nvinfer1::OptProfileSelector::kOPT, {2, 1, 5});
    profile->setDimensions("point_labels", nvinfer1::OptProfileSelector::kMAX, {2, 1, 10});
    config->addOptimizationProfile(profile);
    // Build and serialize the engine; buildSerializedNetwork returns nullptr
    // on failure, so check before dereferencing.
    std::unique_ptr<nvinfer1::IHostMemory> serializedModel(builder->buildSerializedNetwork(*network, *config));
    if (!serializedModel)
    {
        std::cerr << "Engine build failed for: " << f << std::endl;
        return;
    }
    std::cout << "serializedModel->size()" << serializedModel->size() << std::endl;
    // Write the serialized engine; verify the file actually opened.
    std::ofstream outfile(output, std::ofstream::out | std::ofstream::binary);
    if (!outfile)
    {
        std::cerr << "Cannot open output file: " << output << std::endl;
        return;
    }
    outfile.write(static_cast<const char*>(serializedModel->data()),
                  static_cast<std::streamsize>(serializedModel->size()));
}
#endif