Skip to content

Commit 4d9ae33

Browse files
peblair authored and Li Xu committed
RD-2370: TVEC updates (#130)
* RD-2370: Update API model to support per-token embeddings
* RD-2370: Rename option
* RD-2370: Update TVEC example
* RD-2370: Fix copyright notice on TextEmbeddingOptions
* RD-2370: Clarify documentation
1 parent 2f4ec9f commit 4d9ae33

3 files changed

Lines changed: 51 additions & 3 deletions

File tree

examples/src/main/java/com/basistech/rosette/examples/TextEmbeddingExample.java

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
import com.basistech.rosette.api.HttpRosetteAPI;
1919
import com.basistech.rosette.apimodel.DocumentRequest;
20+
import com.basistech.rosette.apimodel.TextEmbeddingOptions;
2021
import com.basistech.rosette.apimodel.TextEmbeddingResponse;
2122

2223
import java.io.IOException;
@@ -44,7 +45,9 @@ private void run() throws IOException {
4445
//The api object creates an http client, but to provide your own:
4546
//api.httpClient(CloseableHttpClient)
4647
// When no options, use <?>.
47-
DocumentRequest<?> request = DocumentRequest.builder().content(embeddingsData).build();
48+
DocumentRequest<TextEmbeddingOptions> request = DocumentRequest.<TextEmbeddingOptions>builder()
49+
.content(embeddingsData)
50+
.build();
4851
TextEmbeddingResponse response = rosetteApi.perform(HttpRosetteAPI.TEXT_EMBEDDING_SERVICE_PATH, request, TextEmbeddingResponse.class);
4952
System.out.println(responseToJson(response));
5053
}
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright 2018 Basis Technology Corp.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.basistech.rosette.apimodel;
17+
18+
import com.basistech.rosette.annotations.JacksonMixin;
19+
import lombok.Builder;
20+
import lombok.Value;
21+
22+
/**
23+
* Text embedding options
24+
*/
25+
@Value
26+
@Builder
27+
@JacksonMixin
28+
public class TextEmbeddingOptions extends Options {
29+
30+
/**
31+
* @return whether embeddings should be returned for each token in addition to the whole document
32+
*/
33+
private Boolean perToken;
34+
35+
}

model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingResponse.java

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,17 @@
3333
public class TextEmbeddingResponse extends Response {
3434

3535
/**
36-
* @return the embedding vector as a list
36+
* @return the document embedding vector as a list
3737
*/
38-
private final List<Double> embedding;
38+
private final List<Double> documentEmbedding;
39+
40+
/**
41+
* @return list of tokens, or {@code null}
42+
*/
43+
private final List<String> tokens;
44+
45+
/**
46+
* @return list of per-token embeddings, 1:1 with tokens, or {@code null}
47+
*/
48+
private final List<List<Double>> tokenEmbeddings;
3949
}

0 commit comments

Comments
 (0)