Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export {
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
Decimal,
List,
LargeList,
Struct, StructRow,
Union, DenseUnion, SparseUnion,
Dictionary,
Expand Down Expand Up @@ -100,6 +101,7 @@ export {
DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder,
IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder,
ListBuilder,
LargeListBuilder,
MapBuilder,
NullBuilder,
StructBuilder,
Expand Down
2 changes: 2 additions & 0 deletions src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export {
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
Decimal,
List,
LargeList,
Struct,
Union, DenseUnion, SparseUnion,
Dictionary,
Expand Down Expand Up @@ -85,6 +86,7 @@ export { BinaryBuilder } from './builder/binary.js';
export { BinaryViewBuilder } from './builder/binaryview.js';
export { LargeBinaryBuilder } from './builder/largebinary.js';
export { ListBuilder } from './builder/list.js';
export { LargeListBuilder } from './builder/largelist.js';
export { FixedSizeListBuilder } from './builder/fixedsizelist.js';
export { MapBuilder } from './builder/map.js';
export { StructBuilder } from './builder/struct.js';
Expand Down
4 changes: 2 additions & 2 deletions src/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
DataType, strideForType,
Float, Int, Decimal, FixedSizeBinary,
Date_, Time, Timestamp, Interval, Duration,
Utf8, LargeUtf8, Binary, LargeBinary, List, Map_,
Utf8, LargeUtf8, Binary, LargeBinary, List, LargeList, Map_,
} from './type.js';
import { createIsValidFunction } from './builder/valid.js';
import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js';
Expand Down Expand Up @@ -357,7 +357,7 @@ export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary
}

/** @ignore */
export abstract class VariableWidthBuilder<T extends Binary | LargeBinary | Utf8 | LargeUtf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
export abstract class VariableWidthBuilder<T extends Binary | LargeBinary | Utf8 | LargeUtf8 | List | LargeList | Map_, TNull = any> extends Builder<T, TNull> {
protected _pendingLength = 0;
protected _offsets: OffsetsBufferBuilder<T>;
protected _pending: Map<number, any> | undefined;
Expand Down
55 changes: 55 additions & 0 deletions src/builder/largelist.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Field } from '../schema.js';
import { DataType, LargeList } from '../type.js';
import { OffsetsBufferBuilder } from './buffer.js';
import { bigIntToNumber } from '../util/bigint.js';
import { Builder, BuilderOptions, VariableWidthBuilder } from '../builder.js';

/**
 * Builder for `LargeList<T>` values.
 *
 * Holds at most one child builder (for the list elements) and an offsets
 * buffer builder typed for `LargeList<T>`; offsets are written as `bigint`
 * values (see `_flushPending`).
 *
 * @ignore
 */
export class LargeListBuilder<T extends DataType = any, TNull = any> extends VariableWidthBuilder<LargeList<T>, TNull> {
// Offsets builder re-declared with the LargeList-specific type parameter.
protected _offsets: OffsetsBufferBuilder<LargeList<T>>;
/**
 * @param opts Builder options; `opts.type` is the `LargeList` type used to
 * construct the offsets buffer builder.
 */
constructor(opts: BuilderOptions<LargeList<T>, TNull>) {
super(opts);
this._offsets = new OffsetsBufferBuilder(opts.type);
}
/**
 * Registers the single child builder and replaces `this.type` with a new
 * `LargeList` whose field wraps the child's type.
 *
 * @param child Builder for the list elements.
 * @param name Name given to the child field (defaults to `'0'`).
 * @returns `this.numChildren - 1`, read after the child is stored
 * (presumably the index of the new child -- confirm against `Builder`).
 * @throws Error if a child has already been added.
 */
public addChild(child: Builder<T>, name = '0') {
if (this.numChildren > 0) {
throw new Error('LargeListBuilder can only have one child.');
}
this.children[this.numChildren] = child;
// Third `Field` argument is `true` (presumably "nullable" -- confirm against `Field`).
this.type = new LargeList(new Field(name, child.type, true));
return this.numChildren - 1;
}
/**
 * Writes every pending `(index, value)` entry into the offsets builder and
 * the child builder.
 *
 * @param pending Map from row index to the list value for that row;
 * `undefined` entries are recorded with a length of zero.
 */
protected _flushPending(pending: Map<number, T['TValue'] | undefined>) {
const offsets = this._offsets;
const [child] = this.children;
for (const [index, value] of pending) {
if (typeof value === 'undefined') {
// No value for this row: record a zero-length entry.
offsets.set(index, BigInt(0));
} else {
const v = value as T['TValue'];
const n = v.length;
// Record the length, then read back the offset stored at `index` and
// convert it from bigint to number to use as the child start position.
const start = bigIntToNumber(offsets.set(index, BigInt(n)).buffer[index]);
// Copy each element of the value into the child builder at start + i.
for (let i = -1; ++i < n;) {
child.set(start + i, v[i]);
}
Comment on lines +46 to +51
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, fixed

}
}
}
}
12 changes: 11 additions & 1 deletion src/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ Object.defineProperty(Data, Symbol.hasInstance, {

import {
Dictionary,
Bool, Null, Utf8, Utf8View, LargeUtf8, Binary, BinaryView, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Bool, Null, Utf8, Utf8View, LargeUtf8, Binary, BinaryView, LargeBinary, Decimal, FixedSizeBinary, List, LargeList, FixedSizeList, Map_, Struct,
Float,
Int,
Date_,
Expand Down Expand Up @@ -455,6 +455,13 @@ class MakeDataVisitor extends Visitor {
const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap], [child]);
}
/**
 * Builds a `Data` instance for a `LargeList` type from loosely-typed props.
 *
 * The validity bitmap is normalized with `toUint8Array` and the offsets with
 * `toBigInt64Array`. Defaults applied when a prop is `undefined`:
 * `offset` -> 0, `length` -> `valueOffsets.length - 1`,
 * `nullCount` -> -1 if a `nullBitmap` prop was supplied (truthy), else 0.
 */
public visitLargeList<T extends LargeList>(props: LargeListDataProps<T>) {
    const type = props['type'];
    const child = props['child'];
    const givenOffset = props['offset'];
    const offset = givenOffset === undefined ? 0 : givenOffset;

    const nullBitmap = toUint8Array(props['nullBitmap']);
    const valueOffsets = toBigInt64Array(props['valueOffsets']);

    const givenLength = props['length'];
    const length = givenLength === undefined ? valueOffsets.length - 1 : givenLength;

    const givenNullCount = props['nullCount'];
    const nullCount = givenNullCount === undefined
        ? (props['nullBitmap'] ? -1 : 0)
        : givenNullCount;

    // Buffer slots are passed as [valueOffsets, undefined, nullBitmap]; the single child goes in its own array.
    return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap], [child]);
}
public visitStruct<T extends Struct>(props: StructDataProps<T>) {
const { ['type']: type, ['offset']: offset = 0, ['children']: children = [] } = props;
const nullBitmap = toUint8Array(props['nullBitmap']);
Expand Down Expand Up @@ -539,6 +546,7 @@ interface Utf8DataProps<T extends Utf8> extends DataProps_<T> { valueOffsets: Va
interface Utf8ViewDataProps<T extends Utf8View> extends DataProps_<T> { views: DataBuffer<T>; variadicBuffers?: ReadonlyArray<ArrayLike<number> | Iterable<number> | Uint8Array>; data?: DataBuffer<T> }
interface LargeUtf8DataProps<T extends LargeUtf8> extends DataProps_<T> { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer<T> }
interface ListDataProps<T extends List> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; child: Data<T['valueType']> }
/**
 * Props accepted by `makeData` for a `LargeList` type: the value offsets may be
 * given as either a `LargeValueOffsetsBuffer` or a `ValueOffsetsBuffer`
 * (`visitLargeList` passes them through `toBigInt64Array`), plus one child
 * `Data` for the list's value type.
 */
interface LargeListDataProps<T extends LargeList> extends DataProps_<T> { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; child: Data<T['valueType']> }
interface FixedSizeListDataProps<T extends FixedSizeList> extends DataProps_<T> { child: Data<T['valueType']> }
interface StructDataProps<T extends Struct> extends DataProps_<T> { children: Data[] }
interface Map_DataProps<T extends Map_> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; child: Data }
Expand Down Expand Up @@ -566,6 +574,7 @@ export type DataProps<T extends DataType> = (
T extends LargeUtf8 /* */ ? LargeUtf8DataProps<T> :
T extends Utf8View /* */ ? Utf8ViewDataProps<T> :
T extends List /* */ ? ListDataProps<T> :
T extends LargeList /* */ ? LargeListDataProps<T> :
T extends FixedSizeList /* */ ? FixedSizeListDataProps<T> :
T extends Struct /* */ ? StructDataProps<T> :
T extends Map_ /* */ ? Map_DataProps<T> :
Expand Down Expand Up @@ -596,6 +605,7 @@ export function makeData<T extends Utf8>(props: Utf8DataProps<T>): Data<T>;
export function makeData<T extends LargeUtf8>(props: LargeUtf8DataProps<T>): Data<T>;
export function makeData<T extends Utf8View>(props: Utf8ViewDataProps<T>): Data<T>;
export function makeData<T extends List>(props: ListDataProps<T>): Data<T>;
export function makeData<T extends LargeList>(props: LargeListDataProps<T>): Data<T>;
export function makeData<T extends FixedSizeList>(props: FixedSizeListDataProps<T>): Data<T>;
export function makeData<T extends Struct>(props: StructDataProps<T>): Data<T>;
export function makeData<T extends Map_>(props: Map_DataProps<T>): Data<T>;
Expand Down
1 change: 1 addition & 0 deletions src/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export enum Type {
Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds */
LargeBinary = 19, /** Large variable-length bytes (no guarantee of UTF8-ness) */
LargeUtf8 = 20, /** Large variable-length string as List<Char> */
LargeList = 21, /** A list of some logical data type with 64-bit offsets */
BinaryView = 23, /** Variable-length binary values backed by inline-or-referenced views */
Utf8View = 24, /** Variable-length UTF8 string values backed by inline-or-referenced views */

Expand Down
4 changes: 4 additions & 0 deletions src/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import type { LargeUtf8Builder } from './builder/largeutf8.js';
import type { BinaryBuilder } from './builder/binary.js';
import type { LargeBinaryBuilder } from './builder/largebinary.js';
import type { ListBuilder } from './builder/list.js';
import type { LargeListBuilder } from './builder/largelist.js';
import type { FixedSizeListBuilder } from './builder/fixedsizelist.js';
import type { MapBuilder } from './builder/map.js';
import type { StructBuilder } from './builder/struct.js';
Expand Down Expand Up @@ -242,6 +243,7 @@ export type TypeToDataType<T extends Type> = {
[Type.DurationNanosecond]: type.DurationNanosecond;
[Type.Map]: type.Map_;
[Type.List]: type.List;
[Type.LargeList]: type.LargeList;
[Type.Struct]: type.Struct;
[Type.Dictionary]: type.Dictionary;
[Type.FixedSizeList]: type.FixedSizeList;
Expand Down Expand Up @@ -300,6 +302,7 @@ type TypeToBuilder<T extends Type = any, TNull = any> = {
[Type.DurationNanosecond]: DurationNanosecondBuilder<TNull>;
[Type.Map]: MapBuilder<any, any, TNull>;
[Type.List]: ListBuilder<any, TNull>;
[Type.LargeList]: LargeListBuilder<any, TNull>;
[Type.Struct]: StructBuilder<any, TNull>;
[Type.Dictionary]: DictionaryBuilder<any, TNull>;
[Type.FixedSizeList]: FixedSizeListBuilder<any, TNull>;
Expand Down Expand Up @@ -358,6 +361,7 @@ type DataTypeToBuilder<T extends DataType = any, TNull = any> = {
[Type.DurationNanosecond]: T extends type.DurationNanosecond ? DurationNanosecondBuilder<TNull> : never;
[Type.Map]: T extends type.Map_ ? MapBuilder<T['keyType'], T['valueType'], TNull> : never;
[Type.List]: T extends type.List ? ListBuilder<T['valueType'], TNull> : never;
[Type.LargeList]: T extends type.LargeList ? LargeListBuilder<T['valueType'], TNull> : never;
[Type.Struct]: T extends type.Struct ? StructBuilder<T['dataTypes'], TNull> : never;
[Type.Dictionary]: T extends type.Dictionary ? DictionaryBuilder<T, TNull> : never;
[Type.FixedSizeList]: T extends type.FixedSizeList ? FixedSizeListBuilder<T['valueType'], TNull> : never;
Expand Down
3 changes: 2 additions & 1 deletion src/ipc/metadata/message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ import ByteBuffer = flatbuffers.ByteBuffer;
import {
DataType, Dictionary, TimeBitWidth,
Utf8, LargeUtf8, Binary, LargeBinary, BinaryView, Utf8View, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
List, LargeList, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration,
} from '../../type.js';

Expand Down Expand Up @@ -521,6 +521,7 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType<any> {
case Type['Utf8View']: return new Utf8View();
case Type['Bool']: return new Bool();
case Type['List']: return new List((children || [])[0]);
case Type['LargeList']: return new LargeList((children || [])[0]);
case Type['Struct_']: return new Struct(children || []);
}

Expand Down
27 changes: 27 additions & 0 deletions src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ export abstract class DataType<TType extends Type = Type, TChildren extends Type
/** @nocollapse */ static isInterval(x: any): x is Interval_ { return x?.typeId === Type.Interval; }
/** @nocollapse */ static isDuration(x: any): x is Duration { return x?.typeId === Type.Duration; }
/** @nocollapse */ static isList(x: any): x is List { return x?.typeId === Type.List; }
/** @nocollapse */ static isLargeList(x: any): x is LargeList { return x?.typeId === Type.LargeList; }
// TODO: Implement ListView type
// /** @nocollapse */ static isListView(x: any): x is ListView { return x?.typeId === Type.ListView; }
/** @nocollapse */ static isStruct(x: any): x is Struct { return x?.typeId === Type.Struct; }
Expand Down Expand Up @@ -616,6 +617,32 @@ export class List<T extends DataType = any> extends DataType<Type.List, { [0]: T
})(List.prototype);
}

/**
 * Type-level members of `LargeList`, merged with the class of the same name.
 * Offsets are declared as `BigInt64Array`.
 *
 * @ignore
 */
export interface LargeList<T extends DataType = any> extends DataType<Type.LargeList, { [0]: T }> {
TArray: Array<T>;
TValue: Vector<T>;
TOffsetArray: BigInt64Array;
OffsetArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/**
 * List data type tagged with `Type.LargeList`, holding exactly one child
 * field that describes the element type.
 *
 * @ignore
 */
export class LargeList<T extends DataType = any> extends DataType<Type.LargeList, { [0]: T }> {
/** @param child Field describing the list's element type; stored as the only entry of `children`. */
constructor(child: Field<T>) {
super(Type.LargeList);
this.children = [child];
}
public declare readonly children: Field<T>[];
/** Renders as `LargeList<...>` with the value type interpolated into the string. */
public toString() { return `LargeList<${this.valueType}>`; }
/** Data type of the list elements, taken from the first child field. */
public get valueType(): T { return this.children[0].type as T; }
/** The first (and only) child field. */
public get valueField(): Field<T> { return this.children[0] as Field<T>; }
/** Delegates to the value type's `ArrayType`. */
public get ArrayType(): T['ArrayType'] { return this.valueType.ArrayType; }
// Runs once when the class is defined: installs prototype-level defaults
// (`children = null`, `OffsetArrayType = BigInt64Array`) and sets the
// prototype's `Symbol.toStringTag` to 'LargeList'; that same string becomes
// the value of this static member.
protected static [Symbol.toStringTag] = ((proto: LargeList) => {
(<any>proto).children = null;
(<any>proto).OffsetArrayType = BigInt64Array;
return proto[Symbol.toStringTag] = 'LargeList';
})(LargeList.prototype);
}

/** @ignore */
export interface Struct<T extends TypeMap = any> extends DataType<Type.Struct, T> {
TArray: Array<StructRowProxy<T>>;
Expand Down
3 changes: 2 additions & 1 deletion src/util/buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,9 @@ export function rebaseValueOffsets(offset: number, length: number, valueOffsets:
// shifted by the start offset, such that the new start offset is 0
if (offset !== 0) {
valueOffsets = valueOffsets.slice(0, length);
const delta = typeof valueOffsets[0] === 'bigint' ? BigInt(offset) : offset;
for (let i = -1, n = valueOffsets.length; ++i < n;) {
valueOffsets[i] += offset;
valueOffsets[i] += delta;
}
}
return valueOffsets.subarray(0, length);
Expand Down
3 changes: 3 additions & 0 deletions src/visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export abstract class Visitor {
public visitTime(_node: any, ..._args: any[]): any { return null; }
public visitDecimal(_node: any, ..._args: any[]): any { return null; }
public visitList(_node: any, ..._args: any[]): any { return null; }
public visitLargeList(_node: any, ..._args: any[]): any { return null; }
public visitStruct(_node: any, ..._args: any[]): any { return null; }
public visitUnion(_node: any, ..._args: any[]): any { return null; }
public visitDictionary(_node: any, ..._args: any[]): any { return null; }
Expand Down Expand Up @@ -114,6 +115,7 @@ function getVisitFnByTypeId(visitor: Visitor, dtype: Type, throwIfNotFound = tru
case Type.TimeNanosecond: fn = visitor.visitTimeNanosecond || visitor.visitTime; break;
case Type.Decimal: fn = visitor.visitDecimal; break;
case Type.List: fn = visitor.visitList; break;
case Type.LargeList: fn = visitor.visitLargeList; break;
case Type.Struct: fn = visitor.visitStruct; break;
case Type.Union: fn = visitor.visitUnion; break;
case Type.DenseUnion: fn = visitor.visitDenseUnion || visitor.visitUnion; break;
Expand Down Expand Up @@ -211,6 +213,7 @@ function inferDType<T extends DataType>(type: T): Type {
return Type.Duration;
case Type.Map: return Type.Map;
case Type.List: return Type.List;
case Type.LargeList: return Type.LargeList;
case Type.Struct: return Type.Struct;
case Type.Union:
switch ((type as any as Union).mode) {
Expand Down
2 changes: 2 additions & 0 deletions src/visitor/builderctor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { IntervalBuilder, IntervalDayTimeBuilder, IntervalMonthDayNanoBuilder, I
import { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder } from '../builder/duration.js';
import { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from '../builder/int.js';
import { ListBuilder } from '../builder/list.js';
import { LargeListBuilder } from '../builder/largelist.js';
import { MapBuilder } from '../builder/map.js';
import { NullBuilder } from '../builder/null.js';
import { StructBuilder } from '../builder/struct.js';
Expand Down Expand Up @@ -90,6 +91,7 @@ export class GetBuilderCtor extends Visitor {
public visitTimeNanosecond() { return TimeNanosecondBuilder; }
public visitDecimal() { return DecimalBuilder; }
public visitList() { return ListBuilder; }
public visitLargeList() { return LargeListBuilder; }
public visitStruct() { return StructBuilder; }
public visitUnion() { return UnionBuilder; }
public visitDenseUnion() { return DenseUnionBuilder; }
Expand Down
9 changes: 6 additions & 3 deletions src/visitor/get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import { uint16ToFloat64 } from '../util/math.js';
import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js';
import {
DataType, Dictionary,
Bool, Null, Utf8, Utf8View, LargeUtf8, Binary, BinaryView, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Bool, Null, Utf8, Utf8View, LargeUtf8, Binary, BinaryView, LargeBinary, Decimal, FixedSizeBinary, List, LargeList, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
Expand Down Expand Up @@ -83,6 +83,7 @@ export interface GetVisitor extends Visitor {
visitTimeNanosecond<T extends TimeNanosecond>(data: Data<T>, index: number): T['TValue'] | null;
visitDecimal<T extends Decimal>(data: Data<T>, index: number): T['TValue'] | null;
visitList<T extends List>(data: Data<T>, index: number): T['TValue'] | null;
visitLargeList<T extends LargeList>(data: Data<T>, index: number): T['TValue'] | null;
visitStruct<T extends Struct>(data: Data<T>, index: number): T['TValue'] | null;
visitUnion<T extends Union>(data: Data<T>, index: number): T['TValue'] | null;
visitDenseUnion<T extends DenseUnion>(data: Data<T>, index: number): T['TValue'] | null;
Expand Down Expand Up @@ -261,9 +262,10 @@ const getTime = <T extends Time>(data: Data<T>, index: number): T['TValue'] => {
const getDecimal = <T extends Decimal>({ values, stride }: Data<T>, index: number): T['TValue'] => BN.decimal(values.subarray(stride * index, stride * (index + 1)));

/** @ignore */
const getList = <T extends List>(data: Data<T>, index: number): T['TValue'] => {
const getList = <T extends List | LargeList>(data: Data<T>, index: number): T['TValue'] => {
const { valueOffsets, stride, children } = data;
const { [index * stride]: begin, [index * stride + 1]: end } = valueOffsets;
const begin = bigIntToNumber(valueOffsets[index * stride]);
const end = bigIntToNumber(valueOffsets[index * stride + 1]);
const child: Data<T['valueType']> = children[0];
const slice = child.slice(begin, end - begin);
return new Vector([slice]) as T['TValue'];
Expand Down Expand Up @@ -399,6 +401,7 @@ GetVisitor.prototype.visitTimeMicrosecond = wrapGet(getTimeMicrosecond);
GetVisitor.prototype.visitTimeNanosecond = wrapGet(getTimeNanosecond);
GetVisitor.prototype.visitDecimal = wrapGet(getDecimal);
GetVisitor.prototype.visitList = wrapGet(getList);
GetVisitor.prototype.visitLargeList = wrapGet(getList);
GetVisitor.prototype.visitStruct = wrapGet(getStruct);
GetVisitor.prototype.visitUnion = wrapGet(getUnion);
GetVisitor.prototype.visitDenseUnion = wrapGet(getDenseUnion);
Expand Down
Loading
Loading