Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Bump to apache-arrow 15 and simplify worker utils #22

Merged
merged 2 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Refer to the [`algorithm` namespace](https://geoarrow.github.io/geoarrow-js/modu

## Web Worker utilities

Refer to the [`worker` namespace](https://geoarrow.github.io/geoarrow-js/modules/worker.html). Note that due to limitations in Arrow JS (as of v14) you **must** use `preparePostMessage` before a call to `structuredClone` or `postMessage`, to ensure it can correctly be rehydrated on the worker.
Refer to the [`worker` namespace](https://geoarrow.github.io/geoarrow-js/modules/worker.html). Use `preparePostMessage` to obtain references to all underlying `ArrayBuffer` objects, so they can be transferred instead of copied.

```ts
import * as arrow from "apache-arrow";
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
"src/"
],
"peerDependencies": {
"apache-arrow": ">=14"
"apache-arrow": ">=15"
},
"devDependencies": {
"@rollup/plugin-terser": "^0.4.3",
"@rollup/plugin-typescript": "^11.1.2",
"@types/node": "^20.9.3",
"@types/proj4": "^2",
"apache-arrow": "^14",
"apache-arrow": "^15",
"esbuild": "^0.19.8",
"gh-pages": "^6.1.0",
"prettier": "^3.1.0",
Expand Down
146 changes: 3 additions & 143 deletions src/worker/rehydrate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,11 @@ import { Data } from "apache-arrow/data";
import { Vector } from "apache-arrow/vector";
import { Field } from "apache-arrow/schema";
import type { Buffers } from "apache-arrow/data";
import {
LineString,
Point,
Polygon,
MultiLineString,
MultiPoint,
MultiPolygon,
} from "../type";
import { Polygon, isPolygon } from "../type";
import { PolygonData } from "../data";

// Typedefs that include only the information kept from a structuredClone
type PostMessageDataType = Pick<DataType, "children"> & {
__type: Type;
};
type PostMessageDataType = Pick<DataType, "children" | "typeId">;
type PostMessageField = Pick<Field, "name" | "nullable" | "metadata"> & {
type: PostMessageDataType;
};
Expand All @@ -62,16 +53,7 @@ type PostMessageVector<T extends DataType> = Pick<
> & { type: PostMessageDataType };

function rehydrateType<T extends Type>(type: PostMessageDataType): DataType<T> {
// Note: by default in Arrow JS, the `DataType` is a class with no identifying
// attribute. Since a `structuredClone` is unable to maintain class
// information, the result of `structuredClone(new arrow.Utf8())` is an empty
// object `{}`.
//
// To get around this, in `preparePostMessage`, we manually assign the
// `typeId` (usually a getter) onto `__type`. Then when rehydrating the type,
// we can match on the `__type`, checking `arrow.Type` values, and
// reconstitute a full `arrow.DataType` object.
switch (type.__type) {
switch (type.typeId) {
case Type.Null:
return new Null() as DataType<T>;
case Type.Int:
Expand Down Expand Up @@ -158,7 +140,6 @@ function rehydrateField(field: PostMessageField): Field {
export function rehydrateData<T extends DataType>(
data: PostMessageData<T>,
): Data<T> {
// @ts-expect-error
const children = data.children.map((childData) => rehydrateData(childData));
const dictionary = data.dictionary
? rehydrateVector(data.dictionary)
Expand Down Expand Up @@ -209,124 +190,3 @@ export function rehydratePolygonData(
// on the JS side.
return data;
}

// NOTE: these functions are copied from `type.ts` to work on __type

/**
 * Check that the given type is a Point data type.
 *
 * A Point is accepted in one of two layouts:
 * - a `FixedSizeList` with a list size of 2, 3 or 4 whose child is a float
 *   type, or
 * - a `Struct` with 2, 3 or 4 float children whose names are all drawn from
 *   `x`, `y`, `z`, `m`.
 *
 * The type id is read from the `__type` property instead of `typeId`.
 * `__type` is stored on the data type objects themselves, so for children
 * (which are fields) it must be looked up through `field.type`, not on the
 * field.
 *
 * @param type The data type to inspect.
 * @returns `true` when `type` matches one of the Point layouts above.
 */
function isPoint(type: DataType): type is Point {
  // @ts-expect-error `__type` is not declared on DataType
  if (type.__type === Type.FixedSizeList) {
    // Check list size
    // @ts-expect-error `listSize` is not declared on the base DataType
    if (![2, 3, 4].includes(type.listSize)) {
      return false;
    }

    // Check child of FixedSizeList is floating type. The child is a field;
    // the type id marker lives on the field's type.
    // @ts-expect-error `__type` is not declared on DataType
    if (type.children[0].type.__type !== Type.Float) {
      return false;
    }

    return true;
  }

  // @ts-expect-error `__type` is not declared on DataType
  if (type.__type === Type.Struct) {
    // Check number of children
    if (![2, 3, 4].includes(type.children.length)) {
      return false;
    }

    // Check that children have correct field names
    if (
      !type.children.every((field) => ["x", "y", "z", "m"].includes(field.name))
    ) {
      return false;
    }

    // Check that every child field holds a floating type
    // @ts-expect-error `__type` is not declared on DataType
    if (!type.children.every((field) => field.type.__type === Type.Float)) {
      return false;
    }

    return true;
  }

  return false;
}

/**
 * Check that the given type is a LineString data type: a `List` whose child
 * is a Point type.
 *
 * The outer type id is read from `__type` rather than `typeId`.
 */
function isLineString(type: DataType): type is LineString {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // Short-circuit so the child is only inspected when the outer type is a List
  return outerIsList && isPoint(type.children[0].type);
}

/**
 * Check that the given type is a Polygon data type: a `List` whose child is
 * a LineString type.
 *
 * The outer type id is read from `__type` rather than `typeId`.
 */
function isPolygon(type: DataType): type is Polygon {
  // @ts-expect-error `__type` is not declared on DataType
  const outerTypeId = type.__type;

  if (outerTypeId === Type.List) {
    // The child of the outer list must itself be a linestring
    const ringType = type.children[0].type;
    return isLineString(ringType);
  }

  return false;
}

/**
 * Check that the given type is a MultiPoint data type: a `List` whose child
 * is a Point type.
 *
 * The outer type id is read from `__type` rather than `typeId`.
 */
function isMultiPoint(type: DataType): type is MultiPoint {
  // @ts-expect-error `__type` is not declared on DataType
  const outerTypeId = type.__type;

  // Anything other than a List cannot be a MultiPoint
  if (outerTypeId !== Type.List) {
    return false;
  }

  const pointType = type.children[0].type;
  return isPoint(pointType);
}

/**
 * Check that the given type is a MultiLineString data type: a `List` whose
 * child is a LineString type.
 *
 * The outer type id is read from `__type` rather than `typeId`.
 */
function isMultiLineString(type: DataType): type is MultiLineString {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // The child is only inspected when the outer type is a List
  return outerIsList ? isLineString(type.children[0].type) : false;
}

/**
 * Check that the given type is a MultiPolygon data type: a `List` whose
 * child is a Polygon type.
 *
 * The outer type id is read from `__type` rather than `typeId`.
 */
function isMultiPolygon(type: DataType): type is MultiPolygon {
  // @ts-expect-error `__type` is not declared on DataType
  const outerTypeId = type.__type;

  if (outerTypeId === Type.List) {
    // The child of the outer list must itself be a polygon
    const polygonType = type.children[0].type;
    return isPolygon(polygonType);
  }

  return false;
}
16 changes: 1 addition & 15 deletions src/worker/transferable.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DataType } from "apache-arrow/type";
import { BufferType, Type } from "apache-arrow/enum";
import { BufferType } from "apache-arrow/enum";
import { Data } from "apache-arrow/data";
import { Vector } from "apache-arrow/vector";
import { hardClone } from "./hard-clone";
Expand Down Expand Up @@ -30,7 +30,6 @@ export function preparePostMessage<T extends DataType>(
transferArrayBuffers.push(...arrayBuffers);
}
const vector = new Vector(postMessageDatas);
assignTypeIdOnType(vector.type);
return [vector, transferArrayBuffers];
}

Expand Down Expand Up @@ -74,18 +73,5 @@ export function preparePostMessage<T extends DataType>(
transferArrayBuffers.push(input.buffers[BufferType.TYPE].buffer);
}

assignTypeIdOnType(input.type);

return [input, transferArrayBuffers];
}

/**
 * Copy `typeId` onto a plain `__type` property of the given data type, then
 * do the same for the type of every child field, recursively.
 *
 * @param type The data type to annotate; it is mutated in place.
 */
function assignTypeIdOnType<T extends Type>(type: DataType<T>): void {
  // @ts-expect-error __type does not exist
  type.__type = type.typeId;

  // A missing or empty `children` means there is nothing to descend into
  const childFields = type.children || [];
  for (const field of childFields) {
    assignTypeIdOnType(field.type);
  }
}
Loading