-
Notifications
You must be signed in to change notification settings - Fork 186
support Apache Arrow as a normalized data representation #2115
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
22 commits
Select commit
Hold shift + click to select a range
013bd45
columnar support for arrow tables
Fil 019a793
defer reading the values until they're actually requested (which is o…
Fil 778b890
tests
Fil 932186c
comment
Fil 1f5ded9
fix apache arrow dates (alternative to #2096)
Fil 4e729d0
add test snapshot
Fil f045315
fix test wrt https://github.com/apache/arrow/issues/40718
Fil c6704e4
arrow table data; fix BigInt coercion
mbostock e7ed7b7
more arrow support
mbostock fcd3ce6
arrow date hint; fix BigInt coercion
mbostock 7856377
inline floater
mbostock d147e81
shorten slightly
mbostock 6fc974f
valueof tests; better arrow coercion
mbostock b652131
Arrow-aware stack transform
mbostock 4faebbc
a few more dataify
mbostock 0f1bb9a
Merge branch 'main' into mbostock/arrow
Fil 107363d
fix merge conflict
Fil bd22e0b
fix Plot.find and stack customOrder
Fil d0d0b70
handle Arrow in a few more places
mbostock 1f00372
fix tree.js and add various tests
Fil 9c5f7ad
test pointer
Fil f02a805
move test outside of loop
mbostock File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,14 +7,36 @@ import {timeInterval, utcInterval} from "./time.js"; | |
// The prototype of the Uint8Array constructor, i.e. the common superclass
// constructor shared by every typed array; used for instanceof checks that
// should match any kind of typed array.
export const TypedArray = Object.getPrototypeOf(Uint8Array); | ||
// Cached reference to Object.prototype.toString.
const objectToString = Object.prototype.toString; | ||
|
||
/**
 * Tests whether the given value is a plain array or a typed array.
 *
 * @param {*} value - the value to test
 * @returns {boolean} true if value is an instance of Array or of TypedArray
 */
export function isArray(value) {
  if (value instanceof Array) return true;
  return value instanceof TypedArray;
}
|
||
/**
 * Tests whether the given value is a typed array holding numbers, i.e. any
 * typed array other than BigInt64Array or BigUint64Array.
 *
 * @param {*} value - the value to test
 * @returns {boolean} true for non-BigInt typed arrays
 */
function isNumberArray(value) {
  if (!(value instanceof TypedArray)) return false;
  return !isBigIntArray(value);
}
|
||
/**
 * Tests whether the given array constructor is a typed array constructor
 * whose elements are numbers (i.e. not BigInt64Array or BigUint64Array).
 *
 * @param {*} type - a candidate array constructor; may be null or undefined
 * @returns {boolean} true for non-BigInt typed array constructors
 */
function isNumberType(type) {
  const proto = type?.prototype;
  if (!(proto instanceof TypedArray)) return false;
  return !isBigIntType(type);
}
|
||
/**
 * Tests whether the given value is a BigInt-valued typed array.
 *
 * @param {*} value - the value to test
 * @returns {boolean} true for BigInt64Array and BigUint64Array instances
 */
function isBigIntArray(value) {
  if (value instanceof BigInt64Array) return true;
  return value instanceof BigUint64Array;
}
|
||
/**
 * Tests whether the given array constructor is one of the BigInt typed array
 * constructors.
 *
 * @param {*} type - a candidate array constructor
 * @returns {boolean} true only for BigInt64Array and BigUint64Array
 */
function isBigIntType(type) {
  switch (type) {
    case BigInt64Array:
    case BigUint64Array:
      return true;
    default:
      return false;
  }
}
|
||
// If a reindex is attached to the data, channel values expressed as arrays will | ||
// be reindexed when the channels are instantiated. See exclusiveFacets. | ||
export const reindex = Symbol("reindex"); | ||
|
||
export function valueof(data, value, type) { | ||
const valueType = typeof value; | ||
return valueType === "string" | ||
? maybeTypedMap(data, field(value), type) | ||
? isArrowTable(data) | ||
? maybeTypedArrowify(data.getChild(value), type) | ||
: maybeTypedMap(data, field(value), type) | ||
: valueType === "function" | ||
? maybeTypedMap(data, value, type) | ||
: valueType === "number" || value instanceof Date || valueType === "boolean" | ||
|
@@ -29,21 +51,25 @@ function maybeTake(values, index) { | |
} | ||
|
||
/**
 * Maps data through the accessor f into an array of the given type. When the
 * requested type is a numeric typed array, each accessed value is first passed
 * through coerceNumber.
 *
 * @param {*} data - the data to map over
 * @param {Function} f - accessor invoked as f(d, i)
 * @param {Function} [type] - the desired array constructor, if any
 * @returns {*} the result of map(data, accessor, type)
 */
function maybeTypedMap(data, f, type) {
  return map(data, isNumberType(type) ? (d, i) => coerceNumber(f(d, i)) : f, type); // allow conversion from BigInt
}
|
||
/**
 * Converts data to an array of the given type, reusing data when it already
 * is an instance of that type.
 *
 * @param {*} data - an array, typed array, iterable, or Arrow-vector-like value
 * @param {Function} [type] - the desired array constructor; if undefined, the
 *   data is only passed through arrayify
 * @returns {*} the (possibly converted) array
 */
function maybeTypedArrayify(data, type) {
  if (type === undefined) return arrayify(data); // preserve undefined type
  if (isArrowVector(data)) return maybeTypedArrowify(data, type);
  if (data instanceof type) return data;
  // Values only need coerceNumber when the target is a numeric typed array and
  // the source is not one already (this covers BigInt typed arrays too).
  return type.from(data, isNumberType(type) && !isNumberArray(data) ? coerceNumber : undefined);
}
|
||
/**
 * Wraps the accessor f so that each value it returns is passed through
 * coerceNumber.
 *
 * @param {Function} f - accessor invoked as f(d, i)
 * @returns {Function} an accessor (d, i) => coerceNumber(f(d, i))
 */
function floater(f) {
  return (d, i) => coerceNumber(f(d, i));
}

/**
 * Converts an Arrow-vector-like value to an array of the given type.
 *
 * @param {*} vector - a value exposing toArray() and type; null and undefined
 *   are returned unchanged
 * @param {Function} [type] - the desired array constructor, if any
 * @returns {*} the converted array, or the nullish vector itself
 */
function maybeTypedArrowify(vector, type) {
  if (vector == null) return vector;
  // Date-typed vectors are turned into Date objects only when the caller did
  // not ask for a specific typed array.
  if ((type === undefined || type === Array) && isArrowDateType(vector.type)) {
    return coerceDates(vector.toArray());
  }
  return maybeTypedArrayify(vector.toArray(), type);
}
|
||
export const singleton = [null]; // for data-less decoration marks, e.g. frame | ||
|
@@ -70,7 +96,7 @@ export function percentile(reduce) { | |
|
||
/**
 * Coerces the given values to numbers.
 *
 * If the values are already a numeric typed array, no coercion is required and
 * they are returned as-is. BigInt typed arrays (BigInt64Array, BigUint64Array)
 * do not count as numeric and are mapped through coerceNumber like any other
 * input.
 *
 * @param {*} values - the values to coerce
 * @returns {*} values itself, or a Float64Array of coerced numbers
 */
export function coerceNumbers(values) {
  return isNumberArray(values) ? values : map(values, coerceNumber, Float64Array);
}
|
||
// Unlike Mark’s number, here we want to convert null and undefined to NaN since | ||
|
@@ -95,7 +121,7 @@ export function coerceDate(x) { | |
? x | ||
: typeof x === "string" | ||
? isoParse(x) | ||
: x == null || isNaN((x = +x)) | ||
: x == null || isNaN((x = Number(x))) // allow conversion from BigInt | ||
? undefined | ||
: new Date(x); | ||
} | ||
|
@@ -130,9 +156,15 @@ export function keyword(input, name, allowed) { | |
return i; | ||
} | ||
|
||
/**
 * Like arrayify, but also allows data to be an Apache Arrow Table, which is
 * returned unchanged.
 *
 * @param {*} data - the data to normalize
 * @returns {*} the Arrow table itself, or arrayify(data)
 */
export function dataify(data) {
  if (isArrowTable(data)) return data;
  return arrayify(data);
}
|
||
// Promotes the specified data to an array as needed. | ||
export function arrayify(values) { | ||
if (values == null || values instanceof Array || values instanceof TypedArray) return values; | ||
if (values == null || isArray(values)) return values; | ||
if (isArrowVector(values)) return maybeTypedArrowify(values); | ||
switch (values.type) { | ||
case "FeatureCollection": | ||
return values.features; | ||
|
@@ -233,22 +265,21 @@ export function maybeZ({z, fill, stroke} = {}) { | |
return z; | ||
} | ||
|
||
/**
 * Returns the number of elements in the given data: the length for arrays and
 * typed arrays, otherwise the data's numRows property.
 *
 * @param {*} data - an array, typed array, or object with numRows; may be nullish
 * @returns {number|undefined} the element count, or undefined if unavailable
 */
export function lengthof(data) {
  if (isArray(data)) return data.length;
  return data?.numRows;
}
|
||
/**
 * Returns a Uint32Array with elements [0, 1, 2, … lengthof(data) - 1].
 *
 * @param {*} data - the data whose indexes are enumerated; its size is taken
 *   from lengthof(data)
 * @returns {Uint32Array} the index of the data
 */
export function range(data) {
  const n = lengthof(data);
  const r = new Uint32Array(n);
  for (let i = 0; i < n; ++i) r[i] = i;
  return r;
}
|
||
/**
 * Returns a filtered range of data given the test function.
 *
 * @param {*} data - the data to filter
 * @param {Function} test - predicate invoked as test(data[i], i, data)
 * @returns {Uint32Array} the indexes for which test returned a truthy value
 */
export function where(data, test) {
  // NOTE(review): elements are read as data[i]; confirm this is intended for
  // every kind of data that range accepts.
  const keep = (i) => test(data[i], i, data);
  return range(data).filter(keep);
}
mbostock marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/**
 * Returns an array [values[index[0]], values[index[1]], …].
 *
 * For arrays and typed arrays the result uses the same constructor as values;
 * for any other values the elements are read with values.at(i).
 *
 * @param {*} values - an array, typed array, or object exposing at(i)
 * @param {*} index - the indexes to take
 * @returns {*} the selected values, in index order
 */
export function take(values, index) {
  return isArray(values)
    ? map(index, (i) => values[i], values.constructor)
    : map(index, (i) => values.at(i));
}
|
||
// If f does not take exactly one argument, wraps it in a function that uses take. | ||
|
@@ -575,3 +606,30 @@ export function maybeClip(clip) { | |
else if (clip != null) clip = keyword(clip, "clip", ["frame", "sphere"]); | ||
return clip; | ||
} | ||
|
||
// https://github.com/observablehq/stdlib/blob/746ca2e69135df6178e4f3a17244def35d8d6b20/src/arrow.js#L4C1-L17C1
/**
 * Duck-type test for an Apache Arrow Table: the value must expose getChild and
 * toArray methods and a schema whose fields property is an array.
 *
 * @param {*} value - the value to test
 * @returns {boolean} true if value looks like an Arrow table
 */
function isArrowTable(value) {
  // Boolean(...) ensures a strict boolean rather than leaking a falsy operand
  // (null, undefined, a missing schema) to the caller.
  return Boolean(
    value &&
      typeof value.getChild === "function" &&
      typeof value.toArray === "function" &&
      value.schema &&
      Array.isArray(value.schema.fields)
  );
}
|
||
/**
 * Duck-type test for an Apache Arrow Vector: the value must expose a toArray
 * method and a truthy type property.
 *
 * @param {*} value - the value to test
 * @returns {boolean} true if value looks like an Arrow vector
 */
function isArrowVector(value) {
  // Boolean(...) ensures a strict boolean rather than returning the vector's
  // type object (or a falsy operand) to the caller.
  return Boolean(value && typeof value.toArray === "function" && value.type);
}
|
||
// Apache Arrow now represents dates as numbers. We currently only support
// implicit coercion to JavaScript Date objects when the numbers represent
// milliseconds since Unix epoch.
/**
 * Tests whether the given Arrow data type is a date or timestamp type with
 * millisecond units.
 *
 * @param {*} type - an Arrow data type exposing typeId and unit; may be nullish
 * @returns {boolean} true for millisecond date and timestamp types
 */
function isArrowDateType(type) {
  const DATE = 8; // typeId: date
  const TIMESTAMP = 10; // typeId: timestamp
  const MILLISECOND = 1; // unit: millisecond
  if (!type) return false;
  return (type.typeId === DATE || type.typeId === TIMESTAMP) && type.unit === MILLISECOND;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.