Skip to content

Commit 95f3787

Browse files
authored
feat: configurable limit for sample values COMPASS-8984 (#230)
1 parent d31950c commit 95f3787

File tree

3 files changed

+31
-14
lines changed

3 files changed

+31
-14
lines changed

src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ async function analyzeDocuments(
3737
*/
3838
async function parseSchema(
3939
source: AnyIterable,
40-
options?: SchemaParseOptions
40+
options?: Partial<SchemaParseOptions>
4141
): Promise<InternalSchema> {
4242
return (await getCompletedSchemaAnalyzer(source, options)).getResult();
4343
}

src/schema-analyzer.ts

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,18 @@ type SemanticTypeMap = {
162162
[typeName: string]: SemanticTypeFunction | boolean;
163163
};
164164

165-
export type SchemaParseOptions = {
166-
semanticTypes?: boolean | SemanticTypeMap;
167-
storeValues?: boolean;
165+
type AllSchemaParseOptions = {
166+
semanticTypes: boolean | SemanticTypeMap;
167+
storeValues: boolean;
168168
signal?: AbortSignal;
169+
storedValuesLengthLimit: number;
170+
};
171+
export type SchemaParseOptions = Partial<AllSchemaParseOptions>;
172+
173+
const defaultSchemaParseOptions: AllSchemaParseOptions = {
174+
semanticTypes: false,
175+
storeValues: true,
176+
storedValuesLengthLimit: 10000
169177
};
170178

171179
/**
@@ -331,25 +339,25 @@ function simplifiedSchema(fields: SchemaAnalysisFieldsMap): SimplifiedSchema {
331339

332340
function cropString(value: string, limit: number) {
333341
if (limit < 1) return '';
334-
return value.charCodeAt(limit - 1) === value.codePointAt(10000 - 1)
342+
return value.charCodeAt(limit - 1) === value.codePointAt(limit - 1)
335343
? value.slice(0, limit)
336344
: value.slice(0, limit - 1);
337345
}
338346

339-
function getCappedValue(bsonType: SchemaBSONType, value: BSONValue) {
347+
function getCappedValue(bsonType: SchemaBSONType, value: BSONValue, limit: number) {
340348
if (bsonType === 'String') {
341-
return cropString(value as string, 10000);
349+
return cropString(value as string, limit);
342350
}
343351
if (bsonType === 'Binary') {
344352
value = value as Binary;
345-
return value.buffer.length > 10000
346-
? new Binary(value.buffer.slice(0, 10000), value.sub_type)
353+
return value.buffer.length > limit
354+
? new Binary(value.buffer.slice(0, limit), value.sub_type)
347355
: value;
348356
}
349357
if (bsonType === 'Code') {
350358
value = value as Code;
351-
return (value.code.length >= 10000)
352-
? new Code(cropString(value.code, 10000), value.scope)
359+
return (value.code.length >= limit)
360+
? new Code(cropString(value.code, limit), value.scope)
353361
: value;
354362
}
355363
return value;
@@ -459,7 +467,7 @@ function finalizeSchema(schemaAnalysis: SchemaAnalysisRoot): SchemaField[] {
459467

460468
export class SchemaAnalyzer {
461469
semanticTypes: SemanticTypeMap;
462-
options: SchemaParseOptions;
470+
options: AllSchemaParseOptions;
463471
documentsAnalyzed = 0;
464472
schemaAnalysisRoot: SchemaAnalysisRoot = {
465473
fields: Object.create(null),
@@ -474,7 +482,7 @@ export class SchemaAnalyzer {
474482

475483
constructor(options?: SchemaParseOptions) {
476484
// Set default options.
477-
this.options = { semanticTypes: false, storeValues: true, ...options };
485+
this.options = { ...defaultSchemaParseOptions, ...options };
478486

479487
this.semanticTypes = {
480488
...semanticTypes
@@ -555,7 +563,7 @@ export class SchemaAnalyzer {
555563
}
556564

557565
type.values.pushSome(
558-
getCappedValue(type.bsonType, value)
566+
getCappedValue(type.bsonType, value, this.options.storedValuesLengthLimit)
559567
);
560568
}
561569
};

test/bloated.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,13 @@ describe('bloated documents', function() {
4141
assert.ok(binary.length() <= 10000);
4242
assert.strictEqual(binary.sub_type, 2);
4343
});
44+
45+
it('the limit is configurable', async function() {
46+
const documents = [{
47+
str: generateRandomString(20000)
48+
}];
49+
const schema = await getSchema(documents, { storedValuesLengthLimit: 5 });
50+
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
51+
assert.ok(stringLength === 5);
52+
});
4453
});

0 commit comments

Comments
 (0)