How can I get the top N objects per some aggregation?

I have an object type and I want to aggregate by a property (e.g. propertyA) and get the top N values for this property.

For instance, if I have an object type with 10 million object instances and 5,000 different values for a given “propertyA”, how can I get the top N values for this property (e.g. the most frequent ones)?

You could use something like the following. You would call getTopNObjectTypeABucketKeys(objects, "propertyA", "propertyThatWillBeIgnoredSinceWeCount", N, "count"). The implementation is quite generic: it lets you get the top N buckets for several aggregation operators (average, cardinality, max, min, sum, count). You would need one public function per object type (e.g. getTopNObjectTypeABucketKeys), but all the underlying functions are reusable (hence the ObjectSet<ObjectTypeA | ObjectTypeB> parameter of getTopNBucketKeys).

import { Function, Integer, ITwoDimensionalAggregationStep, TwoDimensionalAggregation, BucketKey, ITerminalAggregationOperations, UserFacingError } from "@foundry/functions-api";
import { ObjectSet, ObjectType, ObjectTypeA, ObjectTypeB } from "@foundry/ontology-api";


/**
 * Functions that group an object set by one property, aggregate each group with a
 * chosen operator, and return the keys of the N groups with the largest aggregated value.
 *
 * One public, object-type-specific entry point is needed per object type; the private
 * helpers are shared between them.
 */
export class MyFunctions {
    /**
     * Returns the keys of the top `n` buckets of `objects`, grouped by `groupByProperty`
     * and ranked (descending) by the result of `aggregationOperator` on `aggregationProperty`.
     *
     * @param objects Object set of ObjectTypeA to aggregate.
     * @param groupByProperty API name of the property whose values define the buckets.
     * @param aggregationProperty API name of the property to aggregate. Ignored (and not
     *        validated) when `aggregationOperator` is "count".
     * @param n Number of bucket keys to return; must be a positive integer.
     * @param aggregationOperator One of the `AggregationOperator` values.
     * @returns Up to `n` bucket keys as strings, highest aggregated value first.
     * @throws UserFacingError if a property name, the operator or `n` is invalid.
     */
    @Function()
    public async getTopNObjectTypeABucketKeys(objects: ObjectSet<ObjectTypeA>, groupByProperty: string, aggregationProperty: string, n: Integer, aggregationOperator: string): Promise<string[]> {
        this.checkPropertyApiNameIsValid(groupByProperty, ObjectTypeA);
        // "count" never reads the aggregation property, so callers may pass any placeholder
        // for it; validating it here would reject that documented usage.
        if (aggregationOperator !== AggregationOperator.COUNT) {
            this.checkPropertyApiNameIsValid(aggregationProperty, ObjectTypeA);
        }
        // Checking property types could also be a good idea
        return this.getTopNBucketKeys(objects, groupByProperty, aggregationProperty, aggregationOperator, n);
    }

    /**
     * Object-type-agnostic implementation: groups, aggregates, sorts the buckets by
     * descending value and keeps the first `n` keys.
     *
     * @throws UserFacingError if `n` is not a positive integer or the operator is unknown.
     */
    private async getTopNBucketKeys(
        objects: ObjectSet<ObjectTypeA | ObjectTypeB>,
        groupByProperty: string, aggregationProperty: string, aggregationOperator: string, n: Integer
    ): Promise<string[]> {
        // A negative n would make slice(0, n) drop buckets from the end instead of keeping the top ones.
        if (!Number.isInteger(n) || n < 1) {
            throw new UserFacingError(`"${n}" is not a valid number of buckets. It must be a positive integer.`);
        }

        // Ask for as many buckets as possible rather than only n: the ranking happens below,
        // so capping the grouping at n would discard buckets before they are compared.
        // NOTE(review): 10000 is assumed to be the upper bound accepted by exactValues -
        // confirm against the platform documentation. With more distinct values than this,
        // the result is only a best effort.
        const maxBuckets = 10000;

        const grouping = objects.groupBy(el => (el as any)[groupByProperty].exactValues({ maxBuckets }));
        const aggregation = await this.getAggregationPerOperator(grouping, aggregationProperty, aggregationOperator);

        // Sort a copy so the aggregation result itself is left untouched.
        return [...aggregation.buckets]
            .sort((a, b) => b.value - a.value)
            .slice(0, n)
            .map(bucket => bucket.key.toString());
    }

    /**
     * Applies the terminal aggregation named by `aggregationOperator` to `grouping`.
     *
     * @param grouping Result of a groupBy step.
     * @param aggregationProperty API name of the property to aggregate (unused for "count").
     * @param aggregationOperator One of the `AggregationOperator` values.
     * @throws UserFacingError if `aggregationOperator` is not a known operator.
     */
    private getAggregationPerOperator(grouping: ITwoDimensionalAggregationStep<any, any, BucketKey>, aggregationProperty: string, aggregationOperator: string): Promise<TwoDimensionalAggregation<BucketKey, number>> {
        switch (aggregationOperator) {
            case AggregationOperator.AVERAGE:
                return grouping.average(el => (el as any)[aggregationProperty]);
            case AggregationOperator.CARDINALITY:
                return grouping.cardinality(el => (el as any)[aggregationProperty]);
            case AggregationOperator.MAX:
                return grouping.max(el => (el as any)[aggregationProperty]);
            case AggregationOperator.MIN:
                return grouping.min(el => (el as any)[aggregationProperty]);
            case AggregationOperator.SUM:
                return grouping.sum(el => (el as any)[aggregationProperty]);
            case AggregationOperator.COUNT:
                return grouping.count();
            default: {
                // Only needed to build the error message, so computed on this path only.
                const operators: string[] = Object.values(AggregationOperator);
                throw new UserFacingError(`"${aggregationOperator}" is not a valid aggregation operator. Valid operators are: ${operators}`);
            }
        }
    }

    /**
     * Ensures `propertyApiName` is one of the property API names declared on `objectType`.
     *
     * @throws UserFacingError listing the valid names when it is not.
     */
    private checkPropertyApiNameIsValid(propertyApiName: string, objectType: ObjectType): void {
        const allPropertyApiNames = Object.values(objectType.properties).map(property => property.apiName);
        if (!allPropertyApiNames.includes(propertyApiName)) {
            throw new UserFacingError(`"${propertyApiName}" is not a valid property API name for object type ${objectType.apiName}. Valid names are: ${allPropertyApiNames.toString()}`);
        }
    }
}

/**
 * Aggregation operators accepted by the functions in this file. Each string value is
 * the name of the terminal aggregation method invoked on the grouping step.
 */
const AggregationOperator = {
    AVERAGE: "average",
    CARDINALITY: "cardinality",
    MAX: "max",
    MIN: "min",
    SUM: "sum",
    COUNT: "count",
} as const;

/** Union of the operator strings, so the name remains usable in type positions. */
type AggregationOperator = (typeof AggregationOperator)[keyof typeof AggregationOperator];