I had to jump through some hoops to get the final dataset with embeddings out of Pipeline Builder.
I wanted to do the case text concatenation and the embedding of that text all in a single pipeline, but it’d fail with out-of-memory (OOM) or Module Unreachable errors even with the larger compute profiles.
So I built it without embeddings, then split the output by year, giving me 10 files.
I then generated embeddings for each year’s worth of data, each one taking 5-7 hours to build with large and native acceleration compute profiles applied. I then unioned the 10 outputs into a table of 1.6M rows with embeddings before trying and failing to use it to back an object in Ontology Manager.
This morning I’ve split that table into datasets for each of our product lines. The largest of these is ~750k rows and Ontology Manager has no issues with this. I can create an object with it without any problem; it takes around 20 minutes to build.
When I try creating an object in Ontology Manager with the total dataset as the backing data, it fails either at the changelog step or the indexing step. Diagnostic logs and screenshots:
{“objectType”:{“currentState”:“funnel-only-indexing”,“uid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“rid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“ontologyBranchRid”:“ri.ontology.main.branch.98d10c18-87f0-466b-b473-61a5381e7ae0”,“defaultBranch”:true,“definition”:{“rid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“ontologyVersion”:“00000014-a661-caf0-8a80-796a9a415190”,“datasources”:{“ri.ontology.main.datasource.0c5134de-151a-48c6-88d0-103c19f688b8”:“ri.foundry.main.dataset.c5d6c911-5cc2-4fb1-ab1c-9608b3644cb9”},“acceptsPatches”:false,“dbs”:{“ri.highbury.main.cluster.1”:“highbury”},“migrations”:{“transitions”:[],“target”:1},“indexingConfig”:“FunnelOnly”},“activePipeline”:{“type”:“batch”,“batch”:“1b7ddcc6-89cd-4dd5-a701-8ab6d7f8817b”},“replacementPipeline”:{“type”:“batch”,“batch”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”},“v2”:true},“batchPipelines”:[{“currentState”:“await-datasource-changelogs-ready”,“pipelineId”:“1b7ddcc6-89cd-4dd5-a701-8ab6d7f8817b”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“changelogs”:[“a0a66503-568a-4386-afba-46d2a98cf5f0”],“ontologyVersion”:“00000014-a622-6d4a-af73-6af87c9b1e61”},{“currentState”:“await-page-ready-v2”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“changelogs”:[“b9dcf3c3-7f8c-4481-886d-300e62714532”],“ontologyVersion”:“00000014-a661-caf0-8a80-796a9a415190”,“page”:“b227cfc4-6f82-4224-9179-0d2eda638ff2”}],“changelogs”:[{“currentState”:“up-to-date”,“changelogId”:“b9dcf3c3-7f8c-4481-886d-300e62714532”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeRid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“datasourceRid”:“ri.ontology.main.datasource.0c5134de-151a-48c6-88d0-103c19f688b8”,“datasourceLocatorRid”:“ri.foundry.main.dataset.c5d6c911-5cc2-4fb1-ab1c-9608b3644cb9”,“changelog”:“ri.foundry.main.dataset.1247e33d-19aa-45d0-bf1c-
0141276f0d6b”,“snapshot”:“ri.foundry.main.dataset.08b32043-b1ae-48c5-8137-f90e592dd0ee”,“awaitingJobRid”:null,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“SMALL”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscaling”:false,“bucketingSpec”:null}},{“currentState”:“build-failure-backoff”,“changelogId”:“a0a66503-568a-4386-afba-46d2a98cf5f0”,“pipelineId”:“1b7ddcc6-89cd-4dd5-a701-8ab6d7f8817b”,“objectTypeRid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“datasourceRid”:“ri.ontology.main.datasource.ed0c2bdc-72d2-4e4d-a648-d8d68f1c4d19”,“datasourceLocatorRid”:“ri.foundry.main.dataset.09981206-63a8-4129-a8a4-74379d46f011”,“changelog”:“ri.foundry.main.dataset.707f83f0-1f05-48d0-9d77-c237eceb011e”,“snapshot”:“ri.foundry.main.dataset.e9f2c67a-5157-4fa8-a554-b083f830d6e6”,“awaitingJobRid”:null,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“EXTRA_SMALL”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscaling”:false,“bucketingSpec”:null}}],“pages”:[{“currentState”:“await-build”,“pageId”:“b227cfc4-6f82-4224-9179-0d2eda638ff2”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeRid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“changelogInputs”:[“b9dcf3c3-7f8c-4481-886d-300e62714532”],“pageChangelog”:“ri.foundry.main.dataset.9f6c111a-00a2-4eed-b751-108dc259cacf”,“pageSnapshot”:“ri.foundry.main.dataset.c557197b-7bf9-4e3d-9569-2f264634f5f0”,“pageDatasetHasPatches”:false,“patchedBaseVersions”:{},“additionalInput”:{},“usesOldBaseVersionFormat”:false,“patchOffsetReused”:true,“awaitingJobRid”:“ri.foundry.main.job.31fe85ad-01a7-4920-9457-29de8d848a6f”,“includesPatchOffsetColumn”:true,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“EXTRA_SMALL”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscali
ng”:false,“bucketingSpec”:null}}]}
{“objectType”:{“currentState”:“funnel-only-indexing”,“uid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“rid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“ontologyBranchRid”:“ri.ontology.main.branch.98d10c18-87f0-466b-b473-61a5381e7ae0”,“defaultBranch”:true,“definition”:{“rid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“ontologyVersion”:“00000014-a661-caf0-8a80-796a9a415190”,“datasources”:{“ri.ontology.main.datasource.0c5134de-151a-48c6-88d0-103c19f688b8”:“ri.foundry.main.dataset.c5d6c911-5cc2-4fb1-ab1c-9608b3644cb9”},“acceptsPatches”:false,“dbs”:{“ri.highbury.main.cluster.1”:“highbury”},“migrations”:{“transitions”:[],“target”:1},“indexingConfig”:“FunnelOnly”},“activePipeline”:{“type”:“batch”,“batch”:“1b7ddcc6-89cd-4dd5-a701-8ab6d7f8817b”},“replacementPipeline”:{“type”:“batch”,“batch”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”},“v2”:true},“batchPipelines”:[{“currentState”:“await-datasource-changelogs-ready”,“pipelineId”:“1b7ddcc6-89cd-4dd5-a701-8ab6d7f8817b”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“changelogs”:[“a0a66503-568a-4386-afba-46d2a98cf5f0”],“ontologyVersion”:“00000014-a622-6d4a-af73-6af87c9b1e61”},{“currentState”:“await-initial-syncs-ready-v2”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“changelogs”:[“b9dcf3c3-7f8c-4481-886d-300e62714532”],“ontologyVersion”:“00000014-a661-caf0-8a80-796a9a415190”,“page”:“b227cfc4-6f82-4224-9179-0d2eda638ff2”,“persistentSyncs”:{“452a231c-1673-4441-9470-24967046f0af”:“BOOTSTRAPPING”},“nextBaseVersion”:“10000000-0000-0002-0000-000000000001”,“baseVersionAcked”:false}],“changelogs”:[{“currentState”:“up-to-date”,“changelogId”:“b9dcf3c3-7f8c-4481-886d-300e62714532”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeRid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“datasourceRid”:“ri.ontology.main.datasource.0c5134de-151a-48c6-
88d0-103c19f688b8”,“datasourceLocatorRid”:“ri.foundry.main.dataset.c5d6c911-5cc2-4fb1-ab1c-9608b3644cb9”,“changelog”:“ri.foundry.main.dataset.1247e33d-19aa-45d0-bf1c-0141276f0d6b”,“snapshot”:“ri.foundry.main.dataset.08b32043-b1ae-48c5-8137-f90e592dd0ee”,“awaitingJobRid”:null,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“SMALL”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscaling”:false,“bucketingSpec”:null}},{“currentState”:“build-failure-backoff”,“changelogId”:“a0a66503-568a-4386-afba-46d2a98cf5f0”,“pipelineId”:“1b7ddcc6-89cd-4dd5-a701-8ab6d7f8817b”,“objectTypeRid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“datasourceRid”:“ri.ontology.main.datasource.ed0c2bdc-72d2-4e4d-a648-d8d68f1c4d19”,“datasourceLocatorRid”:“ri.foundry.main.dataset.09981206-63a8-4129-a8a4-74379d46f011”,“changelog”:“ri.foundry.main.dataset.707f83f0-1f05-48d0-9d77-c237eceb011e”,“snapshot”:“ri.foundry.main.dataset.e9f2c67a-5157-4fa8-a554-b083f830d6e6”,“awaitingJobRid”:null,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“EXTRA_SMALL”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscaling”:false,“bucketingSpec”:null}}],“pages”:[{“currentState”:“up-to-date”,“pageId”:“b227cfc4-6f82-4224-9179-0d2eda638ff2”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeRid”:“ri.ontology.main.object-type.fe2beba1-246d-4299-bfcf-b827a03b953c”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“changelogInputs”:[“b9dcf3c3-7f8c-4481-886d-300e62714532”],“pageChangelog”:“ri.foundry.main.dataset.9f6c111a-00a2-4eed-b751-108dc259cacf”,“pageSnapshot”:“ri.foundry.main.dataset.c557197b-7bf9-4e3d-9569-2f264634f5f0”,“pageDatasetHasPatches”:false,“patchedBaseVersions”:{},“additionalInput”:{},“usesOldBaseVersionFormat”:false,“patchOffsetReused”:true,“includesPatchOffsetColumn”:true,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“EXTRA_S
MALL”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscaling”:false,“bucketingSpec”:null}}],“persistentSyncs”:[{“currentState”:“up-to-date”,“persistentSyncId”:“452a231c-1673-4441-9470-24967046f0af”,“pageId”:“b227cfc4-6f82-4224-9179-0d2eda638ff2”,“pipelineId”:“b95d37e5-370f-47ca-a5a6-f3e117eac2b3”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“dbType”:“highbury”,“dbRid”:“ri.highbury.main.cluster.1”,“activeSyncId”:“d245a8be-b702-44f9-bbc1-06fb85aa23c7”,“backgroundSyncId”:null}],“syncs”:[{“currentState”:“sync-failure-backoff-v2”,“syncId”:“d245a8be-b702-44f9-bbc1-06fb85aa23c7”,“persistentSyncId”:“452a231c-1673-4441-9470-24967046f0af”,“objectTypeUid”:“fe2beba1-246d-4299-bfcf-b827a03b953c”,“pageId”:“b227cfc4-6f82-4224-9179-0d2eda638ff2”,“nextBaseVersion”:“10000000-0000-0002-0000-000000000001”,“currentOntologyVersion”:“00000014-a661-caf0-8a80-796a9a415190”,“ackedOntologyVersion”:“00000014-a661-caf0-8a80-796a9a415190”,“dbType”:“highbury”,“dbRid”:“ri.highbury.main.cluster.1”,“transformResourcesDiagnostic”:{“inferredTransformProfile”:“HIGHBURY_OPTIMIZED”,“profileConfig”:{“type”:“automatic”,“automatic”:{}},“profileOverride”:null,“upscaling”:false,“bucketingSpec”:null},“targetBaseVersions”:[“10000000-0000-0002-0000-000000000001”]}]}