I’ve attempted to parse PDFs using libraries like pdf-parse, pdf-lib, pdfjs-dist, and pdf2json. Although my editor reports no errors in the code, the function fails to compile and returns the following result.
Does anybody know a way to extract text from a PDF file with TypeScript?
{
  "errorCode": "INVALID_ARGUMENT",
  "errorInstanceId": "64fd8be1-c243-4308-9a12-7bfcdd073cb8",
  "errorName": "Functions:CompileFailed",
  "parameters": {
    "stdout": "[object Object]\n[object Object]\n[object Object]\n[object Object]",
    "stderr": ""
  }
}
After logging, I found that the error occurs in the following code line:
const data = await pdfParse(buffer);
import { Attachment, Function, Integer } from "@foundry/functions-api";
import * as pdfParse from "pdf-parse";
import { isUint8Array } from "util/types";
/**
 * Function class that reads a PDF attachment and runs it through pdf-parse
 * to extract its text.
 */
export class PdfToTextConverter {
  /**
   * Reads the attachment's contents, parses them with pdf-parse, and logs the
   * extracted text.
   *
   * NOTE(review): despite the method name, the value returned is the byte
   * length of the attachment, not the extracted text — confirm this is what
   * callers expect.
   *
   * @param attachment - The attachment to read; presumably a PDF file.
   * @returns The size of the attachment's contents in bytes.
   * @throws Error when reading or parsing fails; the message includes the
   *   underlying failure reason.
   */
  @Function()
  public async convertPdfToText(attachment: Attachment): Promise<Integer> {
    try {
      // Read the attachment and copy its bytes into a Node Buffer for pdf-parse.
      const blob = await attachment.readAsync();
      const arrayBuffer = await blob.arrayBuffer();
      const buffer = Buffer.from(arrayBuffer);
      // Log scalar summaries instead of raw objects: logging the objects
      // themselves shows up as "[object Object]" in the captured stdout.
      console.log(`Read ${buffer.length} bytes from attachment`);

      // Use pdf-parse to extract text from the PDF.
      const data = await pdfParse(buffer);

      // The extracted text is in the 'text' property of the result.
      const extractedText = data.text;
      console.log(`Extracted ${extractedText.length} characters of text`);
      console.log(extractedText);

      return arrayBuffer.byteLength;
    } catch (error) {
      // Keep the underlying reason so the failure can be diagnosed by the
      // caller, instead of replacing it with a generic message.
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`Error converting PDF to text: ${reason}`);
      throw new Error(`Failed to convert PDF to text: ${reason}`);
    }
  }
}