Skip to content

Commit 2f14442

Browse files
committed
pdf read
1 parent 1eec4f9 commit 2f14442

4 files changed

Lines changed: 89 additions & 19 deletions

File tree

package-lock.json

Lines changed: 30 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
"discord.js": "^14.16.3",
3434
"dotenv": "^16.4.5",
3535
"form-data": "^4.0.1",
36-
"openai": "^4.73.0"
36+
"openai": "^4.73.0",
37+
"pdf.js-extract": "^0.2.1"
3738
},
3839
"engines": {
3940
"node": ">=18.0.0"

utils/buildConversationLog.js

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
AI_NAME,
66
} from "../config.js";
77
import transcribeVoiceMessage from "./transcribeVoiceMessage.js";
8+
import extractFileContent from "./extractFileContent.js";
89

910
async function buildConversationLog(message, client) {
1011
const conversationLog = [
@@ -20,15 +21,11 @@ async function buildConversationLog(message, client) {
2021
const reversedMessages = Array.from(prevMessages.values()).reverse();
2122

2223
for (const msg of reversedMessages) {
23-
if (
24-
msg.content.startsWith("!") ||
25-
(msg.author.bot && msg.author.id !== client.user.id)
26-
) {
24+
if (msg.content.startsWith("!") || (msg.author.bot && msg.author.id !== client.user.id)) {
2725
continue;
2826
}
2927

3028
const role = msg.author.id === client.user.id ? "assistant" : "user";
31-
3229
conversationLog.push({
3330
role: role,
3431
content: msg.content,
@@ -50,25 +47,41 @@ async function buildConversationLog(message, client) {
5047

5148
if (message.attachments.size > 0) {
5249
console.log(`Message contains ${message.attachments.size} attachments.`);
53-
const imageAttachments = Array.from(message.attachments.values())
54-
.filter((attachment) => !attachment.name.endsWith(".ogg"))
55-
.map((attachment) => {
56-
console.log(`Image attachment URL: ${attachment.url}`);
57-
return {
58-
type: "image_url",
59-
image_url: { url: attachment.url },
60-
};
61-
});
50+
const attachmentPromises = Array.from(message.attachments.values()).map(async (attachment) => {
51+
if (attachment.name.endsWith('.ogg')) return null;
52+
53+
// Handle PDFs and text files
54+
if (attachment.name.endsWith('.pdf') || attachment.name.endsWith('.txt')) {
55+
try {
56+
const extractedText = await extractFileContent(attachment);
57+
return {
58+
type: "text",
59+
text: `Content from ${attachment.name}:\n${extractedText}`,
60+
};
61+
} catch (error) {
62+
console.error(`Error extracting content from ${attachment.name}:`, error);
63+
return null;
64+
}
65+
}
66+
67+
// Handle images
68+
return {
69+
type: "image_url",
70+
image_url: { url: attachment.url },
71+
};
72+
});
73+
74+
const processedAttachments = (await Promise.all(attachmentPromises)).filter(Boolean);
6275

63-
if (imageAttachments.length > 0) {
76+
if (processedAttachments.length > 0) {
6477
conversationLog.push({
6578
role: "user",
6679
content: [
6780
{
6881
type: "text",
6982
text: message.content,
7083
},
71-
...imageAttachments,
84+
...processedAttachments,
7285
],
7386
});
7487
}
@@ -105,4 +118,4 @@ async function handleVoiceMessage(message) {
105118
}
106119
}
107120

108-
export default buildConversationLog;
121+
export default buildConversationLog;

utils/extractFileContent.js

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import axios from 'axios';
2+
import { PDFExtract } from 'pdf.js-extract';
3+
4+
/**
 * Download an attachment and return its textual content.
 *
 * Supported types are chosen by file extension (case-insensitive):
 * `.pdf` files are parsed with pdf.js-extract, `.txt` files are decoded as UTF-8.
 *
 * @param {{ name: string, url: string }} attachment - Object exposing the file
 *   name and a URL the file can be fetched from.
 * @returns {Promise<string>} Extracted text. For PDFs, the text items of each
 *   page are joined with spaces and pages are separated by a blank line.
 * @throws {Error} 'Unsupported file type' when the extension is neither .pdf
 *   nor .txt; 'Failed to parse PDF file' (with the underlying error attached
 *   as `cause`) when PDF extraction fails. Download errors from axios
 *   propagate unchanged.
 */
async function extractFileContent(attachment) {
  // Normalise once so ".PDF" / ".Txt" are treated like their lowercase forms.
  const fileName = String(attachment.name ?? '').toLowerCase();
  const isPdf = fileName.endsWith('.pdf');
  const isText = fileName.endsWith('.txt');

  // Reject unsupported files BEFORE downloading: there is no point fetching
  // and buffering a file that cannot be processed.
  if (!isPdf && !isText) {
    throw new Error('Unsupported file type');
  }

  const response = await axios.get(attachment.url, { responseType: 'arraybuffer' });
  const buffer = Buffer.from(response.data);

  if (isText) {
    return buffer.toString('utf-8');
  }

  try {
    const pdfExtract = new PDFExtract();
    const data = await pdfExtract.extractBuffer(buffer);
    return data.pages
      .map((page) => page.content.map((item) => item.str).join(' '))
      .join('\n\n');
  } catch (error) {
    console.error('Error parsing PDF:', error);
    // Keep the original failure reachable for debugging via `cause`.
    throw new Error('Failed to parse PDF file', { cause: error });
  }
}
26+
27+
export default extractFileContent;

0 commit comments

Comments
 (0)