PDF → Document
Extract and convert PDF content into your desired format.
text_snippet TXT article Word code Markdown html HTML text_fields RTF menu_book EPUB table_rows CSV table_chart Excel data_object JSON data_array XML polyline SVG
picture_as_pdf

Drop a PDF file here

Or click to browse · PDF files only

tune Options

Page Range
preview Extracted Content Preview
Processing…
`; dlText(html, baseName + '.html', 'text/html'); } function exportRtf(text) { const escaped = text.replace(/\\/g,'\\\\').replace(/\{/g,'\\{').replace(/\}/g,'\\}').replace(/\n/g,'\\par\n'); const rtf = `{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 Times New Roman;}}\\f0\\fs24 ${escaped}}`; dlText(rtf, baseName + '.rtf', 'application/rtf'); } function exportJson(texts) { const obj = { source: baseName, pageCount: texts.length, pages: texts.map((t, i) => ({ page: i+1, text: t })) }; dlText(JSON.stringify(obj, null, 2), baseName + '.json', 'application/json'); } function exportXml(texts) { const pages = texts.map((t, i) => ` ${escHtml(t)}`).join('\n'); const xml = `\n\n${pages}\n`; dlText(xml, baseName + '.xml', 'application/xml'); } function exportCsv(texts) { // Simple table: split by whitespace, detect rows by line const rows = [['Page', 'Text']]; texts.forEach((t, i) => rows.push([String(i+1), t.replace(/"/g,'""')])); const csv = rows.map(r => r.map(c => `"${c}"`).join(',')).join('\n'); dlText(csv, baseName + '.csv', 'text/csv'); } async function exportXlsx(texts) { const sheetName = document.getElementById('opt-sheet').value || 'Sheet1'; const wb = XLSX.utils.book_new(); const data = [['Page', 'Content']]; texts.forEach((t, i) => data.push([i+1, t])); const ws = XLSX.utils.aoa_to_sheet(data); ws['!cols'] = [{ wch: 8 }, { wch: 100 }]; XLSX.utils.book_append_sheet(wb, ws, sheetName.slice(0, 31)); XLSX.writeFile(wb, baseName + '.xlsx'); } async function exportDocx(texts) { const { Document, Packer, Paragraph, TextRun, HeadingLevel } = docx; const children = []; texts.forEach((t, i) => { children.push(new Paragraph({ text: `Page ${i+1}`, heading: HeadingLevel.HEADING_2 })); t.split(/\n+/).forEach(line => { if (line.trim()) children.push(new Paragraph({ children: [new TextRun(line.trim())] })); }); children.push(new Paragraph({})); }); const doc = new Document({ sections: [{ children }] }); const blob = await Packer.toBlob(doc); dl(blob, baseName + '.docx'); } async function exportEpub(texts) { const zip = new JSZip(); zip.file('mimetype', 'application/epub+zip'); const meta = zip.folder('META-INF'); meta.file('container.xml', ``); const oebps = zip.folder('OEBPS'); const itemrefs = texts.map((_, i) => ``).join(''); const items = texts.map((_, i) => ``).join(''); oebps.file('content.opf', `${escHtml(baseName)}en${items}${itemrefs}`); texts.forEach((t, i) => { oebps.file(`page${i+1}.xhtml`, `Page ${i+1}

Page ${i+1}

${escHtml(t).replace(/\n/g,'

')}

`); }); const blob = await zip.generateAsync({ type: 'blob', mimeType: 'application/epub+zip' }); dl(blob, baseName + '.epub'); } async function exportSvg(rangeStr) { const rangeObj = getTextsForRange(rangeStr); const allPages = !rangeStr.trim() ? Array.from({ length: pdfDoc.numPages }, (_, i) => i + 1) : (() => { const s = new Set(); rangeStr.split(',').forEach(part => { const t = part.trim(); if (t.includes('-')) { const [a,b]=t.split('-').map(Number); for(let i=Math.max(1,a);i<=Math.min(b,pdfDoc.numPages);i++) s.add(i); } else { const n=parseInt(t); if(n>=1&&n<=pdfDoc.numPages) s.add(n); } }); return [...s].sort((a,b)=>a-b); })(); if (allPages.length === 1) { const svgText = await pageToSvg(allPages[0]); dlText(svgText, baseName + '_page1.svg', 'image/svg+xml'); } else { const zip = new JSZip(); for (const p of allPages) { const svgText = await pageToSvg(p); zip.file(`${baseName}_page${String(p).padStart(3,'0')}.svg`, svgText); } const blob = await zip.generateAsync({ type: 'blob' }); dl(blob, baseName + '_svg.zip'); } } async function pageToSvg(pageNum) { const page = await pdfDoc.getPage(pageNum); const vp = page.getViewport({ scale: 2 }); const canvas = document.createElement('canvas'); canvas.width = vp.width; canvas.height = vp.height; const ctx = canvas.getContext('2d'); ctx.fillStyle = '#fff'; ctx.fillRect(0,0,canvas.width,canvas.height); await page.render({ canvasContext: ctx, viewport: vp }).promise; const dataUrl = canvas.toDataURL('image/png'); return ``; } function escHtml(s) { return String(s).replace(/&/g,'&').replace(//g,'>').replace(/"/g,'"'); } function showToast(msg, type = '') { const t = document.getElementById('toast'); t.textContent = msg; t.className = `toast ${type}`; void t.offsetWidth; t.classList.add('show'); setTimeout(() => t.classList.remove('show'), 3500); }