上传识别PDF
import { useRef } from 'react'
import { Document, Page, pdfjs } from 'react-pdf' //'react-pdf'
// Point PDF.js at the worker script served by the application.
pdfjs.GlobalWorkerOptions.workerSrc = '/api/pdf.worker'

/**
 * Renders every page of a PDF and reports the text extracted from it.
 *
 * @param file - Forwarded unchanged to the `file` prop of `Document`.
 * @param pageCallback - Invoked with the `longParagraph` produced by
 *   `getLongContentFromPage` once text has been collected for every page;
 *   skipped when `longParagraph` is falsy.
 * @param loadCallback - Invoked with no arguments each time the document
 *   itself finishes loading.
 */
const PdfReader = ({
  file,
  pageCallback,
  loadCallback,
}: {
  file: any
  pageCallback?: (arg?: any) => void
  loadCallback?: (arg?: any) => void
}) => {
  const [numPages, setNumPages] = useState(0)
  // Held in a ref so collected pages survive re-renders: a plain local array
  // would be recreated empty on each render and pages finishing afterwards
  // would never add up to `numPages`.
  const collectedPagesRef = useRef<PageContentCollection>([])

  const onDocumentLoadSuccess = (documentArgs: DocumentCallback) => {
    // A freshly loaded document invalidates anything gathered so far.
    collectedPagesRef.current = []
    setNumPages(documentArgs?.numPages ?? 0)
    if (typeof loadCallback === 'function') {
      loadCallback()
    }
  }

  const onPageTextLoadSuccess = (page: PageCallback) => {
    // Remember which collection was current when extraction started so a
    // result arriving after a new document has loaded can be discarded.
    const collection = collectedPagesRef.current

    page
      .getTextContent()
      .then(textContent => {
        if (collection !== collectedPagesRef.current) {
          return
        }

        const entry = {
          page: page.pageNumber,
          contentList: textContent?.items,
        }
        // Replace rather than append when the same page reports again, so the
        // completion check below counts distinct pages only.
        const existingIndex = collection.findIndex(
          item => item.page === page.pageNumber
        )
        if (existingIndex === -1) {
          collection.push(entry)
        } else {
          collection[existingIndex] = entry
        }

        if (numPages > 0 && collection.length >= numPages) {
          // NOTE(review): entries are in completion order, not page order;
          // presumably getLongContentFromPage copes with that — confirm.
          const { longParagraph } = getLongContentFromPage(collection)
          if (longParagraph && typeof pageCallback === 'function') {
            pageCallback(longParagraph)
          }
        }
      })
      .catch((error: unknown) => {
        // Surface the failure instead of leaving an unhandled rejection.
        console.error(
          `Failed to extract text from PDF page ${page.pageNumber}`,
          error
        )
      })
  }

  return (
    <div>
      <Document file={file} onLoadSuccess={onDocumentLoadSuccess}>
        {Array.from({ length: numPages }, (_el, index) => (
          <div className="inside_carousel" key={`pdf_page_${index}`}>
            <Page
              pageNumber={index + 1}
              renderTextLayer={false}
              renderAnnotationLayer={false}
              onLoadSuccess={onPageTextLoadSuccess}
            />
            <br />
          </div>
        ))}
      </Document>
    </div>
  )
}
最后更新于