fix: fix tesseract.js progress bar (#1599)

* fix: fix tesseract.js progress bar

* fixup
This commit is contained in:
Nolan Lawson 2019-10-23 18:07:24 -07:00 committed by GitHub
parent 2a248cb482
commit c174c19ddd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 40 additions and 28 deletions

View File

@@ -2,24 +2,49 @@ import { importTesseractWorker } from '../_utils/asyncModules'
const DESTROY_WORKER_DELAY = 300000 // 5 minutes const DESTROY_WORKER_DELAY = 300000 // 5 minutes
// TODO: it's flaky to try to estimate tesseract's total progress this way
// Each entry pairs a status string that is matched against the worker's
// progress messages with the share of the overall progress assigned to it.
const steps = [
  ['loading tesseract core', 0.05],
  ['initializing tesseract', 0.05],
  ['loading language traineddata', 0.1],
  ['initializing api', 0.2],
  ['recognizing text', 0.6]
].map(([status, proportion]) => ({ status, proportion }))
let worker let worker
let destroyWorkerHandle let destroyWorkerHandle
async function initWorker () { // TODO: it seems hacky that we have to spy on the tesseract worker to figure out its progress
// Ordered table of the worker status strings that count towards the overall
// progress, keyed by status with the proportion of the total as the value.
const steps = Object.entries({
  'loading tesseract core': 0.1,
  'initializing tesseract': 0.05,
  'loading language traineddata': 0.1,
  'initializing api': 0.2,
  'recognizing text': 0.55
}).map(([status, proportion]) => ({ status, proportion }))
// Development-only sanity check: the step proportions are meant to cover the
// whole progress bar, so their total should be 1.
if (process.env.NODE_ENV !== 'production') {
  const totalProportion = steps.reduce((sum, { proportion }) => sum + proportion, 0)
  // Compare with a tolerance rather than `!== 1`: sums of decimal fractions are
  // subject to binary floating-point rounding (e.g. 0.1 + 0.2 !== 0.3), so an
  // exact comparison can raise a false alarm for perfectly valid proportions.
  // The negated `<=` form also reports a NaN total (e.g. a missing proportion).
  if (!(Math.abs(totalProportion - 1) <= 1e-9)) {
    console.error('Steps do not add up to 1! You should probably fix this.')
  }
}
// Runs `runnable` while listening to the underlying worker's 'message' events.
// Messages whose status is 'progress' and whose inner status matches one of
// the known `steps` are converted with getTotalProgress() and handed to
// `onProgress` (if one was supplied). The listener is always detached again,
// whether `runnable` resolves or throws. Resolves with runnable's result.
async function spyOnWorkerProgress (onProgress, runnable) {
  const isKnownStep = name => steps.some(step => step.status === name)

  const handleMessage = ({ data }) => {
    if (!onProgress || data.status !== 'progress') {
      return
    }
    if (isKnownStep(data.data.status)) {
      onProgress(getTotalProgress(data.data))
    }
  }

  worker.worker.addEventListener('message', handleMessage)
  try {
    return await runnable()
  } finally {
    worker.worker.removeEventListener('message', handleMessage)
  }
}
// Creates the shared tesseract worker on first use; does nothing when
// `worker` is already set. Setup calls load(), loadLanguage('eng') and
// initialize('eng') in that order. `onProgress` is handed to
// spyOnWorkerProgress() so that progress messages emitted during setup are
// reported to the caller as well.
// NOTE(review): `worker` is assigned before setup has finished, so if one of
// the setup calls rejects, `worker` stays set and later calls would skip
// setup — confirm whether this is handled elsewhere in the file.
async function initWorker (onProgress) {
if (!worker) { if (!worker) {
worker = (await importTesseractWorker())() worker = (await importTesseractWorker())()
await worker.load() await spyOnWorkerProgress(onProgress, async () => {
await worker.loadLanguage('eng') await worker.load()
await worker.initialize('eng') await worker.loadLanguage('eng')
await worker.initialize('eng')
})
} }
} }
@@ -55,25 +80,12 @@ function getTotalProgress (progressInfo) {
} }
// Runs worker.recognize(url, 'eng') and resolves with its result.
// While recognition is running, 'message' events on worker.worker are
// observed; those with status 'progress' whose inner status matches a known
// step are converted with getTotalProgress() and passed to `onProgress`
// (when provided). The listener is removed in a `finally`, so it is detached
// even if recognition throws.
async function recognize (url, onProgress) { async function recognize (url, onProgress) {
// TODO: it seems hacky that we have to spy on the tesseract worker to figure out its progress return spyOnWorkerProgress(onProgress, () => worker.recognize(url, 'eng'))
const listener = event => {
const { data } = event
if (onProgress && data.status === 'progress' && steps.find(({ status }) => status === data.data.status)) {
onProgress(getTotalProgress(data.data))
}
}
worker.worker.addEventListener('message', listener)
try {
const res = await worker.recognize(url, 'eng')
return res
} finally {
worker.worker.removeEventListener('message', listener)
}
} }
export async function runTesseract (url, onProgress) { export async function runTesseract (url, onProgress) {
cancelDestroyWorker() cancelDestroyWorker()
await initWorker() await initWorker(onProgress)
try { try {
const res = await recognize(url, onProgress) const res = await recognize(url, onProgress)
console.log('result', res) console.log('result', res)