fix: prefer local file URLs for OCR (#1436)
This commit is contained in:
parent
cb12e05584
commit
e2c137b2ef
|
@ -2,6 +2,7 @@ import { store } from '../_store/store'
|
|||
import { uploadMedia } from '../_api/media'
|
||||
import { toast } from '../_components/toast/toast'
|
||||
import { scheduleIdleTask } from '../_utils/scheduleIdleTask'
|
||||
import { mediaUploadFileCache } from '../_utils/mediaUploadFileCache'
|
||||
|
||||
export async function doMediaUpload (realm, file) {
|
||||
const { currentInstance, accessToken } = store.get()
|
||||
|
@ -12,6 +13,7 @@ export async function doMediaUpload (realm, file) {
|
|||
if (composeMedia.length === 4) {
|
||||
throw new Error('Only 4 media max are allowed')
|
||||
}
|
||||
mediaUploadFileCache.set(response.id, file)
|
||||
composeMedia.push({
|
||||
data: response,
|
||||
file: { name: file.name },
|
||||
|
|
|
@ -98,6 +98,7 @@
|
|||
import { runTesseract } from '../../../_utils/runTesseract'
|
||||
import SvgIcon from '../../SvgIcon.html'
|
||||
import { toast } from '../../toast/toast'
|
||||
import { mediaUploadFileCache } from '../../../_utils/mediaUploadFileCache'
|
||||
|
||||
const updateRawTextInStore = throttleTimer(requestPostAnimationFrame)
|
||||
|
||||
|
@ -119,7 +120,8 @@
|
|||
computed: {
|
||||
length: ({ rawText }) => length(rawText || ''),
|
||||
overLimit: ({ mediaAltCharLimit, length }) => length > mediaAltCharLimit,
|
||||
url: ({ media, index }) => get(media, [index, 'data', 'url'])
|
||||
url: ({ media, index }) => get(media, [index, 'data', 'url']),
|
||||
mediaId: ({ media, index }) => get(media, [index, 'data', 'id'])
|
||||
},
|
||||
methods: {
|
||||
observe,
|
||||
|
@ -165,8 +167,19 @@
|
|||
async onClick () {
|
||||
this.set({ extracting: true })
|
||||
try {
|
||||
const { url } = this.get()
|
||||
const text = await runTesseract(url)
|
||||
const { url, mediaId } = this.get()
|
||||
const file = mediaUploadFileCache.get(mediaId)
|
||||
let text
|
||||
if (file) { // Avoid downloading from the network a file that the user *just* uploaded
|
||||
const fileUrl = URL.createObjectURL(file)
|
||||
try {
|
||||
text = await runTesseract(fileUrl)
|
||||
} finally {
|
||||
URL.revokeObjectURL(fileUrl)
|
||||
}
|
||||
} else {
|
||||
text = await runTesseract(url)
|
||||
}
|
||||
const { media, index, realm } = this.get()
|
||||
if (media[index].description !== text) {
|
||||
media[index].description = text
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
// keep a cache of files for the most recent uploads to avoid
|
||||
// re-downloading them for OCR
|
||||
|
||||
import { QuickLRU } from '../_thirdparty/quick-lru/quick-lru'
|
||||
|
||||
// Shared cache of just-uploaded File objects, keyed by the media id returned
// from the upload response (written by doMediaUpload, read by the OCR click
// handler so it can build a local object URL instead of re-fetching the image).
// NOTE(review): maxSize of 4 presumably mirrors the "4 media max" compose
// limit — confirm before changing either value.
export const mediaUploadFileCache = new QuickLRU({ maxSize: 4 })
|
|
@ -1,6 +1,6 @@
|
|||
import { importTesseractWorker } from '../_utils/asyncModules'
|
||||
|
||||
export async function runTesseract (image) {
|
||||
export async function runTesseract (url) {
|
||||
const worker = await importTesseractWorker()
|
||||
|
||||
// TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
|
||||
|
@ -9,7 +9,7 @@ export async function runTesseract (image) {
|
|||
const OldBlob = window.Blob
|
||||
window.Blob = null
|
||||
try {
|
||||
promise = worker.recognize(image)
|
||||
promise = worker.recognize(url)
|
||||
} finally {
|
||||
window.Blob = OldBlob
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue