I've been using the function attached below for over a year, and it worked perfectly. However, two days ago something changed, and it stopped converting Polish characters in multiple installations. I have also tried setting the OCR language using its ISO 639-1 code, but it makes no difference.
Each time it runs, it skips Polish letters. Do you have any idea what could be changed so that it reads Polish text correctly again?
An example PDF file is available here to download: https://file.io/HOfUPv2oHZFK
/**
 * Convert a PDF file (blob) to a text file on Drive, using Drive's built-in OCR.
 *
 * The PDF is uploaded with OCR enabled, which produces a temporary Google Doc;
 * the text is read from that Doc and (optionally) saved as a plain-text file.
 * By default, the text file is placed in the root folder, with the same name
 * as the source PDF (but with the extension 'txt').
 *
 * NOTE: this uses Drive.Files.insert / Drive.Files.remove, which are methods of
 * the Advanced Drive Service for Drive API v2.
 *
 * Options:
 *   keepPdf      (boolean, default false) Keep a copy of the original PDF file.
 *   keepGdoc     (boolean, default false) Keep a copy of the OCR Google Doc file.
 *   keepTextfile (boolean, default true)  Keep a copy of the text file.
 *   path         (string, default blank)  Folder path to store file(s) in.
 *   ocrLanguage  (ISO 639-1 code)         Default 'pl'.
 *   textResult   (boolean, default false) If true and keepTextfile is true,
 *                                         return the text content as a string.
 *                                         If keepTextfile is false, the text
 *                                         content is returned regardless of
 *                                         this option. Otherwise, return the
 *                                         id of the text file.
 *
 * The options object passed by the caller is never modified.
 *
 * @param {blob}   pdfFile  Blob containing the PDF file
 * @param {object} options  (Optional) Object specifying handling details
 *
 * @returns {string} id of the text file (default) or the text content
 * @throws {Error} if the Advanced Drive Service is unavailable, or if
 *                 options.path does not resolve to an existing folder
 */
function pdfToText(pdfFile, options) {
  // Ensure Advanced Drive Service is enabled
  try {
    Drive.Files.list();
  } catch (e) {
    throw new Error("To use pdfToText(), first enable 'Drive API' in Resources > Advanced Google Services.");
  }

  // Resolve defaults into locals instead of writing them back onto the
  // caller's object, so a shared options object stays untouched.
  var opts = options || {};
  var keepTextfile = Object.prototype.hasOwnProperty.call(opts, "keepTextfile")
    ? opts.keepTextfile
    : true;

  // Prepare resource object for file creation. Drive expects each parent as a
  // reference of the form {id: <folderId>}, not as a DriveApp Folder object.
  var parents = [];
  if (opts.path) {
    var folder = getDriveFolderFromPath(opts.path);
    if (!folder) {
      throw new Error("pdfToText(): folder path not found: " + opts.path);
    }
    parents.push({ id: folder.getId() });
  }
  var pdfName = pdfFile.getName();
  var resource = {
    title: pdfName,
    mimeType: pdfFile.getContentType(),
    parents: parents
  };

  // Save PDF to Drive, if requested
  if (opts.keepPdf) {
    Drive.Files.insert(resource, pdfFile);
  }
  Logger.log(resource);

  // Save PDF as GDOC; the OCR happens during this upload
  resource.title = pdfName.replace(/pdf$/, 'gdoc');
  var insertOpts = {
    ocr: true,
    ocrLanguage: opts.ocrLanguage || 'pl'
  };
  var gdocFile = Drive.Files.insert(resource, pdfFile, insertOpts);

  // Get text from GDOC
  var text;
  try {
    var gdocDoc = DocumentApp.openById(gdocFile.id);
    text = gdocDoc.getBody().getText();
    Logger.log(text);
  } finally {
    // We're done using the Gdoc. Unless requested to keepGdoc, delete it --
    // even when reading it failed, so no temporary file is left behind.
    if (!opts.keepGdoc) {
      Drive.Files.remove(gdocFile.id);
    }
  }

  // Save text file, if requested
  var textFile;
  if (keepTextfile) {
    resource.title = pdfName.replace(/pdf$/, 'txt');
    resource.mimeType = MimeType.PLAIN_TEXT;
    var textBlob = Utilities.newBlob(text, MimeType.PLAIN_TEXT, resource.title);
    textFile = Drive.Files.insert(resource, textBlob);
  }

  // Return result of conversion
  if (!keepTextfile || opts.textResult) {
    return text;
  }
  return textFile.id;
}

/**
 * Resolve a slash-separated folder path, starting at the Drive root folder.
 *
 * Empty path segments (leading, trailing or doubled slashes) are skipped.
 * When several folders share a name, the first one returned by
 * getFoldersByName() is used.
 *
 * Helper utility from http://ramblings.mcpher.com/Home/excelquirks/gooscript/driveapppathfolder
 *
 * @param {string} path Folder path such as "/a/b"; blank means the root folder
 *
 * @returns {Folder|null} the folder, or null if any segment does not exist
 */
function getDriveFolderFromPath(path) {
  return (path || "/").split("/").reduce(function (prev, current) {
    if (prev && current) {
      var fldrs = prev.getFoldersByName(current);
      return fldrs.hasNext() ? fldrs.next() : null;
    }
    // Empty segment: stay where we are. Named segment after a failed lookup
    // (prev is null): the whole path is unresolved.
    return current ? null : prev;
  }, DriveApp.getRootFolder());
}