Skip to content

Instantly share code, notes, and snippets.

@gabouh
Forked from hubgit/pdftotext.html
Created May 8, 2016 05:21
Show Gist options
  • Save gabouh/37aca3072d85dd1eb57e0f581fba8e17 to your computer and use it in GitHub Desktop.
Save gabouh/37aca3072d85dd1eb57e0f581fba8e17 to your computer and use it in GitHub Desktop.
Embedding the PDFToText service in HTML
<!-- edit this; the PDF file must be on the same domain as this page -->
<!-- the title attributes give each frame an accessible name for assistive technology -->
<iframe id="input" src="your-file.pdf" title="PDF file to convert"></iframe>
<!-- embed the pdftotext service as an iframe -->
<!-- NOTE(review): this is an http: address; browsers block http frames as mixed content when the embedding page is served over https. Confirm an https address for the service before deploying. -->
<iframe id="processor" src="http://hubgit.github.com/2011/11/pdftotext/" title="PDFToText processor"></iframe>
<!-- a container for the output -->
<div id="output"></div>
<script>
// Glue between this page and the embedded pdftotext iframe:
//   1. wait for the processor frame to post "ready",
//   2. download the PDF referenced by the #input frame as an ArrayBuffer,
//   3. post the bytes to the processor,
//   4. show the text the processor posts back in #output.
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");

// listen for messages from the processor
window.addEventListener("message", function(event){
  // ignore messages that do not come from the processor frame
  if (event.source != processor.contentWindow) return;

  // "ready" = the processor is ready, so fetch the PDF file
  if (event.data === "ready") {
    // Reply only to the origin that announced readiness, so the PDF bytes are
    // not delivered to a different document if the frame navigates meanwhile.
    // An opaque origin is serialized as "null", which is not a valid
    // targetOrigin, so fall back to the wildcard in that case.
    var replyOrigin = (event.origin && event.origin !== "null") ? event.origin : "*";
    var pdfUrl = input.getAttribute("src");

    var xhr = new XMLHttpRequest();
    xhr.open("GET", pdfUrl, true);
    xhr.responseType = "arraybuffer";
    xhr.onload = function() {
      // onload also fires for HTTP error responses; do not forward an error
      // page to the processor as if it were the PDF
      if (xhr.status >= 400) {
        console.error("Could not fetch " + pdfUrl + " (HTTP " + xhr.status + ")");
        return;
      }
      processor.contentWindow.postMessage(xhr.response, replyOrigin);
    };
    xhr.onerror = function() {
      console.error("Network error while fetching " + pdfUrl);
    };
    xhr.send();
    return;
  }

  // anything else is the text of the PDF; skip payloads that are not strings
  // because they have no replace() method and would throw
  if (typeof event.data !== "string") return;

  // collapse every run of whitespace into a single space before displaying
  output.textContent = event.data.replace(/\s+/g, " ");
}, true);
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment