Skip to content

Instantly share code, notes, and snippets.

@ichord
Created March 27, 2014 14:12
Show Gist options
  • Save ichord/9808444 to your computer and use it in GitHub Desktop.
Save ichord/9808444 to your computer and use it in GitHub Desktop.
demo of using pdf.js to extract pages to images
<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script><html>
<!--
Created using jsbin.com
Source can be edited via http://jsbin.com/pdfjs-helloworld-v2/8598/edit
-->
<body>
<canvas id="the-canvas" style="border:1px solid black"></canvas>
<input id='pdf' type='file'/>
<!-- Use latest PDF.js build from Github -->
<script type="text/javascript" src="https://rawgithub.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<script type="text/javascript">
//
// Disable workers to avoid yet another cross-origin issue (workers need the URL of
// the script to be loaded, and dynamically loading a cross-origin script does
// not work)
//
PDFJS.disableWorker = true;
//
// When a file is chosen in the #pdf input: read it as an ArrayBuffer, render
// its first page onto #the-canvas and log the canvas as a JPEG data URL.
//
// NOTE(review): this uses the legacy `PDFJS` global, `getDocument(...).then`
// and `getViewport(scale)`; confirm these match the pdf.js build that is
// actually loaded by the page.
//
var pdf = document.getElementById('pdf');
pdf.onchange = function(ev) {
  // Declared locally: assigning to undeclared names would create accidental
  // globals (and throws in strict mode).
  var file = pdf.files[0];
  if (!file) {
    // The selection was cleared; nothing to render.
    return;
  }

  var fileReader = new FileReader();

  fileReader.onload = function(ev) {
    console.log(ev);
    PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdfDocument) {
      //
      // Fetch the first page
      //
      console.log(pdfDocument);
      // Returned so that a getPage failure reaches the error handler below.
      return pdfDocument.getPage(1);
    }).then(function getPageHelloWorld(page) {
      var scale = 1.5;
      var viewport = page.getViewport(scale);
      //
      // Prepare canvas using PDF page dimensions
      //
      var canvas = document.getElementById('the-canvas');
      var context = canvas.getContext('2d');
      canvas.height = viewport.height;
      canvas.width = viewport.width;
      //
      // Render PDF page into canvas context
      //
      var task = page.render({canvasContext: context, viewport: viewport});
      // Returned so that a render failure reaches the error handler below.
      return task.promise.then(function() {
        console.log(canvas.toDataURL('image/jpeg'));
      });
    }).then(null, function(error) {
      // Single handler for load, getPage and render failures.
      console.log(error);
    });
  };

  fileReader.onerror = function() {
    // Reading the file itself failed (e.g. the file became unreadable).
    console.log(fileReader.error);
  };

  fileReader.readAsArrayBuffer(file);
};
</script>
<style id="jsbin-css">
</style>
<script>
</script>
</body>
</html>
@Jetroid
Copy link

Jetroid commented Sep 17, 2016

@gildassamuel @lakshay23695 @pritty12

To get all pages, you will want to change pdf.getPage(1).then(function getPageHelloWorld(page) { to this:

for (i = 1; i <= pdf.numPages; i++){
    pdf.getPage(i).then(function getPageHelloWorld(page) {

Don't forget that you will need an extra closing curly brace after line 55.

@svinchon
Copy link

Great example. I am using your code and using the loop mentioned above to get all pages and then convert them to image using canvas.toDataURL but I am getting a strange result: if the number of pages exceeds 2 then some images are empty or bottom-up. I suspect it has something to do with the use of "promises". Any idea how to fix this?

@anandhegde
Copy link

Hi, is there a way to extract the attachments in pdf?

@maithily
Copy link

maithily commented Feb 22, 2017

Am using this code
for (i = 1; i <= pdf.numPages; i++){
pdf.getPage(i).then(function getPageHelloWorld(page) {
but it's not showing all the slides. Is there any other way to display all the slides?

@SeverS
Copy link

SeverS commented Feb 24, 2017

@maithily you need something like:

// Request every page up front; the requests run in parallel.
var allPagesPromises = [];
// `let` keeps the counter out of the global scope (a bare `i` would be an
// implicit global).
for (let i = 1; i <= pdf.numPages; i++) {
  allPagesPromises.push(pdf.getPage(i));
}
Promise.all(allPagesPromises).then(function(allPages) {
  // you have all pages here; Promise.all keeps the request order, so
  // allPages[0] is page 1
  console.log(allPages);
}).catch(function(error) {
  // Promise.all rejects as soon as any single page request fails.
  console.log(error);
});

However, you'll need an extra iteration through allPages to process them, and the code will start to look messy.
This approach looks faster to me because you're getting the pages in parallel.

To get them one by one / one after the other you can try:

// Render the pages strictly one after the other by extending a single
// promise chain: page i is only fetched once page i - 1 has finished rendering.
var promise = Promise.resolve();
// `let` gives every iteration its own `i`; with a shared counter each callback
// would see the final value by the time it runs.
for (let i = 1; i <= pdf.numPages; i++) {
  promise = promise.then(function() {
    // Fetch the page first -- the render step below needs the page object.
    return pdf.getPage(i);
  }).then(function(page) {
    // normal code to show image (copy pasted from gist)
    var scale = 1.5;
    var viewport = page.getViewport(scale);
    //
    // Prepare canvas using PDF page dimensions
    //
    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    //
    // Render PDF page into canvas context
    //
    var task = page.render({canvasContext: context, viewport: viewport});
    // Returning the render promise is what makes the next page wait.
    return task.promise.then(function() {
      console.log(canvas.toDataURL('image/jpeg'));
    });
  });
}
// Report a failure from any page; the first rejection skips the remaining pages.
promise.then(null, function(error) {
  console.log(error);
});

WARNING: Untested code! I didn`t even used PDFJS..

@maithily
Copy link

maithily commented Feb 25, 2017

Thank you @SeverS,
/**
 * Draws one PDF page onto a freshly created canvas and adds that canvas to
 * `canvasContainer`.
 *
 * Reads `options.scale` and `canvasContainer` from the enclosing scope.
 * The render task returned by `page.render` is not returned or awaited.
 *
 * @param {object} page - page object exposing `getViewport` and `render`.
 */
function renderPage(page) {
  const pageViewport = page.getViewport(options.scale);

  const pageCanvas = document.createElement('canvas');
  const drawingContext = pageCanvas.getContext('2d');

  // Size the canvas to the scaled page before it is attached and drawn on.
  pageCanvas.height = pageViewport.height;
  pageCanvas.width = pageViewport.width;
  canvasContainer.appendChild(pageCanvas);

  page.render({
    canvasContext: drawingContext,
    viewport: pageViewport
  });
}
// Request every page (page numbers start at 1) and hand each one to
// renderPage as soon as its getPage promise resolves.
for (var i = 1; i <= pdfDoc.numPages; i++) {
  pdfDoc.getPage(i).then(renderPage);
}

I got all the pages using above code,I have an another doubt,how can i display all the pages in slider,is possible?

@ershadow786
Copy link

Is it necessary to use a canvas when converting a PDF to images with pdf.js? Can't the page be shown directly in an img tag instead of a canvas?

@sem4phor
Copy link

sem4phor commented Jul 18, 2017

@ershadow786 you can create the canvas with style=display:none attribute and in the img do this:
<img src=canvas.toDataURL()>
Warning pseudo code!

@carylewis
Copy link

Instead of rendering a page to a canvas, is it possible to extract a page and then save it to a new PDF document, or stream the one page?

The use case I am trying to solve is splitting up a multiple page pdf document into its component pages and then using pdfkit.js to manipulate the extracted page?

e.g. is there a page.save or page.stream method?

@javedbaloch4
Copy link

How can I do if i have a link <a href="myfile.pdf" target="_blank" id="pdf">myfile.pdf</a> onclick this will open a PDF in new tab? and it should be converted into image

@lalithanjali0111
Copy link

for (i = 1; i <= pdf.numPages; i++){
pdf.getPage(i).then(function getPageHelloWorld(page) {
it's not working.can anyone give better solution

@libmysterion
Copy link

Maybe learn how to program?

@libmysterion
Copy link

Anyone landing here trying to render an entire PDF document to a single canvas...
You cant!
You need to use something like above to render individually before gluing all the resulting canvas together:

<html>
<body>
  <input id='pdf' type='file'/>

  <!-- Use latest PDF.js build from Github -->
  <script type="text/javascript" src="https://rawgit.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
  
  <script type="text/javascript">
  window.PDFJS = window.pdfjsLib;

    //
    // Disable workers to avoid yet another cross-origin issue (workers need the URL of
    // the script to be loaded, and dynamically loading a cross-origin script does
    // not work)
    //
    PDFJS.disableWorker = true;

    // Scale factor applied to every page when it is rasterized.
    const RENDER_SCALE = 2;

    // Glued canvas produced by the previous upload (if any), so that a new
    // upload replaces it instead of piling up below it.
    let gluedCanvas = null;

    /**
     * Rasterizes a single PDF page onto its own detached canvas.
     *
     * @param {object} page - page object resolved by `getPage`.
     * @returns {Promise<HTMLCanvasElement>} canvas sized to the scaled page.
     */
    const renderPageToCanvas = async function(page) {
      const viewport = page.getViewport({ scale: RENDER_SCALE });
      const canvas = document.createElement('canvas');
      // Assign rather than `+=`: a new canvas already has a default height,
      // so adding to it would leave a blank band under every page.
      canvas.height = viewport.height;
      canvas.width = viewport.width;
      const task = page.render({ canvasContext: canvas.getContext('2d'), viewport: viewport });
      await task.promise;
      return canvas;
    };

    /**
     * Stacks the given canvases vertically into one new canvas.
     *
     * @param {HTMLCanvasElement[]} pageCanvases - rendered pages, top to bottom.
     * @returns {HTMLCanvasElement} canvas as tall as all pages together and as
     *   wide as the widest page.
     */
    const glueCanvases = function(pageCanvases) {
      const totalHeight = pageCanvases.reduce((height, canvas) => height + canvas.height, 0);
      // Pages may differ in width; using the widest one avoids cropping any page.
      const maxWidth = pageCanvases.reduce((width, canvas) => Math.max(width, canvas.width), 0);

      const glued = document.createElement('canvas');
      glued.height = totalHeight;
      glued.width = maxWidth;

      const context = glued.getContext('2d');
      let y = 0;
      for (const src of pageCanvases) {
        context.drawImage(src, 0, y);
        y += src.height;
      }
      return glued;
    };

    //
    // Read the chosen PDF as an ArrayBuffer, render every page and show the
    // pages glued together as one tall canvas.
    //
    var pdf = document.getElementById('pdf');
    pdf.onchange = function(ev) {
      // Declared locally instead of leaking `file` / `fileReader` as globals.
      const file = pdf.files[0];
      if (!file) {
        return;
      }

      const fileReader = new FileReader();

      fileReader.onload = async function(ev) {
        console.log(ev);
        try {
          // The loading task exposes its result through `.promise`.
          const pdfDocument = await PDFJS.getDocument(fileReader.result).promise;

          // Pages are rendered one at a time, in order, on detached canvases,
          // so unrelated canvases in the document are never glued in.
          const pageCanvases = [];
          for (let pageN = 1; pageN <= pdfDocument.numPages; pageN++) {
            const page = await pdfDocument.getPage(pageN);
            console.log('got a page', pageN);
            pageCanvases.push(await renderPageToCanvas(page));
          }

          //
          // Now just add super-glue!
          //
          const glued = glueCanvases(pageCanvases);

          // Swap only the previous result; the file input stays in the page
          // so another PDF can be chosen afterwards.
          if (gluedCanvas) {
            gluedCanvas.remove();
          }
          document.body.appendChild(glued);
          gluedCanvas = glued;
        } catch (error) {
          // Covers load, getPage and render failures alike.
          console.log(error);
        }
      };

      fileReader.onerror = function() {
        console.log(fileReader.error);
      };

      fileReader.readAsArrayBuffer(file);
    };
  </script>  
</body>
</html>

@BlueWingsTechnologies
Copy link

const go = async function() is not including in working even await task.promise; kindly help me

@afzafri
Copy link

afzafri commented Oct 17, 2019

Not working with latest version of pdf.js.

  • Deprecated API usage: PDFDocumentLoadingTask.then method, use the promise getter instead.
  • Deprecated API usage: getViewport is called with obsolete arguments.

Working with pdfjs v2.1.266 and below

@rushglen
Copy link

rushglen commented Dec 27, 2019

For viewport error, change as below:
`//var scale = 2;
//var viewport = page.getViewport(scale);

var viewport = page.getViewport({scale: 2});`
Otherwise works perfectly with pdfjs v2.2.228

@acg-thp
Copy link

acg-thp commented Aug 6, 2020

Just to be precise you must explicitly call .promise for PDFJS.getDocument
PDFJS.getDocument(fileReader.result).promise.then(function getPdfHelloWorld(pdf) {

As well as what rushglen said (Removing var scale and then passing in the scale in the viewport declaration
var viewport = page.getViewport({scale: 2});

This is in response to libmysterion code for multiple page canvas (As PDFJS has updated the above two changes must be made for his example to work)

@kanta-mir
Copy link

kanta-mir commented Oct 14, 2021

index.html:20 Uncaught ReferenceError: PDFJS is not defined

PDFJS Global object is deprecated please update the code thanks
I have a working example here
https://qutbi.amuslim.org/views.php?f=/books/0%20lat%20mi%20library/library/quran/quran-colored-hq.pdf

@sernuzh
Copy link

sernuzh commented Nov 22, 2021

I can't get work any code above

@Tieantono
Copy link

Here is a working example of mine based on the the Mozilla's Hello World with document load error handling example:

<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script>
<html>
<!--
  Created using jsbin.com
  Source can be edited via http://jsbin.com/pdfjs-helloworld-v2/8598/edit
-->

<body>
  <canvas id="the-canvas" style="border:1px solid black"></canvas>
  <input id='pdf' type='file' />

  <!-- Use latest PDF.js build from Github -->
  <script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>

  <script type="text/javascript">
    var pdfjsLib = window['pdfjs-dist/build/pdf'];
    pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://mozilla.github.io/pdf.js/build/pdf.worker.js';

    //
    // When a file is chosen in the #pdf input: read it as an ArrayBuffer,
    // render its first page onto #the-canvas and log the canvas as a JPEG
    // data URL.
    //
    var pdf = document.getElementById('pdf');

    pdf.onchange = function (ev) {
      // Declared locally: assigning to undeclared names would create
      // accidental globals (and throws in strict mode).
      var file = pdf.files[0];
      if (!file) {
        // The selection was cleared; nothing to render.
        return;
      }

      var fileReader = new FileReader();

      fileReader.onload = function (ev) {
        console.log(ev);

        var loadingTask = pdfjsLib.getDocument(fileReader.result);

        loadingTask.promise
          .then(function (pdfDocument) {
            console.log('PDF loaded');

            // Fetch the first page. Returned so that a getPage failure
            // reaches the catch handler at the end of the chain.
            var pageNumber = 1;
            return pdfDocument.getPage(pageNumber);
          })
          .then(function (page) {
            console.log('Page loaded');

            var scale = 1.5;
            console.log(page);
            var viewport = page.getViewport({ scale: scale });

            // Size the canvas to the scaled page before drawing on it.
            var canvas = document.getElementById('the-canvas');
            var context = canvas.getContext('2d');
            canvas.height = viewport.height;
            canvas.width = viewport.width;

            var renderContext = {
              canvasContext: context,
              viewport: viewport
            };

            var renderTask = page.render(renderContext);

            // Returned so that a render failure reaches the catch handler too.
            return renderTask.promise.then(function () {
              console.log(canvas.toDataURL('image/jpeg'));
            });
          })
          .catch(function (error) {
            // Single handler for load, getPage and render failures.
            console.log(error);
          });
      };

      fileReader.onerror = function () {
        // Reading the file itself failed.
        console.log(fileReader.error);
      };

      fileReader.readAsArrayBuffer(file);
    };
  </script>


  <style id="jsbin-css">

  </style>
  <script>

  </script>
</body>

</html>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment