美文网首页
pdfjs 导出PDF所有图片

pdfjs 导出PDF所有图片

作者: 此昵称已被狗抢占 | 来源:发表于2020-08-24 21:08 被阅读0次
var fs = require('fs');
var path = require('path');
var Canvas = require('canvas');

var pdfjsLib = require('pdfjs-dist/es5/build/pdf.js');

// Path of the PDF to read: the first command-line argument when one is
// given, otherwise the original sample file name, so running the script with
// no arguments behaves exactly as before.
const pdfPath = process.argv[2] || '9.pdf';

// Read the whole file synchronously and wrap the bytes in a Uint8Array,
// which is what gets handed to pdf.js as `data` below.
const data = new Uint8Array(fs.readFileSync(pdfPath));

// Start loading the document; consumers await `loadingTask.promise`.
const loadingTask = pdfjsLib.getDocument({ data });

/**
 * Expands the raw pixel bytes of a decoded image object into RGBA order.
 *
 * The layout is inferred from the byte count relative to width * height:
 *   - 4 bytes per pixel: copied through unchanged (alpha preserved);
 *   - 3 bytes per pixel: RGB, alpha forced to 255 (the only layout the
 *     previous implementation handled);
 *   - ceil(width / 8) bytes per row: 1 bit per pixel, most significant bit
 *     first, set bit rendered white.
 * NOTE(review): these are understood to be the layouts pdf.js produces
 * (RGBA_32BPP, RGB_24BPP, GRAYSCALE_1BPP) - confirm against the installed
 * pdf.js version.
 *
 * @param {{width: number, height: number, data?: ArrayLike<number>}} img
 * @returns {Uint8ClampedArray} width * height * 4 bytes in RGBA order.
 * @throws {Error} If there is no pixel data or the layout is not recognised.
 */
function toRgbaBytes(img) {
  const { width, height, data } = img;
  if (!data || !(width > 0) || !(height > 0)) {
    throw new Error('image has no raw pixel data to export');
  }

  const pixelCount = width * height;
  const rgba = new Uint8ClampedArray(pixelCount * 4);

  if (data.length === pixelCount * 4) {
    rgba.set(data);
    return rgba;
  }

  if (data.length === pixelCount * 3) {
    for (let src = 0, dst = 0; src < data.length; ) {
      rgba[dst++] = data[src++];
      rgba[dst++] = data[src++];
      rgba[dst++] = data[src++];
      rgba[dst++] = 255;
    }
    return rgba;
  }

  // Rows of 1-bit images are padded up to a whole number of bytes.
  const rowBytes = (width + 7) >> 3;
  if (data.length === rowBytes * height) {
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const bit = data[y * rowBytes + (x >> 3)] & (0x80 >> (x & 7));
        const shade = bit ? 255 : 0;
        const dst = (y * width + x) * 4;
        rgba[dst] = shade;
        rgba[dst + 1] = shade;
        rgba[dst + 2] = shade;
        rgba[dst + 3] = 255;
      }
    }
    return rgba;
  }

  throw new Error(
    `unsupported pixel layout: ${data.length} bytes for ${width}x${height}`
  );
}

/**
 * Walks every page of the document behind `loadingTask`, finds the image
 * paint operators in each page's operator list, and writes each referenced
 * image to `<object id>.png` (a relative path, so it lands in the current
 * working directory).
 *
 * An image id is exported at most once per run even if it is painted several
 * times. A failure on one image is logged and does not stop the remaining
 * images of the page; a failure loading a page is logged and the next page
 * is tried.
 *
 * @returns {Promise<void>} Resolves once all pages have been processed.
 *   Rejects only if the document itself fails to load.
 */
async function extractImages() {
  const doc = await loadingTask.promise;
  const { OPS } = pdfjsLib;
  const exported = new Set();

  for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
    try {
      const page = await doc.getPage(pageNum);
      const opList = await page.getOperatorList();

      for (let i = 0; i < opList.fnArray.length; i++) {
        const fn = opList.fnArray[i];
        if (fn !== OPS.paintJpegXObject && fn !== OPS.paintImageXObject) {
          continue;
        }

        const op = opList.argsArray[i][0];
        if (exported.has(op)) {
          continue;
        }
        exported.add(op);

        try {
          // NOTE(review): ids starting with "g_" are understood to be
          // document-wide objects kept in page.commonObjs rather than
          // page.objs - confirm against the installed pdf.js version.
          const isShared =
            typeof op === 'string' && op.startsWith('g_') && page.commonObjs;
          const store = isShared ? page.commonObjs : page.objs;
          const img = store.get(op);

          const rgba = toRgbaBytes(img);

          const canvas = Canvas.createCanvas(img.width, img.height);
          const ctx = canvas.getContext('2d');
          const imageData = ctx.createImageData(img.width, img.height);
          imageData.data.set(rgba);
          ctx.putImageData(imageData, 0, 0);

          fs.writeFileSync(`${op}.png`, canvas.toBuffer('image/png'));
        } catch (error) {
          // Keep going: one unreadable image should not cost the rest.
          console.error(`Failed to export image ${op}:`, error);
        }
      }
    } catch (error) {
      console.error(error);
    }
  }
}

// Kick off the export. Per-page errors are logged inside extractImages, but
// a failure to load the document rejects the returned promise; report it and
// mark the process as failed instead of leaving the rejection unhandled.
extractImages().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

相关文章

网友评论

      本文标题:pdfjs 导出PDF所有图片

      本文链接:https://www.haomeiwen.com/subject/pwrojktx.html