PHP - Linking to html files is wrong in Pdf-to-html

1.3k Views Asked by At

I have installed Poppler Utils for windows in addition to https://github.com/mgufrone/pdf-to-html

It works perfectly and converts PDF files to HTML by generating a single HTML file that contains two frames: one for page navigation and the other for the actual text.

The problem is that when the HTML files are generated, the `src` attributes of the frames point to the wrong path.

For Example:

  • Test.html

  • Pages.html

  • Page_1.html

All these files exist in the same folder named "Output".

Test.html contains 2 iframes linking to Pages.html and Page_1.html

Here's the problem in Test.html:

<frameset cols="100,*">
<frame name="links" src="output/Pages.html"/>
<frame name="contents" src="output/Pages_1.html"/>
</frameset>

Should be:

<frameset cols="100,*">
<frame name="links" src="Pages.html"/>
<frame name="contents" src="Pages_1.html"/>
</frameset>

PDF.php

<?php namespace Gufy\PdfToHtml;

/**
 * Reads the metadata of a single PDF file by running the `pdfinfo` binary
 * and parsing its "Key: value" output.
 *
 * The binary is run lazily on first access and the parsed result is cached
 * on the instance.
 */
class Pdf
{
  /** @var string path of the PDF file, as passed to the constructor */
  protected $file;
  /** @var array|null parsed pdfinfo output keyed by normalised field name; null until info() has run */
  protected $info;

  /**
   * @param string $file    path of the PDF file to inspect
   * @param array  $options each entry is assigned to the property named by its key
   */
  public function __construct($file, $options=array())
  {
    $this->file = $file;
    foreach((array)$options as $key => $item)
    {
      $this->$key = $item;
    }
  }

  /**
   * Returns the parsed pdfinfo fields, running the binary on first use.
   *
   * @return array field name (lower-cased, spaces replaced by "_") => trimmed value
   */
  public function getInfo()
  {
    // Strict check: an empty result is still a cached result, so a file that
    // yields no fields does not spawn the binary again on every call.
    if($this->info === null)
      $this->info();
    return $this->info;
  }

  /**
   * Runs the binary returned by bin() on the file and stores the parsed output.
   *
   * @return $this current object
   */
  protected function info()
  {
    // Quote the path so that spaces and shell metacharacters stay inside one argument.
    $content = (string)shell_exec($this->bin().' '.escapeshellarg($this->file));
    $info = array();
    foreach(explode("\n", $content) as $line)
    {
      // Blank lines, a lone "\r" or any other line without a separator carry no field.
      if(strpos($line, ':') === false)
        continue;
      // Split on the first colon only: values such as dates and times contain colons themselves.
      list($key, $value) = explode(':', $line, 2);
      $info[str_replace(' ', '_', strtolower(trim($key)))] = trim($value);
    }
    $this->info = $info;
    return $this;
  }

  /**
   * Loads the metadata if necessary and returns an Html object for the same file.
   *
   * @return Html converter for this file (class defined elsewhere in the package)
   */
  public function html()
  {
    if($this->info === null)
      $this->info();
    return new Html($this->file);
  }

  /**
   * Returns the "pages" field reported by the binary.
   *
   * @return string|null the value as printed by the binary, or null when the field is absent
   */
  public function getPages()
  {
    $info = $this->getInfo();
    return isset($info['pages']) ? $info['pages'] : null;
  }

  /**
   * @return string command used to read the metadata, taken from the 'pdfinfo.bin' config entry
   */
  public function bin()
  {
    return Config::get('pdfinfo.bin', '/usr/bin/pdfinfo');
  }
}

Base.php

<?php
namespace Gufy\PdfToHtml;

/**
 * Converts one PDF file to HTML by running the `pdftohtml` binary.
 *
 * Options set on the instance are translated to command line flags, and the
 * generated files are written to the configured output directory.
 */
class Base
{
    /** @var array conversion options; generateOptions() maps each one to a command line flag */
    private $options=array(
        'singlePage'=>false,
        'imageJpeg'=>false,
        'ignoreImages'=>false,
        'zoom'=>1.5,
        'noFrames'=>true,
    );
    /** @var string directory the generated files are written to */
    public $outputDir;
    /** @var string path of the PDF file being converted */
    private $file;

    /**
     * @param string $pdfFile path to the pdf file; when empty no file is opened
     * @param array  $options option key => value pairs, applied through setOptions()
     */
    public function __construct($pdfFile='', $options=array())
    {
        // Apply the options first so they are kept even when the file is opened later via open().
        foreach((array)$options as $key => $value)
        {
            $this->setOptions($key, $value);
        }
        if(!empty($pdfFile))
            $this->open($pdfFile);
    }

    /**
     * open pdf file that will be converted; the output directory is reset to the file's own directory
     * @param string $pdfFile path to pdf file
     * @return $this current object
     */
    public function open($pdfFile)
    {
        $this->file = $pdfFile;
        $this->setOutputDirectory(dirname($pdfFile));
        return $this;
    }

    /**
     * run the conversion, read the generated html file and delete it afterwards
     * @return string|false content of the generated file, or false when no file was produced
     */
    public function html()
    {
        $this->generate();
        $file_output = $this->outputHtmlPath();
        // Nothing was generated (e.g. the binary failed): report it without touching the filesystem.
        if(!is_file($file_output))
            return false;
        $content = file_get_contents($file_output);
        unlink($file_output);
        return $content;
    }

    /**
    * generating html files using pdftohtml software.
    * @return $this current object
    */
    public function generate()
    {
        // Both paths are quoted so that directories or file names containing spaces survive the shell.
        // NOTE(review): the output path, directory included, is handed to the binary as is; the
        // directory-prefixed frame "src" values some pdftohtml builds write presumably come from
        // this argument - confirm against the poppler version in use.
        $command = $this->bin()." ".$this->generateOptions()
            ." ".escapeshellarg($this->file)
            ." ".escapeshellarg($this->outputHtmlPath());
        exec($command);
        return $this;
    }

    /**
    * generate options based on the preserved options
    * @return string options that will be passed on running the command
    */
    public function generateOptions()
    {
        $generated = array();
        foreach($this->options as $key => $value)
        {
            $flag = "";
            switch($key)
            {
                case "singlePage":
                $flag = $value?"-c":"-s";
                break;
                case "imageJpeg":
                $flag = "-fmt ".($value?"jpg":"png");
                break;
                case "zoom":
                $flag = "-zoom ".$value;
                break;
                case "ignoreImages":
                $flag = $value?"-i":"";
                break;
                case 'noFrames':
                $flag = $value?'-noframes':'';
                break;
            }
            // Disabled switches contribute nothing; skipping them avoids empty tokens in the command.
            if($flag !== "")
                $generated[] = $flag;
        }
        return implode(" ", $generated);
    }

    /**
    * change value of preserved configuration; unknown keys are ignored
    * @param string $key key of option you want to change
    * @param mixed $value value of option you want to change
    * @return $this current object
    */
    public function setOptions($key, $value)
    {
        if(array_key_exists($key, $this->options))
            $this->options[$key] = $value;
        return $this;
    }

    /**
    * change the directory the generated files are written to
    * @param string $dir path to the output directory
    * @return $this current object
    */
    public function setOutputDirectory($dir)
    {
        $this->outputDir=$dir;
        return $this;
    }

    /**
    * clear the whole files that has been generated by pdftohtml. Make sure directory ONLY contain generated files from pdftohtml
    * because it remove the whole contents under preserved output directory
    * @return $this current object
    */
    public function clearOutputDirectory()
    {
        // CHILD_FIRST yields directories after their contents; the default mode yields
        // leaves only, so sub directories would never be visited and never removed.
        $entries = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($this->outputDir, \FilesystemIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST
        );
        foreach($entries as $entry)
        {
            $path = $entry->getPathname();
            // A link to a directory is removed like a file; only real directories need rmdir.
            if($entry->isDir() && !$entry->isLink())
                rmdir($path);
            else
                unlink($path);
        }
        return $this;
    }

    /**
     * @return string command used for the conversion, taken from the 'pdftohtml.bin' config entry
     */
    public function bin()
    {
        return Config::get('pdftohtml.bin', '/usr/bin/pdftohtml');
    }

    /**
     * build the path of the main html file: output directory plus the pdf base name with a trailing ".pdf" swapped for ".html"
     * @return string path of the html file passed to, and read back from, the binary
     */
    private function outputHtmlPath()
    {
        return $this->outputDir."/".preg_replace("/\.pdf$/","",basename($this->file)).".html";
    }
}
0

There are 0 best solutions below