在 PHP 中按短语搜索文件(doc、docx、xlsx、pdf)的内容

Search by phrase in files(doc,docx,xlsx,pdf) in php

到目前为止,我已经创建了一个具有搜索功能的网站。用户可以上传不同扩展名的文件(docx、doc、pdf 等),文件名保存在数据库中。所以我需要按内容搜索文件,但我无法正确地实现这一点。我做了两个功能:1. 按文件名搜索;2. 按短语搜索。

按文件名搜索工作正常,但按短语搜索的部分出现了问题。我能够将这些文件转换为文本文件,但我不知道为什么无法在那个文本文件中进行搜索。所以,有谁可以告诉我哪里错了,或者为我提供另一个解决方案吗?

这是代码....

homepage.php

<form method="post" action="search1.php" class="container 50%" id="searchform">
                            <!-- Free-text query; search1.php reads it as $_POST['name']. -->
                            <input type="text" name="name" placeholder="Enter the terms you wish to search for" />
                                <input type="submit" name="submit" value="Search" class="fit special" />
                                <!-- Search mode, read by search1.php as $_POST['search']: "phrase" searches file
                                     contents, any other value searches file names. Each radio button gets a
                                     label so the two modes can be told apart. -->
                                <input type="radio" id="name" name="search" value="name" class="fit special" />
                                <label for="name">File name</label>
                                <input type="radio" id="phrase" name="search" value="phrase" class="fit special" />
                                <label for="phrase">Phrase</label>
                        </form>

search1.php

    <?php
// Load the project helpers and the DocxConversion class before anything else runs.
require_once("/includes/functions.php");
require_once("/includes/class.php");
?>

<?php
// Connection settings for the database that stores the uploaded file names.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Abort the request with a short message when the server cannot be reached...
if (!mysql_connect($dbhost, $dbuser, $dbpass)) {
    die('cannot connect to the server');
}

// ...or when the database cannot be selected.
if (!mysql_select_db($dbname)) {
    die('database selection problem');
}
?>

<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
      <table class="alt">
        <thead>
            <tr>
                <th>File Name</th>
                <th>View</th>
            </tr>
        </thead>    
<?php 
     /*
      * Search handler for the form posted to this page.
      *
      * Reads $_POST['submit'], $_POST['name'] (the query text) and $_POST['search']
      * (the mode). When the mode is "phrase" the content of every uploaded file is
      * searched; any other non-empty mode searches the file names with SQL LIKE.
      * An empty query or a missing mode produces a JavaScript alert followed by a
      * redirect to homepage.php.
      */
     if(isset($_POST['submit'])){ 
      $name=$_POST['name']; 
      if($name!=NULL)
      {
      if (!empty($_POST['search'])) {
        if ($_POST['search']=="phrase") { //search by phrase
      $searchthis = $name;
      $matches = array();

        // Collect the `file` column of all seven tables; UNION also removes duplicate names.
        $query  = "SELECT file from ada ";
        $query .= "UNION ";
        $query .= "SELECT file from cdr ";
        $query .= "UNION ";
        $query .= "SELECT file from others ";
        $query .= "UNION ";
        $query .= "SELECT file from pdr ";
        $query .= "UNION ";
        $query .= "SELECT file from rr ";
        $query .= "UNION ";
        $query .= "SELECT file from sdd ";
        $query .= "UNION ";
        $query .= "SELECT file from tbl_uploads ";

        $result = mysql_query($query);
        // $new_file is a stream handle opened for WRITING, once, before the loop starts.
        $new_file = fopen("sample.txt","w") or die("Unable to open file!!");

        // For every file: convert it to text, write the text through $new_file, then try
        // to read it back line by line and look for the phrase with a case-sensitive strpos().
        // Problems in the loop body below:
        //  - fopen($new_file, "r") is given the stream handle instead of the path
        //    "sample.txt", so there is nothing valid to read from.
        //  - The write handle is never closed, and because it is shared by all files
        //    the text of every converted file accumulates in the same sample.txt.
        //  - NOTE(review): as shown, "\uploads\" ends in \" (an escaped quote) and '\'
        //    is an escaped apostrophe, so neither string is terminated; the original
        //    presumably used doubled backslashes -- confirm against the real file.
        while($row=mysql_fetch_array($result))
         {
            $filepath = getcwd() . "\uploads\".$row['file'];
            $path = str_replace('//', '\', $filepath);
            $Obj = new DocxConversion($path);
            $Text= $Obj->convertToText();
            fwrite($new_file,$Text);
            echo $new_file."<br/>";
            $handle = fopen($new_file, "r");
             if ($handle)
             {
                while (!feof($handle))
                {
                     $buffer = fgets($handle);
                     if(strpos($buffer, $searchthis) !== FALSE)
                     {
                         $matches[] = $row['file'];
                         break;
                     }

                }
                    fclose($handle);
              }
         }
       $matches = array_filter($matches);

        if (!empty($matches)) 
        {
               foreach($matches as $row)
                {
                ?>
                <tr>
                <td><?php echo $row ?></td>
                <td><a href="uploads/<?php echo $row ?>" target="_blank">view file</a></td>
                </tr>
                <?php
                }
        }
        else
        {
            //echo " Phrase not found!!!";
            ?>
            <script>
                alert('Phrase not Found');
                window.location.href='homepage.php';
            </script>
            <?php
        }
      }
     else{                              //search by name
          $array = array(
        "db1" => "ada",
        "db2" => "cdr",
        "db3" => "others",
        "db4" => "pdr",
        "db5" => "rr",
        "db6" => "sdd",
        "db7" => "tbl_uploads",
        );

      //connect  to the database 
      $db=mysql_connect("localhost","root","sandeep") or die ('I cannot connect to the database  because:'.mysql_error()); 

      //-select  the database to use 
      $mydb=mysql_select_db("dbtuts"); 
      $no_of_access = false;
      while ($db_name = current($array)) 
      {  

      //-query  the database table 
      $sql = "SELECT * FROM $db_name WHERE (file LIKE '%$name%')";

      //-run  the query against the mysql query function 
      $result=mysql_query($sql); 
      $num_rows = mysql_num_rows($result);
      if($num_rows > 0)
      {
      //-create  while loop and loop through result set 
      $no_of_access = true;
      while($row=mysql_fetch_array($result))
        {
        ?>
        <tr>
        <td><?php echo $row['file'] ?></td>
        <td><a href="uploads/<?php echo $row['file'] ?>" target="_blank">view file</a></td>
        </tr>
        <?php
        }
      }
      else 
        {
            if(!$no_of_access && $db_name == "tbl_uploads")
            //echo "<p> Result not found!!<p>";
            {
            ?>
            <script>
                alert('Result Not Found!!');
                window.location.href='homepage.php';
            </script>
            <?php
            }
        }
        next($array);
      }

     }    
     }
     else
          { 
            //echo  "<p>Please select an option</p>"; 
            ?>
            <script>
                alert('Please Select an option');
                window.location.href='homepage.php';
            </script>
            <?php
          } 
    } 
      else
          { 
            //echo  "<p>Please enter a search query</p>"; 
            ?>
            <script>
                alert('Please enter a search query');
                window.location.href='homepage.php';
            </script>
            <?php
          } 
    }
?> 
</table>
</div>
</section>  
</body> 
</html>

以上代码完美地按文件名搜索,但按短语部分存在一些问题。

class.php

  <?php require_once("/includes/pdf.php"); ?>
<?php
/**
 * Converts an office document on disk into plain text so its content can be searched.
 *
 * The converter is chosen from the file extension, compared case-insensitively:
 * doc, docx, xlsx, pptx and pdf are handled. PDF extraction is delegated to the
 * PDF2Text class, which must already be loaded by the including file.
 *
 * The documents are treated as untrusted uploads: embedded XML is parsed without
 * entity substitution, XInclude processing or network access.
 */
class DocxConversion{
    /** @var string Path of the document to convert. */
    private $filename;

    /**
     * @param string $filePath Path of the document to convert.
     */
    public function __construct($filePath) {
        $this->filename = $filePath;
    }

/************************doc file************************************/
    /**
     * Heuristically extracts readable text from a binary .doc file.
     *
     * The raw bytes are split on carriage returns; chunks containing a NUL byte are
     * discarded as binary data, the remaining chunks are joined with a trailing
     * space, and finally every character outside a small ASCII whitelist is removed
     * (so non-ASCII text is lost).
     *
     * @return string Extracted text, or '' when the file is empty or unreadable.
     */
    private function read_doc() {
        if (!is_readable($this->filename)) {
            return '';
        }

        // file_get_contents() opens and closes the file itself, so no handle is leaked,
        // and an empty file no longer triggers a zero-length fread().
        $raw = file_get_contents($this->filename);
        if ($raw === false || $raw === '') {
            return '';
        }

        $outtext = "";
        foreach (explode(chr(0x0D), $raw) as $thisline) {
            if ($thisline === '' || strpos($thisline, chr(0x00)) !== false) {
                continue;
            }
            $outtext .= $thisline." ";
        }

        return preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$outtext);
    }

/************************docx file************************************/
    /**
     * Extracts the text of word/document.xml from a .docx archive.
     *
     * Table-cell boundaries become a space and paragraph ends become "\r\n" before
     * the remaining markup is stripped.
     *
     * @return string|false Extracted text ('' when the archive has no document.xml),
     *                      or false when the archive cannot be opened.
     */
    private function read_docx(){
        // ZipArchive replaces the deprecated zip_open()/zip_read() functions and is
        // what the xlsx/pptx readers of this class already use.
        $zip = new ZipArchive;
        if ($zip->open($this->filename) !== true) {
            return false;
        }

        $content = $zip->getFromName("word/document.xml");
        $zip->close();

        if ($content === false) {
            return '';
        }

        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);

        return strip_tags($content);
    }

/************************PDF file************************************/
    /**
     * Extracts the text of a PDF through the PDF2Text class.
     *
     * @return mixed Whatever PDF2Text::output() returns (the decoded text); it is
     *               returned to the caller rather than echoed into the page.
     */
    private function read_pdf(){
        $a = new PDF2Text();
        $a->setFilename($this->filename);
        $a->decodePDF();

        return $a->output();
    }

    /**
     * Reads one XML entry of an open archive and returns it with all markup removed.
     *
     * LIBXML_NOENT and LIBXML_XINCLUDE are deliberately not used: with uploaded files
     * they would allow external entities and XInclude directives to pull in local
     * files. LIBXML_NONET additionally forbids network access while parsing.
     *
     * @param ZipArchive $zip   Archive that is already open.
     * @param int        $index Index of the entry inside the archive.
     * @return string Text content of the entry, or '' when it is missing or not valid XML.
     */
    private function xml_entry_to_text($zip, $index) {
        $xml_datas = $zip->getFromIndex($index);
        if ($xml_datas === false || $xml_datas === '') {
            return '';
        }

        $xml_handle = new DOMDocument();
        if (!$xml_handle->loadXML($xml_datas, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
            return '';
        }

        return strip_tags($xml_handle->saveXML());
    }

 /************************excel sheet************************************/
    /**
     * Extracts the shared strings (the cell texts) of an .xlsx workbook.
     *
     * @param string $input_file Path of the workbook.
     * @return string Text of xl/sharedStrings.xml with markup removed, or '' when the
     *                archive cannot be opened or has no shared-strings part.
     */
    public function xlsx_to_text($input_file){
        $zip_handle = new ZipArchive;
        if (true !== $zip_handle->open($input_file)) {
            return "";
        }

        $output_text = "";
        $xml_index = $zip_handle->locateName("xl/sharedStrings.xml");
        if ($xml_index !== false) {
            $output_text = $this->xml_entry_to_text($zip_handle, $xml_index);
        }
        $zip_handle->close();

        return $output_text;
    }

/*************************power point files*****************************/
    /**
     * Extracts the text of the slides of a .pptx presentation.
     *
     * Slides are read as ppt/slides/slide1.xml, slide2.xml, ... and reading stops at
     * the first number that is missing from the archive.
     *
     * @param string $input_file Path of the presentation.
     * @return string Concatenated slide text, or '' when the archive cannot be opened
     *                or contains no slide1.xml.
     */
    public function pptx_to_text($input_file){
        $zip_handle = new ZipArchive;
        if (true !== $zip_handle->open($input_file)) {
            return "";
        }

        $output_text = "";
        $slide_number = 1;
        while (($xml_index = $zip_handle->locateName("ppt/slides/slide".$slide_number.".xml")) !== false) {
            $output_text .= $this->xml_entry_to_text($zip_handle, $xml_index);
            $slide_number++;
        }
        $zip_handle->close();

        return $output_text;
    }

    /**
     * Converts the file given to the constructor into plain text.
     *
     * @return mixed The extracted text; the string "File Not exists" when the path is
     *               empty or does not exist; the string "Invalid File Type" when the
     *               extension is not supported; false when a .docx cannot be opened.
     */
    public function convertToText() {
        if (!is_string($this->filename) || $this->filename === '' || !file_exists($this->filename)) {
            return "File Not exists";
        }

        // PATHINFO_EXTENSION yields '' for a name without extension instead of a missing
        // array key; lower-casing lets "REPORT.DOCX" be handled like "report.docx".
        $file_ext = strtolower((string) pathinfo($this->filename, PATHINFO_EXTENSION));

        switch ($file_ext) {
            case "doc":
                return $this->read_doc();
            case "docx":
                return $this->read_docx();
            case "xlsx":
                return $this->xlsx_to_text($this->filename);
            case "pptx":
                return $this->pptx_to_text($this->filename);
            case "pdf":
                return $this->read_pdf();
            default:
                return "Invalid File Type";
        }
    }

}

?>

以上代码 class.php 将 doc、docx、xlsx、pdf 转换为文本。

pdf.php(http://pastebin.com/dvwySU1a)中的类用于将 pdf 文件转换为文本。

这部分是错误的(我认为):

        // $new_file holds the stream handle returned by the earlier fopen("sample.txt","w").
        fwrite($new_file,$Text);
        echo $new_file."<br/>";
        // fopen() expects a file name as its first argument, but it is given the handle
        // here, and the write handle has not been closed before the file is reopened.
        $handle = fopen($new_file, "r");

$new_file 中保存的是之前 fopen 返回的"文件指针或 FALSE",而不是文件名,所以不能把它再次传给 fopen 来打开文件。另外,你也没有关闭这个 txt 文件(如果要重新打开它,应该在 fwrite 之后先调用 fclose)。

为什么不直接在字符串中搜索短语?为什么需要先把它写入另一个 txt 文件?您可以直接在转换得到的文本($Text)中搜索,例如使用 strpos()。

终于自己解决了

search1.php

<?php
// Project helpers first, then the DocxConversion class used by the phrase search.
require_once("/includes/functions.php");
require_once("/includes/class.php");
?>

<?php
// Credentials and schema of the database holding the uploaded file names.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Stop immediately if the server is unreachable or the schema cannot be selected.
mysql_connect($dbhost, $dbuser, $dbpass) || die('cannot connect to the server');
mysql_select_db($dbname) || die('database selection problem');
?>

<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
      <table class="alt">
        <thead>
            <tr>
                <th>File Name</th>
                <th>View</th>
            </tr>
        </thead>    
<?php
    /*
     * Search handler for the form posted from homepage.php.
     *
     * Input:  $_POST['submit'] (form was sent), $_POST['name'] (query text) and
     *         $_POST['search'] (mode: "phrase" searches file contents, any other
     *         non-empty value searches file names).
     * Output: one table row per matching file, or a JavaScript alert followed by a
     *         redirect to homepage.php when the input is incomplete or nothing matched.
     *
     * The database connection opened at the top of this script is reused, so the
     * credentials are not repeated here. The code stays on the mysql_* API (and on
     * PHP 5 syntax) because that is the connection the rest of the file provides.
     */
    if (isset($_POST['submit'])) {
        // Tables whose `file` column holds the names of the uploaded files.
        $tables = array('ada', 'cdr', 'others', 'pdr', 'rr', 'sdd', 'tbl_uploads');

        // Shows a message box in the browser and sends the user back to the search form.
        $notify = function ($message) {
            echo "<script>\n"
               . "    alert(" . json_encode($message) . ");\n"
               . "    window.location.href='homepage.php';\n"
               . "</script>\n";
        };

        // Prints one result row. File names come from user uploads, so they are escaped
        // for HTML, and each path segment is URL-encoded inside the link.
        $renderRow = function ($file) {
            $flags = defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES;
            $label = htmlspecialchars($file, $flags, 'UTF-8');
            $href  = 'uploads/' . implode('/', array_map('rawurlencode', explode('/', $file)));
            echo "<tr>\n"
               . "<td>" . $label . "</td>\n"
               . "<td><a href=\"" . htmlspecialchars($href, $flags, 'UTF-8') . "\" target=\"_blank\">view file</a></td>\n"
               . "</tr>\n";
        };

        $name = (isset($_POST['name']) && is_string($_POST['name'])) ? $_POST['name'] : '';
        $mode = (isset($_POST['search']) && is_string($_POST['search'])) ? $_POST['search'] : '';

        if (trim($name) === '') {
            $notify('Please enter a search query');
        } elseif ($mode === '') {
            $notify('Please Select an option');
        } elseif ($mode === 'phrase') {
            // ---- search by phrase: look inside every uploaded file ----
            // NOTE: every file is converted again on each request; this is simple but
            // slow for large upload folders.
            $matches = array();

            foreach ($tables as $table) {
                $result = mysql_query("SELECT file FROM $table");
                if (!$result) {
                    continue; // query failed (e.g. table missing): skip this table
                }

                while ($row = mysql_fetch_assoc($result)) {
                    $file = (string) $row['file'];
                    if ($file === '' || isset($matches[$file])) {
                        continue; // nothing to open, or already reported from another table
                    }

                    $path = getcwd() . DIRECTORY_SEPARATOR . 'uploads' . DIRECTORY_SEPARATOR . $file;
                    $converter = new DocxConversion($path);
                    $text = $converter->convertToText();

                    // convertToText() reports problems through these return values; they
                    // are not document content and must not be searched.
                    if (!is_string($text) || $text === 'File Not exists' || $text === 'Invalid File Type') {
                        continue;
                    }

                    // Search the converted text directly (case-insensitively). No temporary
                    // file is needed, so concurrent requests cannot overwrite each other's data.
                    if (stripos($text, $name) !== false) {
                        $matches[$file] = true;
                    }
                }
                mysql_free_result($result);
            }

            if (empty($matches)) {
                $notify('Phrase not Found');
            } else {
                foreach (array_keys($matches) as $file) {
                    $renderRow((string) $file);
                }
            }
        } else {
            // ---- search by name: substring match on the stored file name ----
            // The query text is user input, so it is escaped before being placed in SQL.
            $pattern = mysql_real_escape_string($name);
            $shown = array();

            foreach ($tables as $table) {
                $result = mysql_query("SELECT file FROM $table WHERE (file LIKE '%$pattern%')");
                if (!$result) {
                    continue; // query failed: skip this table
                }

                while ($row = mysql_fetch_assoc($result)) {
                    $file = (string) $row['file'];
                    if (isset($shown[$file])) {
                        continue; // same name stored in more than one table
                    }
                    $shown[$file] = true;
                    $renderRow($file);
                }
                mysql_free_result($result);
            }

            if (empty($shown)) {
                $notify('Result Not Found!!');
            }
        }
    }
?>
</table>
</div>
</section>  
</body> 
</html>