Zend Search Lucene searching whole product description

64 Views Asked by At

Below is the code of the search engine on my website. Right now it only searches what is referred to as ProductName and ProductNumber. I don't know what needs to be changed so that it searches the whole ProductDescription. Here is the Search.php file:

 /**
  * Lucene index handle, assigned in the constructor from
  * Zend_Search_Lucene::open() or, if opening fails, Zend_Search_Lucene::create().
  */
 protected $_index;
/**
 * URLs that _indexUrl() has already requested during the lifetime of this
 * object; consulted before every fetch so a page is not requested twice.
 *
 * @var array
 */
protected $_indexed = array();
/**
 * HTTP client used by _indexUrl() to download pages; the constructor
 * configures it with a timeout of 10.
 *
 * @var Zend_Http_Client
 */
protected $_httpClient;

/**
 * Opens the Lucene index located at <DOCUMENT_ROOT>/../tmp/search, creating
 * it when it cannot be opened, then prepares the HTTP client used for
 * crawling and registers the UTF-8 case-insensitive analyzer as default.
 *
 * @throws Zend_Search_Lucene_Exception when the index can neither be opened nor created
 */
public function __construct()
{
    $configuredDir = $_SERVER['DOCUMENT_ROOT'] . '/../tmp/search';

    // realpath() yields false for a directory that does not exist yet. Passing
    // false on to open()/create() makes both fail, so the very first run could
    // never create the index. Fall back to the unresolved path in that case.
    $resolvedDir = realpath($configuredDir);
    $indexDir = ($resolvedDir !== false) ? $resolvedDir : $configuredDir;

    try {
        $this->_index = Zend_Search_Lucene::open($indexDir);
    } catch (Zend_Search_Lucene_Exception $e) {
        // No usable index at that location yet: start a fresh one.
        $this->_index = Zend_Search_Lucene::create($indexDir);
    }

    $this->_httpClient = new Zend_Http_Client();
    $this->_httpClient->setConfig(array('timeout' => 10));

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());
}

/**
 * Indexes a single URL or every URL of a list.
 *
 * Values that are neither a string nor an array are ignored.
 *
 * @param string|array $url one URL, or an array of URLs
 * @return void
 */
public function indexUrl($url)
{
    if (is_array($url)) {
        $targets = $url;
    } else {
        // A lone string becomes a one-element list; anything else becomes an empty one.
        $targets = is_string($url) ? array($url) : array();
    }

    foreach ($targets as $target) {
        $this->_indexUrl($target);
    }
}

/**
 * Starts indexing at the root page ("/") of the current host; _indexUrl()
 * follows the links it finds from there.
 *
 * @return void
 */
public function indexWholePage()
{
    $this->_indexUrl($this->_getHostName() . '/');
}

/**
 * Downloads $url, recursively indexes the same-site links found in the page
 * and then stores the page itself in the Lucene index.
 *
 * A URL is requested at most once per object (tracked in $this->_indexed).
 * If the index already holds a document for $url whose stored md5 equals the
 * md5 of the fetched body, the page is left untouched; otherwise outdated
 * documents for that URL are deleted and the new one is added.
 *
 * @param string $url absolute URL of the page to index
 * @return void
 */
protected function _indexUrl($url)
{
    // Strict comparison: URLs are strings and must match exactly.
    if (in_array($url, $this->_indexed, true)) {
        return;
    }

    $log = Zend_Registry::get('Zend_Log');
    $log->log($url, Zend_Log::NOTICE);

    $this->_httpClient->setUri($url);
    $response = $this->_httpClient->request();

    // Mark as visited before following links so circular links terminate.
    $this->_indexed[] = $url;

    if (!$response->isSuccessful()) {
        return;
    }

    $body = $response->getBody();
    $bodyHash = md5($body);

    $doc = Zend_Search_Lucene_Document_Html::loadHTML($body, true);

    foreach ($doc->getLinks() as $link) {
        if (!$this->_isValidPageLink($link)) {
            continue;
        }

        $target = $this->_resolvePageLink($link, $url);
        if ($target !== null && !in_array($target, $this->_indexed, true)) {
            $this->_indexUrl($target);
        }
    }

    $t = new Zend_Search_Lucene_Index_Term($url, 'url');
    $q = new Zend_Search_Lucene_Search_Query_Term($t);
    $hits = $this->_index->find($q);

    foreach ($hits as $hit) {
        if ($hit->md5 == $bodyHash) {
            // Page content is unchanged since it was last indexed.
            return;
        }
        $this->_index->delete($hit->id);
    }

    $doc->addField(Zend_Search_Lucene_Field::Keyword('url', $url));
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('md5', $bodyHash));

    $this->_index->addDocument($doc);

    $log->log('done', Zend_Log::NOTICE);
}

/**
 * Turns a link found in a page into an absolute URL on the current host.
 *
 * Links that are already absolute are returned unchanged instead of having
 * the host name prepended a second time; query-only links ("?page=2") are
 * resolved against the page they were found in.
 *
 * @param string $link    href value taken from the page
 * @param string $baseUrl absolute URL of the page containing the link
 * @return string|null absolute URL, or null when the link does not point to this host
 */
protected function _resolvePageLink($link, $baseUrl)
{
    $host = $this->_getHostName();
    $hostLength = strlen($host);

    if (strncmp($link, $host, $hostLength) === 0) {
        // The host must end here; "http://site.com.evil.org" only shares a prefix.
        $next = (string) substr($link, $hostLength, 1);
        $endsHost = ($next === '' || $next === '/' || $next === '?' || $next === '#');

        return $endsHost ? $link : null;
    }

    $first = (string) substr($link, 0, 1);

    if ($first === '/') {
        // "//other.host/path" is protocol-relative, i.e. possibly another site.
        return (substr($link, 0, 2) === '//') ? null : $host . $link;
    }

    if ($first === '?') {
        // Keep the path of the current page and replace only its query string.
        return substr($baseUrl, 0, strcspn($baseUrl, '?#')) . $link;
    }

    return null;
}

/**
 * Runs a query against the Lucene index.
 *
 * @param mixed $query query passed unchanged to the index's find() method
 * @return mixed whatever find() returns for the query
 */
public function search($query)
{
    $hits = $this->_index->find($query);

    return $hits;
}

/**
 * Placeholder: the body is empty, so calling this method currently does
 * nothing and the index is left untouched.
 *
 * TODO: not implemented yet.
 */
public function deleteIndex()
{

}

/**
 * Builds the scheme-and-authority part of the current request's URL,
 * e.g. "http://example.com" or "https://example.com:8443", without a
 * trailing slash.
 *
 * The values are read from $_SERVER (HTTP_HOST, HTTPS, SERVER_PORT). The
 * port is appended only when it is not the default port of the scheme and
 * HTTP_HOST does not already contain one.
 *
 * @return string
 */
protected function _getHostName()
{
    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';

    // PHP sets HTTPS to a non-empty value for secure requests; some servers
    // report "off" for plain HTTP. An empty value therefore means plain HTTP.
    $https = isset($_SERVER['HTTPS']) ? strtolower((string) $_SERVER['HTTPS']) : '';
    $proto = ($https !== '' && $https !== 'off') ? 'https' : 'http';

    // Without SERVER_PORT assume the default port of the detected scheme
    // (not always 80, which would yield "https://host:80").
    $defaultPort = ('https' === $proto) ? 443 : 80;
    $port = isset($_SERVER['SERVER_PORT']) ? (int) $_SERVER['SERVER_PORT'] : $defaultPort;

    $uri = $proto . '://' . $host;

    // The Host header already carries non-default ports ("localhost:8080");
    // appending the port again would produce "localhost:8080:8080".
    $hostHasPort = (preg_match('/:\d+$/', $host) === 1);

    if (!$hostHasPort && $port !== $defaultPort) {
        $uri .= ':' . $port;
    }

    return $uri;
}

/**
 * Decides whether a link found in a page should be followed by the crawler.
 *
 * A link qualifies when it points to the current host (absolute URL on this
 * host, root-relative path, or query-only link) and does not end in one of
 * the excluded file extensions (jpg, gif, png, pdf, doc, xls).
 *
 * @param string $url href value taken from a page
 * @return bool true when the link should be followed
 */
protected function _isValidPageLink($url)
{
    $hostName = $this->_getHostName();
    $hostLength = strlen($hostName);

    if (strncmp($url, $hostName, $hostLength) === 0) {
        // Require the host name to end here, otherwise a foreign host such as
        // "http://site.com.evil.org" would pass the prefix comparison.
        $next = (string) substr($url, $hostLength, 1);
        $sameSite = ($next === '' || $next === '/' || $next === '?' || $next === '#');
    } else {
        $first = (string) substr($url, 0, 1);
        // "//other.host/path" is protocol-relative and may leave this site.
        $sameSite = ($first === '?')
            || ($first === '/' && substr($url, 0, 2) !== '//');
    }

    if (!$sameSite) {
        return false;
    }

    // Skip images and office/PDF documents; only pages are indexed.
    return !preg_match('#^(.+)\.(jpg|gif|png|pdf|doc|xls)$#i', $url);
}

And here is the PHP form that generates the search results. The Lucene implementations I found while searching were completely different from what is here. This is my first time with Zend Framework.

 <form method="get" action="/search.html" class="searchForm" enctype="application/x-www-form-urlencoded" id="searchForm">
  <fieldset>
    <input type="text" id="search_text" name="q" value="<?php echo $this->escape($this->query) ?>"><br>
     <input type="submit" value="search" id="search" name="search"> 
  </fieldset>
</form>

<h1>Search results</h1>

<?php /* No usable search string: show the minimum-length hint instead of results. */
if (empty($this->searchString)): ?>
          <p><strong>Please write text of minimal length of <?php echo $this->escape($this->minimumLength) ?></strong></p>
<?php else: ?>

<?php if (count($this->products)): ?>

<?php foreach ($this->products as $product): ?>
<?php $link = '/' . $this->permalink($product->product_name) . ',' . $product->product_id . ',' . $product->category_id . ',p.html'; ?>
<div class="productlist clearfix">
  <a href="<?php echo $this->escape($link) ?>" class="clearfix">
<div class="txt">
  <?php /* Name and number are plain text and are escaped before output. */ ?>
  <h2><?php echo $this->escape($product->product_name) ?><?php if (strlen($product->product_number) > 2): ?><small> [ <?php echo $this->escape($product->product_number) ?> ]</small><?php endif; ?></h2>
  <?php /* NOTE(review): product_intro2 is printed unescaped as in the original; presumably it holds HTML markup - confirm it is trusted. */ ?>
  <p><?php echo stripslashes($product->product_intro2) ?></p>
</div>
<div class="pic">
   <?php if ($product->has_media): ?>
     <?php echo $this->thumb($product->media_src, 110, 110) ?>
   <?php endif; ?>
   <p style="text-align: center;">More</p>
</div>
</a>
</div>
<hr/>

<?php endforeach; ?>

<?php else: ?>
<p>0 product was found</p>
<?php endif; ?>

<div style="clear: both;">
<?php echo $this->products; ?>
</div>



<?php endif ?>
0

There are 0 best solutions below