topical media & game development

talk show tell print

professional-search-13-seophp-include-link-checker.inc.php / php



  <?php
  
  // Accumulates the raw bytes received by the cURL write callback for the
  // request currently in flight. It is reset at the start of every
  // LinkChecker::getHeader() call.
  $LINKCHECKER_total_str = '';
  
  // +----------------------------------------------------------------------+
  // | LinkChecker                                                          |
  // | Gets URL header data using cURL                                      |
  // +----------------------------------------------------------------------+
  // | Copyright (c) 2003 Jaimie Sirovich                                   |
  // +----------------------------------------------------------------------+
  // | Author: Jaimie Sirovich <jsirovic@gmail.com>                         |
  // +----------------------------------------------------------------------+
  
  // Collection of static helpers that fetch the HTTP response header of a
  // URL with cURL, extract individual fields from it, and trace the chain
  // of redirects leading from a URL to its final destination.
  //
  // All methods are static so that both LinkChecker::method() and
  // $instance->method() call styles work.
  class LinkChecker
  {
    // Sentinel returned by the parse*() methods when the requested value
    // is not present in the header text.
    const NOT_AVAILABLE = '(not available)';
  
    // cURL write callback.
    //
    // Appends each received chunk to the global buffer. As soon as the
    // buffer contains a complete header block (terminated by a blank line)
    // the header text is echoed into the active output buffer and -1 is
    // returned; a return value different from the chunk length makes cURL
    // abort the transfer, so the response body is never downloaded.
    //
    // $ch  - the cURL handle (unused)
    // $str - the chunk of data just received
    // Returns strlen($str) to continue the transfer, or -1 to abort it.
    public static function CURLOPT_WRITEFUNCTION($ch, $str)
    {
      global $LINKCHECKER_total_str;
      $LINKCHECKER_total_str .= $str;
      if (preg_match('/^(.*?)\r\n\r\n/s', $LINKCHECKER_total_str, $matches)) 
      {
        echo $matches[1];
        return -1;
      } 
      return strlen($str);
    }  
  
    // Fetch the response header of a single request (redirects are NOT
    // followed here; getPath() follows them one hop at a time).
    //
    // $url       - the URL to request
    // $userAgent - value sent in the User-Agent request header
    // Returns the header text without the terminating blank line, or an
    // empty string when no complete header block was received.
    public static function getHeader($url, $userAgent = "Mozilla/4.0")
    {
      global $LINKCHECKER_total_str;
      $LINKCHECKER_total_str = "";
  
      $ch = curl_init();
      if ($ch === false) 
      {
        return '';
      }
  
      curl_setopt($ch, CURLOPT_URL, $url);
      curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
      curl_setopt($ch, CURLOPT_HEADER, 1);
      // The write callback aborts after the first header block, so each
      // call only ever yields one response; make that explicit.
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
      curl_setopt($ch, CURLOPT_TIMEOUT, 60);
      curl_setopt($ch, CURLOPT_WRITEFUNCTION, 
                  array("LinkChecker", "CURLOPT_WRITEFUNCTION"));
  
      // The callback echoes the header, so capture it with output buffering.
      // curl_exec() reports failure because the callback aborts on purpose;
      // its return value is therefore not meaningful here.
      ob_start();  
      curl_exec($ch);
      curl_close($ch);
      return ob_get_clean();
    }
  
    // Return the three-digit status code from the status line at the start
    // of $str (accepts both "HTTP/1.1 200 OK" and "HTTP/2 200"), or
    // '(not available)' when $str does not begin with a status line.
    public static function parseResponseCode($str) 
    {
      preg_match('/^HTTP\/\d(?:\.\d)? (\d{3})/', $str, $matches);
      return (isset($matches[1]) ? $matches[1] : self::NOT_AVAILABLE);
    }
  
    // Return the value of the Content-Type header, or '(not available)'.
    // Header names are matched case-insensitively at the start of a line
    // and the value stops before the line terminator.
    public static function parseMimeType($str) 
    {
      preg_match('/^Content-Type:[ \t]*([^\r\n]*)/im', $str, $matches);
      return (isset($matches[1]) ? $matches[1] : self::NOT_AVAILABLE);
    }
    
    // Return the value of the Content-Length header, or '(not available)'.
    public static function parseContentLength($str) 
    {
      preg_match('/^Content-Length:[ \t]*([^\r\n]*)/im', $str, $matches);
      return (isset($matches[1]) ? $matches[1] : self::NOT_AVAILABLE);
    }
    
    // Return the value of the Location header, or '(not available)'.
    // Anchored to the start of a line so "Content-Location:" is not
    // mistaken for "Location:".
    public static function parseLocation($str) 
    {
      preg_match('/^Location:[ \t]*([^\r\n]*)/im', $str, $matches);
      return (isset($matches[1]) ? $matches[1] : self::NOT_AVAILABLE);
    }
  
    // Trace the chain of redirects starting at $url.
    //
    // $url            - the URL to start from
    // $_response_code - (by reference) receives the status code of the last
    //                   response that was parsed; left untouched when the
    //                   very first request yields no header
    // $userAgent      - value sent in the User-Agent request header
    // Returns an array of human-readable lines: the initial destination,
    // one line per redirect hop, and either the final destination or an
    // error line.
    public static function getPath($url, &$_response_code, $userAgent = 'Mozilla/4.0')
    {
      $currentUrl = $url;
      $path = array();
      $path[] = 'Initial destination ' . $currentUrl;
      $iterations = 0;
  
      do 
      {
        $buffer = self::getHeader($currentUrl, $userAgent);
        if (!$buffer) 
        {
          $path[] = 'ERROR: No response headers received from ' . 
                    $currentUrl . '; aborting.';
          break;
        }
  
        $_response_code = self::parseResponseCode($buffer);
        $location = self::parseLocation($buffer);
  
        // Only a 3xx response that actually names a target is a redirect;
        // everything else (200, 404, 500, unparseable status, ...) ends
        // the chain at the current URL.
        $isRedirect = strncmp($_response_code, '3', 1) === 0 &&
                      $location !== self::NOT_AVAILABLE &&
                      $location !== '';
  
        if ($isRedirect) 
        {
          $currentUrl = $location;
          $path[] = 'Redirect (' . $_response_code . ') to => ' . $currentUrl;
        } 
        else 
        {
          $path[] = 'Final destination (' . $_response_code . ') ' . $currentUrl;
        }
  
        $iterations++;
        if ($isRedirect && $iterations > 10) 
        {
          $path[] = 'ERROR: Maximum number of redirections exceeded; aborting.';
          break;
        }
      } 
      while ($isRedirect); 
  
      return $path;
    }
  }
  ?>
  


(C) Æliens 20/2/2008

You may not copy or print any of this material without explicit permission of the author or the publisher. In case of other copyright issues, contact the author.