topical media & game development

talk show tell print

mashup-flickr-11-Complete-lib-NewsItem.php / php



  <?php
  
  /**
   * Wraps a single RSS item and derives search keywords from its title and
   * description, plus a list of photos that callers attach to the item.
   *
   * The RSS item is read through array access with the keys 'title',
   * 'description' and 'link'.
   */
  class NewsItem
  {
    /** @var mixed The wrapped RSS item (array-accessible), as passed to the constructor. */
    public $rssItem = null;

    /** @var array|null Cached result of getKeywords(); null until first computed. */
    public $keywords = null;

    /** @var array List of array('url' => ..., 'link' => ...) entries added via addPhoto(). */
    public $photos = null;

    // Flickr only allows 20 tags in a tag search
    public $maxKeywords = 20;

    // Some common words to ignore
    public $commonWords = array(
      'a', 'an', 'the', 'it', 'is', 'for', 'to', 'he', 'she', 'in',
      'why', 'which', 'where', 'who', 'about', 'this', 'that', 'as',
      'his', 'her', 'i', 'me', 'my'
    );

    /**
     * @param mixed $rssItem Array-accessible RSS item with 'title',
     *                       'description' and 'link' entries.
     */
    public function __construct($rssItem)
    {
      $this->rssItem = $rssItem;
      $this->photos = array();
    }

    /**
     * Attaches a photo to this news item.
     *
     * @param string $url  Stored under the 'url' key of the photo entry.
     * @param string $link Stored under the 'link' key of the photo entry.
     */
    public function addPhoto($url, $link)
    {
      $this->photos[] = array('url' => $url, 'link' => $link);
    }

    /**
     * @return array All photo entries added so far, in insertion order.
     */
    public function getPhotos()
    {
      return $this->photos;
    }

    /**
     * @return mixed The item's 'title' entry, or null when it is absent.
     */
    public function getTitle()
    {
      return $this->field('title');
    }

    /**
     * @return mixed The item's 'description' entry, or null when it is absent.
     */
    public function getDescription()
    {
      return $this->field('description');
    }

    /**
     * @return mixed The item's 'link' entry, or null when it is absent.
     */
    public function getLink()
    {
      return $this->field('link');
    }

    /**
     * Returns the keywords extracted from the title and description.
     * The result is computed once and cached in $keywords.
     *
     * @return array List of keyword strings (at most $maxKeywords entries).
     */
    public function getKeywords()
    {
      // Strict comparison: an empty keyword list is a valid cached result
      // and must not trigger a recomputation on every call.
      if ($this->keywords === null)
      {
        $text = $this->getTitle() . " " . $this->getDescription();
        $this->keywords = $this->extractKeywords($this->splitIntoWords($text));
      }

      return $this->keywords;
    }

    /**
     * Splits free text into words, dropping possessive "'s" suffixes and
     * all punctuation.
     *
     * @param string $data Text to split.
     * @return array List of non-empty words in their original order and case.
     */
    public function splitIntoWords($data)
    {
      // Remove any possessive apostrophes, including one at the very end
      $text = preg_replace('/\'s(?=\s|$)/', '', (string) $data);
      // Remove everything except word characters and whitespace; whitespace
      // (including newlines and tabs) is kept so adjacent words stay separate
      $text = preg_replace('/[^\s\w]/', '', $text);

      return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Builds the keyword list for a sequence of words: full names (runs of
     * capitalized words) first, followed by the lower-cased individual words
     * accepted by isGoodKeyword(), without duplicates.
     *
     * @param array $data List of words, e.g. from splitIntoWords().
     * @return array At most $maxKeywords keywords.
     */
    public function extractKeywords($data)
    {
      // Names come first; de-duplicate them and never exceed the limit
      $names = array_values(array_unique($this->findFullNames($data)));
      $words = array_slice($names, 0, $this->maxKeywords);

      foreach ($data as $word)
      {
        // Check the limit before adding so the list can never overshoot
        if (count($words) >= $this->maxKeywords)
        {
          break;
        }

        $lower = strtolower($word);
        if (!in_array($lower, $words, true) && $this->isGoodKeyword($word))
        {
          $words[] = $lower;
        }
      }

      return $words;
    }

    /**
     * Decides whether a single word is worth using as a keyword.
     *
     * @param string $word Word in its original case.
     * @return bool True when the word should be kept.
     */
    public function isGoodKeyword($word)
    {
      // Discard immediately if it is a number (digits only)
      if (preg_match('/^\d+$/', $word))
      {
        return false;
      }

      // Discard all common words
      if (in_array(strtolower($word), $this->commonWords, true))
      {
        return false;
      }

      // Keep anything containing capital letters or digits, and any word of
      // more than five characters
      return preg_match('/[A-Z0-9]/', $word) === 1 || strlen($word) > 5;
    }

    /**
     * Finds runs of two or more consecutive words that each begin with an
     * upper case letter (A-Z) and returns each run joined by single spaces.
     *
     * @param array $data List of words.
     * @return array List of multi-word names in order of appearance.
     */
    public function findFullNames($data)
    {
      $names = array();
      $run = array();

      foreach ($data as $word)
      {
        if (preg_match('/^[A-Z]/', $word))
        {
          // This word begins with an upper case letter
          $run[] = $word;
          continue;
        }

        // This word ends the current run; keep it if it had two or more words
        if (count($run) > 1)
        {
          $names[] = implode(' ', $run);
        }
        $run = array();
      }

      // A run that reaches the end of the input has not been flushed yet
      if (count($run) > 1)
      {
        $names[] = implode(' ', $run);
      }

      return $names;
    }

    /**
     * Reads one entry of the wrapped RSS item without raising a notice when
     * the entry is missing.
     *
     * @param string $key Entry name.
     * @return mixed The entry value, or null when it is not set.
     */
    private function field($key)
    {
      return isset($this->rssItem[$key]) ? $this->rssItem[$key] : null;
    }

  }
  ?>
  


(C) Æliens 20/2/2008

You may not copy or print any of this material without explicit permission of the author or the publisher. In case of other copyright issues, contact the author.