<?php
// ##################################################################################
// Title                     : RDQL (class_rdql.php)
// Version                   : 1.0 
// Author                    : Luis Argerich (lrargerich@yahoo.com)
// Last modification date    : 06-30-2002
// Description               : This class implements the RDQL language
//                             to query RDF documents from paths or URLs.
// ##################################################################################
// History: 
// 06-30-2002                : First release of this class
// ##################################################################################
// To-Dos:
// ##################################################################################
// How to use it: Check rdql_test.php for an example.
//                rdql.html describes the RDQL language.
//                class_rdql.html contains this class documentation.
// ##################################################################################

include_once("class_rdf_parser.php");


// A wrapper class to Query RDF documents
class RDQL_query_document {
  // Runs an RDQL query whose FROM part names documents by URL or file path.
  // A fresh RDF_document_iterator is handed to a new RDQL_query engine and
  // whatever parse_query() yields for $query is returned unchanged.
  function rdql_query_url($query)
  {
    $engine = new RDQL_query(new RDF_document_iterator());
    return $engine->parse_query($query);
  }
} // end of class



// Abstract class defining the methods of an RDF_iterator.
// The RDF iterator is used by the RDQL_query class; the iterator
// MUST provide a find_tuples($sources,$subject,$predicate,$object)
// method that returns all the tuples matching subject, predicate and object
// from the designated sources (the FROM part of an RDQL expression).
class RDF_iterator {

  // Hook for preparing the iterator for $sources. Empty here (returns null).
  function init($sources) {}

  // Hook for fetching the next tuple. Empty here (returns null).
  function get_tuple() {}

  // Hook that concrete iterators implement to return the tuples matching
  // the given subject/predicate/object patterns. Empty here (returns null).
  function find_tuples($sources,$subject,$predicate,$object) {}

  // Tells whether one component of a tuple satisfies one component of a
  // condition.
  //   $condition : pattern taken from the query; anything starting with '?'
  //                is a variable and matches every value.
  //   $tuple     : the actual value found in the data.
  // Returns true on a match, false otherwise. Non-variable patterns match
  // when both sides are the same string after trimming whitespace.
  function tuple_match($condition,$tuple) {
    $condition=(string)$condition;
    // substr() instead of the {0} offset: the curly-brace offset syntax is
    // gone in PHP 8 and substr() does not complain on an empty pattern.
    if(substr($condition,0,1)==='?') {
      return true;
    }
    // Strict comparison so that different numeric-looking terms
    // ("1e3" vs "1000") are not treated as equal.
    return trim($condition)===trim((string)$tuple);
  }
}


// This class implements the RDQL engine
class RDQL_query {
  // Object used to fetch tuples; only its
  // find_tuples($sources,$subject,$predicate,$object) method is called.
  var $iterator;

  // Constructor receives a RDF_iterator object
  // that must implement a find_tuples($sources,$subject,$predicate,$object) method
  // returning all the tuples in the RDF sources matching the provided arguments.
  function __construct($iterator) {
    $this->iterator=$iterator;
  }

  // Class-named constructor kept for interpreters that only know the old
  // constructor convention; it does the same as __construct().
  function RDQL_query($iterator) {
    $this->iterator=$iterator;
  }

  // Parses and executes an RDQL query.
  // Returns an array of associative arrays (variable name => value), one per
  // solution and restricted to the variables listed in SELECT, or false when
  // there is no result.
  function parse_query($query) {
    $exps=$this->tokenize($query);
    $select_vars=Array();
    $sources=Array();
    $conditions=Array();
    $filters=Array();
    $ns=Array();
    foreach($exps as $exp) {
      $exp=trim($exp);
      if(strtoupper(substr($exp,0,6)) == "SELECT") {
        $select_vars=$this->parse_select($exp);
      }
      if(strtoupper(substr($exp,0,4)) == "FROM") {
        $sources=$this->parse_from($exp);
      }
      if(strtoupper(substr($exp,0,5)) == "WHERE") {
        $conditions=$this->parse_where($exp);
      }
      if(strtoupper(substr($exp,0,3)) == "AND") {
        // tokenize() starts a new clause at every AND, so the filters of all
        // AND clauses are accumulated instead of keeping only the last one.
        $filters=array_merge($filters,$this->parse_and($exp));
      }
      if(strtoupper(substr($exp,0,5)) == "USING") {
        $ns=$this->parse_using($exp);
      }
    }
    // Resolve the WHERE conditions into variable bindings, then drop the
    // bindings rejected by any filter.
    $tuples = $this->find_matching_tuples($sources, $conditions, $ns);
    foreach($filters as $filter) {
      if(strlen($filter)==0) {
        continue; // an empty filter cannot be evaluated
      }
      // $tuples is passed by reference
      $this->filter_tuples($tuples,$filter);
    }
    // Project every binding onto the selected variables.
    $query_results=Array();
    foreach ($tuples as $a_tuple) {
      $a_result=Array();
      foreach($a_tuple as $key=>$val) {
        if(in_array((string)$key,$select_vars,true)) {
          $a_result[$key]=$val;
        }
      }
      if(count($a_result)>0) {
        ksort($a_result);
        $query_results[]=$a_result;
      }
    }
    if(count($query_results)>0) {
      return $query_results;
    } else {
      return false;
    }
  }


  // Splits a query into clauses. A new clause starts at every word that is
  // (case-insensitively) SELECT, FROM, WHERE, AND or USING. Each returned
  // clause is its words joined by single spaces, with a trailing space.
  function tokenize($exp) {
    $keywords=Array("SELECT","FROM","WHERE","AND","USING");
    $delims=" \n\t\r";
    $exprs=Array();
    $current='';
    $tok = strtok($exp,$delims);
    // Compare against false explicitly: a plain truth test would end the
    // scan at a "0" token and silently drop the rest of the query.
    while ($tok!==false) {
      if(in_array(strtoupper($tok),$keywords,true) && strlen($current)>0) {
        $exprs[]=$current;
        $current='';
      }
      $current.=$tok.' ';
      $tok = strtok($delims);
    }
    if(strlen($current)>0) {
      $exprs[]=$current;
    }
    return $exprs;
  }


  // Joins two lists of bindings: every pair of elements that agree on their
  // shared variables contributes one merged binding to the result.
  function array_sql_join($v1, $v2) {
    $result_set=Array();
    foreach ($v1 as $elemv1) {
      foreach ($v2 as $elemv2) {
        $res = $this->array_join_elems($elemv1, $elemv2);
        if($res) {
          $result_set[]=$res;
        }
      }
    }
    return $result_set;
  }

  // Merges two bindings. Returns false when a variable set in both has
  // different values, otherwise the union (keys of $v1 first, then the
  // keys only present in $v2).
  function array_join_elems($v1, $v2) {
    foreach ($v1 as $k1=>$val1) {
      // Values are compared as strings so numeric-looking terms such as
      // "1" and "1.0" are not considered equal.
      if(isset($v2[$k1]) && (string)$v2[$k1]!==(string)$val1) {
        return false;
      }
    }
    $ret=$v1;
    foreach ($v2 as $k2=>$val2) {
      if(!isset($ret[$k2])) {
        $ret[$k2]=$val2;
      }
    }
    return $ret;
  }

  // Helper shared by the clause parsers: splits a clause on single spaces,
  // warns when its first word is not $keyword (compared case-insensitively,
  // like the dispatch in parse_query) and returns the remaining words.
  function clause_words($exp,$keyword) {
    $exp_parts=explode(" ",trim($exp));
    if(strtoupper($exp_parts[0])!=$keyword) {
      trigger_error("Expected a $keyword token in the query",E_USER_WARNING);
    }
    array_shift($exp_parts);
    return $exp_parts;
  }

  // This parses a 'SELECT ?x,?y,?z' expression returning an array with variable names.
  function parse_select($exp) {
    return explode(',',implode('',$this->clause_words($exp,"SELECT")));
  }

  // This parses a 'FROM doc1,doc2' expression returning an array with document URIs/filenames.
  function parse_from($exp) {
    return explode(',',implode('',$this->clause_words($exp,"FROM")));
  }


  // This parses a where construction in the form 'WHERE (x1,x2,x3),(z1,z2,z3)'
  // returning an array of conditions. Spaces are removed and the text is
  // split on the commas that are outside parentheses.
  function parse_where($exp) {
    $vars=Array();
    $expr=implode('',$this->clause_words($exp,"WHERE"));
    $avar='';
    $level=0;
    $len=strlen($expr);
    for($i=0;$i<$len;$i++) {
      $chr=substr($expr,$i,1);
      if($chr=="(") {
        $level++;
      }
      if($chr==")") {
        $level--;
      }
      if( ($chr==',') && ($level==0) ) {
        if(strlen($avar)>0) {
          $vars[]=$avar;
          $avar='';
        }
      } else {
        $avar.=$chr;
      }
    }
    if(strlen($avar)>0) {
      $vars[]=$avar;
    }
    return $vars;
  }

  // This parses an AND clause returning its comma separated filter
  // expressions (with all spaces removed).
  function parse_and($exp) {
    return explode(',',implode('',$this->clause_words($exp,"AND")));
  }

  // This parses a "USING" expr in the form USING prefix for <URI>, prefix for <URI>
  // returning an array prefix => URI. Malformed declarations raise a warning
  // and are skipped when no URI can be extracted from them.
  function parse_using($exp) {
    $ns=Array();
    $exp=trim($exp);
    $vars=explode(',',implode(' ',$this->clause_words($exp,"USING")));
    foreach($vars as $var) {
      $var_parts=explode(' ',trim($var));
      if(count($var_parts)<3) {
        trigger_error("Expected a for token in the USING part: $exp",E_USER_WARNING);
        continue;
      }
      if(strtoupper($var_parts[1])!="FOR") {
         trigger_error("Expected a for token in the USING part: $exp",E_USER_WARNING);
      }
      if(!preg_match("/\<([^>]*)\>/",$var_parts[2],$reqs)) {
        trigger_error("Expected a <URI> in the USING part: $exp",E_USER_WARNING);
        continue;
      }
      $ns[$var_parts[0]]=$reqs[1];
    }
    return $ns;
  }


  // This function filters the tuples passed as arguments according to the filter.
  //   $tuples : list of bindings, modified in place; on return it holds only
  //             the bindings for which the filter is true, re-indexed from 0
  //             so that several filters can be applied one after another.
  //   $filter : expression in which variable names stand for their values.
  // NOTE(security): the filter text comes from the query and is executed with
  // eval(); only the substituted values are escaped here. Do not run queries
  // from untrusted users through this method.
  function filter_tuples(&$tuples,$filter) {
    if(!is_array($tuples)) {
      $tuples=Array();
      return;
    }
    $kept=Array();
    foreach($tuples as $a_tuple) {
      $replacements=Array();
      foreach($a_tuple as $varname=>$value) {
        // var_export() yields a correctly escaped string literal, so quotes
        // or '$' inside a value cannot change the evaluated code.
        $replacements[$varname]=var_export((string)$value,true);
      }
      // strtr() substitutes the longest names first and never rescans
      // substituted text, so ?x is not replaced inside ?xy or inside a value.
      $a_filter=(count($replacements)>0) ? strtr($filter,$replacements) : $filter;
      $php_code='return('.$a_filter.');';
      if(eval($php_code)) {
        $kept[]=$a_tuple;
      }
    }
    $tuples=$kept;
  }


  // Expands one element of a condition. Elements not written as <...> are
  // returned untouched. For <prefix:name> with a prefix declared in $ns the
  // result is the namespace URI followed by name; otherwise the text between
  // the angle brackets is returned as is.
  function expand_qname($elem,$ns) {
    if(substr($elem,0,1)!=='<') {
      return $elem;
    }
    if(!preg_match("/\<([^>]*)\>/",$elem,$reqs)) {
      return $elem;
    }
    // Limit 2: only the first ':' separates the prefix from the local name.
    $parts=explode(':',$reqs[1],2);
    if(count($parts)==2 && isset($ns[$parts[0]])) {
      return $ns[$parts[0]].$parts[1];
    }
    return $reqs[1];
  }

  // IN: $sources array with the names of the RDF sources (passed to the iterator)
  // IN: $conditions array with the conditions to be evaluated
  // IN: $ns array with the namespaces (prefix => URI)
  // OUT: array of bindings (variable name => value) satisfying all conditions;
  //      an empty array when there are no conditions or no solutions.
  function find_matching_tuples($sources,$conditions,$ns) {
     $vec=Array();
     // Explicit flag for "nothing joined yet": testing $vec itself would
     // confuse an empty intermediate result with the first iteration and
     // let a later condition replace it instead of being joined with it.
     // NOTE(review): iterators that return no binding for a matching
     // condition without variables make such a condition empty the result.
     $first=true;
     foreach($conditions as $condition) {
       $condition=trim($condition);
       if(!preg_match("/\(([^)]*)\)/",$condition,$reqs)) {
         trigger_error("Malformed WHERE condition: $condition",E_USER_WARNING);
         continue;
       }
       $elems=explode(',',$reqs[1]);
       if(count($elems)<3) {
         trigger_error("Malformed WHERE condition: $condition",E_USER_WARNING);
         continue;
       }
       // Check each element, if it is <something:foo> then replace it by the
       // namespace
       for($i=0;$i<3;$i++) {
         $elems[$i]=$this->expand_qname($elems[$i],$ns);
       }
       $a_vec=$this->iterator->find_tuples($sources,$elems[0],$elems[1],$elems[2]);
       if(!is_array($a_vec)) {
         $a_vec=Array();
       }
       if($first) {
         $vec=$a_vec;
         $first=false;
       } else {
         $vec=$this->array_sql_join($a_vec,$vec);
       }
     }
     return $vec;
  }
} //end of class


// This class implements an iterator for RDF documents
// using URLs or filenames (paths) to locate the documents.
class RDF_document_iterator extends RDF_iterator {
   // Parser instance used for the source currently being read.
   var $rdf_parser;
   // Patterns of the condition being resolved; read by _statement_handler.
   var $subject;
   var $object;
   var $predicate;
   // Bindings collected by _statement_handler during find_tuples().
   var $tuples=Array();

   function __construct() {

   }

   // Class-named constructor kept for interpreters that only know the old
   // constructor convention; nothing to initialise.
   function RDF_document_iterator() {

   }

   // Not used by this iterator.
   function init($sources) {

   }

   // Not used by this iterator.
   function get_tuple() {

   }

   // Parses every source and returns the bindings produced by the statements
   // that match the subject/predicate/object patterns.
   //   $sources : array of document locations, normally written as <location>;
   //              an entry without angle brackets is used as given.
   // Returns an array of associative arrays (variable name => value), filled
   // by _statement_handler through $this->tuples. Sources that cannot be
   // opened are skipped.
   function find_tuples($sources,$subject,$predicate,$object) {
     $this->subject=$subject;
     $this->predicate=$predicate;
     $this->object=$object;
     $this->tuples=Array();
     foreach($sources as $source) {
       if(preg_match("/\<([^>]*)\>/",$source,$reqs)) {
         $source=$reqs[1];
       }
       $source=trim((string)$source);
       if(strlen($source)==0) {
         continue;
       }
       $input=fopen($source,"r");
       if(!$input) {
         // fopen() has already reported the problem; reading from a failed
         // handle would never reach end-of-file, so skip this source.
         continue;
       }
       $this->rdf_parser=new Rdf_parser();
       $this->rdf_parser->rdf_parser_create( NULL );
       $this->rdf_parser->rdf_set_statement_handler( "_statement_handler" );
       $this->rdf_parser->rdf_set_user_data( $this );
       $done=false;
       while(!$done) {
         $buf = fread( $input, 512 );
         if($buf===false) {
           break; // read error: stop instead of looping on a broken stream
         }
         $done = feof($input);
         // The third argument tells the parser whether this is the last chunk.
         if ( ! $this->rdf_parser->rdf_parse( $buf, strlen($buf), $done ) ) {
           $done=true;
         }
       }
       fclose($input);
       $this->rdf_parser->rdf_parser_free();
     }
     return $this->tuples;
   }
} // end of class


// This is the statement handler used by the RDF parser in the Document Iterator
// Called for every statement found by the RDF parser.
//   $user_data : object exposing tuple_match(), the patterns ->subject,
//                ->predicate and ->object, and the ->tuples result list.
//   $subject, $predicate, $object : the components of the statement.
// The remaining arguments are part of the handler signature and are not used.
// When the statement matches the three patterns, the values found at the
// positions held by variables (patterns starting with '?') are appended to
// $user_data->tuples as one associative array (variable name => value).
// Nothing is appended when no pattern is a variable.
function _statement_handler(&$user_data,$subject_type,$subject,$predicate,$ordinal,$object_type,$object,$xml_lang) {
  if(!($user_data->tuple_match($user_data->subject,$subject) &&
       $user_data->tuple_match($user_data->predicate,$predicate) &&
       $user_data->tuple_match($user_data->object,$object)) ) {
    return;
  }
  $pairs=Array(
    Array($user_data->subject,$subject),
    Array($user_data->predicate,$predicate),
    Array($user_data->object,$object)
  );
  $result=Array();
  foreach($pairs as $pair) {
    // substr() instead of the {0} offset: the curly-brace offset syntax is
    // gone in PHP 8 and substr() is safe on an empty pattern.
    if(substr((string)$pair[0],0,1)==='?') {
      $result[$pair[0]]=$pair[1];
    }
  }
  if(count($result)>0) {
    $user_data->tuples[]=$result;
  }
}

// This is an iterator for RDF triplets held in PHP arrays. The sources in
// the FROM part of the RDQL expression must name PHP global variables, in
// the form <$var>.
class RDF_triplets_iterator extends RDF_iterator {
  // Not used anywhere in this class.
  var $cosa;
  // Position of the next tuple returned by get_tuple().
  var $index;
  // Triplets of the source currently being scanned.
  var $tuples;


  // Rewinds the iteration. The optional argument only keeps the signature
  // compatible with RDF_iterator::init($sources); it is ignored.
  function init($sources=null) {
    $this->index=0;
  }

  // Scans the triplets stored in global PHP arrays and returns the bindings
  // of those matching the subject/predicate/object patterns.
  //   $sources : array of global variable names, written <$name>, <name>
  //              or without the angle brackets.
  // Each global is expected to be a list of 3-element arrays
  // (subject, predicate, object). Returns an array of associative arrays
  // (variable name => value); triplets matching a condition without
  // variables add nothing.
  // NOTE(security): the variable name comes from the query text, so a query
  // can read any global array; do not expose this to untrusted queries.
  function find_tuples($sources,$subject,$predicate,$object) {
    $ret=Array();
    $elems=Array($subject,$predicate,$object);
    foreach($sources as $source) {
      if(preg_match("/\<([^>]*)\>/",$source,$reqs)) {
        $source=$reqs[1];
      }
      $source=(string)$source;
      // remove '$' from source
      if(substr($source,0,1)==='$') {
        $source=(string)substr($source,1);
      }
      if(strlen($source)>0 && isset($GLOBALS[$source]) && is_array($GLOBALS[$source])) {
        // get_tuple() walks positions 0..n-1, so normalise the keys.
        $this->tuples=array_values($GLOBALS[$source]);
      } else {
        $this->tuples=Array();
      }
      $this->init();
      // Compare with false explicitly so an empty tuple does not end the scan.
      while(($tuple=$this->get_tuple())!==false) {
        if(!is_array($tuple) || !isset($tuple[0],$tuple[1],$tuple[2])) {
          continue; // not a complete triplet
        }
        if($this->tuple_match($elems[0],$tuple[0]) && $this->tuple_match($elems[1],$tuple[1]) && $this->tuple_match($elems[2],$tuple[2])) {
          $result=Array();
          for($i=0;$i<3;$i++) {
            if(substr((string)$elems[$i],0,1)==='?') {
              $result[$elems[$i]]= $tuple[$i];
            }
          }
          // count($result): the original counted the bare word "result",
          // which made this check always true.
          if(count($result)>0) {
            $ret[]=$result;
          }
        }
      }
    }
    return $ret;
  }

  // Returns the tuple at the current position and advances, or false when
  // all tuples have been returned.
  function get_tuple() {
    if(!is_array($this->tuples) || $this->index>=count($this->tuples)) {
      return false;
    }
    $elem=$this->tuples[$this->index];
    $this->index++;
    return $elem;
  }

} // end of class


?>