topical media & game development

talk show tell print

server-php-xml-class-xquery-lite-class-xquery-lite.php / php



  <?
  //   

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

###





















// Title : XqueryLite (class_xquery_lite.php) // Version : 0.4 beta // Author : Luis Argerich (lrargerich@yahoo.com) // Last modification date : 05-21-2002 // Description : This is an implementation of a subset of the Xquery // language with intention to add new features in // next releases. It is based on flwr expressions. //

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

###





















// History: // 05-21-2002 : First release of this class from my prototype engine // 05-17-2002 : Some changes to solve W3C use-cases //

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

###





















// To-Dos: //

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

###





















// How to use it: //

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

#

###





















/**
 * XqueryLite: evaluates a small subset of XQuery built around FLWR
 * (FOR / LET / WHERE / RETURN) expressions embedded in an XML template.
 *
 * Text outside of "{ ... }" is copied to the output verbatim; the text inside
 * each top-level pair of braces is interpreted as a flwr-lite query.
 *
 * NOTE(review): XML handling relies on the PHP 4 DOM XML extension
 * (xmldoc(), xmldocfile(), xpath_new_context(), xpath_eval(), dump_node(),
 * node_type(), node_name()). Those calls are kept as the original code made
 * them; confirm the extension is available before using the XML code paths.
 */
class XqueryLite {

    // Node sets produced by FOR clauses, keyed by variable name without the "$".
    var $result_sets = Array();

    // Current variable bindings (string values), keyed by variable name without the "$".
    var $bindings = Array();

    /**
     * Resets all FOR result sets and variable bindings.
     */
    function init() {
        $this->result_sets = Array();
        $this->bindings = Array();
    }

    /**
     * Splits a query into clauses, starting a new clause at every FOR, LET,
     * RETURN or WHERE keyword found outside of braces.
     *
     * Whitespace between tokens is normalised to a single space and every
     * returned clause ends with one trailing space.
     *
     * @param string $exp query text (without the enclosing braces)
     * @return array list of clause strings
     */
    function _tokenize($exp) {
        $exprs = Array();
        $current = '';
        $level = 0;
        $tok = strtok($exp, " \n\t");
        // Compare against false explicitly: the token "0" is falsy and used
        // to end the loop early, truncating queries such as "where $x = 0".
        while ($tok !== false) {
            // Track brace depth so keywords inside nested { } do not split.
            $level += substr_count($tok, "{") - substr_count($tok, "}");
            if ($level == 0
                && in_array(strtoupper(trim($tok)), Array("FOR", "LET", "RETURN", "WHERE"))
                && strlen($current) > 0) {
                $exprs[] = $current;
                $current = '';
            }
            $current .= $tok . ' ';
            $tok = strtok(" \n\t");
        }
        if (strlen($current) > 0) {
            $exprs[] = $current;
        }
        return $exprs;
    }

    /**
     * Main entry point of the flwr-lite engine: evaluates an expression and
     * returns the resulting XML fragment as a string.
     *
     * It is called for the top-level expression and, recursively, for the
     * body of every RETURN clause.
     *
     * @param string $expr template text containing "{ query }" sections
     * @return string evaluated fragment
     */
    function evaluate_xqueryl($expr) {
        $result = '';
        $query = '';
        $level = 0;
        $length = strlen($expr);
        for ($i = 0; $i < $length; $i++) {
            $chr = substr($expr, $i, 1);
            if ($chr == '{') { $level++; }
            if ($chr == '}') { $level--; }
            // Collect everything inside the outermost braces; nested braces
            // are kept so that inner queries are evaluated by RETURN later.
            if ((($level > 0) && ($chr <> '{')) || ($level > 1)) {
                $query .= $chr;
            }
            if (($chr == '}') && ($level == 0)) {
                if (strlen($query) > 0) {
                    $result .= $this->_parse_query($query);
                }
                $query = '';
            }
            if (($chr <> "{") && ($chr <> "}") && ($level == 0)) {
                $result .= $chr;
            }
        }
        return $result;
    }

    /**
     * Returns the tag name of a node; used to prefix path expressions with
     * the root element of a bound XML fragment.
     *
     * @param object $node DOM XML node
     * @return string value of $node->node_name()
     */
    function _get_root_name($node) {
        return $node->node_name();
    }

    /**
     * Evaluates an XPath expression against a DOM XML document.
     *
     * @param object $doc  DOM XML document
     * @param string $path XPath expression
     * @return array matching nodes (empty when the result has no node set)
     */
    function _xpath_nodes($doc, $path) {
        $doc->xpath_init();
        $ctx = $doc->xpath_new_context();
        $found = $ctx->xpath_eval($path);
        if (!is_object($found) || !isset($found->nodeset) || !is_array($found->nodeset)) {
            return Array();
        }
        return $found->nodeset;
    }

    /**
     * Serialises a node: attribute nodes yield their value, every other node
     * yields its dump_node() output.
     *
     * @param object $node DOM XML node
     * @return string
     */
    function _node_to_string($node) {
        if ($node->node_type() == XML_ATTRIBUTE_NODE) {
            return $node->value;
        }
        return $node->dump_node($node);
    }

    /**
     * Runs an XPath query, applies the queued FOR functions (innermost
     * first) and returns the serialised nodes.
     *
     * @param object $doc       DOM XML document
     * @param string $path      XPath expression
     * @param array  $functions function names as written in the query
     * @return array list of strings
     */
    function _select_nodes($doc, $path, $functions) {
        $nodes = $this->_xpath_nodes($doc, $path);
        foreach ($functions as $f) {
            $f = trim($f);
            // The only built-in function is distinct-values, implemented by _distinct().
            $method = ($f == "distinct-values" || $f == "distinct") ? "_distinct" : $f;
            if (!method_exists($this, $method)) {
                trigger_error("Unknown function $f in FOR expression", E_USER_WARNING);
                continue;
            }
            $nodes = $this->$method($nodes);
        }
        $nodeset = Array();
        foreach ($nodes as $node) {
            $nodeset[] = $this->_node_to_string($node);
        }
        return $nodeset;
    }

    /**
     * Parses bound data as XML and evaluates $subpath relative to its root
     * element.
     *
     * @param string $data    XML text bound to a variable
     * @param string $subpath path starting with "/" (or empty for the root)
     * @param string $message warning emitted when $data is not well formed
     * @return array|false nodes, or false when $data could not be parsed
     */
    function _var_nodes($data, $subpath, $message) {
        $doc = xmldoc($data);
        // Check before touching the document; the original dereferenced it first.
        if (!$doc) {
            trigger_error($message, E_USER_WARNING);
            return false;
        }
        $rootname = $this->_get_root_name($doc->document_element());
        return $this->_xpath_nodes($doc, '/' . $rootname . $subpath);
    }

    /**
     * Parses a FOR binding, stores its node set in $this->result_sets and
     * returns the variable name (without the "$").
     *
     * Accepted forms, each optionally wrapped in functions such as
     * distinct-values(...):
     *   FOR $name IN xmlmem($global)/xpath_expression
     *   FOR $name IN document("file")/xpath_expression
     *   FOR $name IN $other/xpath_expression
     *
     * @param string $expr a single FOR clause
     * @return string|false variable name, or false (with a warning) on error
     */
    function _parse_for($expr) {
        $tokens = explode(" ", trim($expr));
        if (count($tokens) < 4 || strtoupper($tokens[2]) <> "IN") {
            trigger_error("Invalid FOR expresion $expr <br/>", E_USER_WARNING);
            return false;
        }
        $name = $tokens[1];
        // Everything after IN is the source; joining keeps paths whose
        // predicates contain spaces intact.
        $path = trim(implode(" ", array_slice($tokens, 3)));

        // Peel off wrapping functions until the source itself is reached.
        $functions = Array();
        while ((substr($path, 0, 1) <> '$')
            && (substr($path, 0, 8) <> "document")
            && (substr($path, 0, 6) <> "xmlmem")) {
            if (!preg_match("/([^(]*)\((.*)\)/", $path, $regs)) {
                // Not a function call either: stop instead of looping forever.
                break;
            }
            $path = $regs[2];
            array_unshift($functions, $regs[1]);
        }

        $nodeset = Array();
        if (preg_match('/^document\(([^)]*)\)(.*)$/s', $path, $regs)) {
            /* PROCESSING FROM A FILE */
            // The file name is taken from inside the parentheses, so it may contain "/".
            $name_doc = str_replace('"', '', $regs[1]);
            $path = (substr($regs[2], 0, 1) == '/') ? $regs[2] : '/' . $regs[2];
            if (!file_exists($name_doc)) {
                trigger_error("$name_doc file not found", E_USER_WARNING);
                return false;
            }
            $doc = xmldocfile($name_doc);
            if (!$doc) {
                trigger_error("XML source document $name_doc was not well formed", E_USER_WARNING);
                return false;
            }
            $nodeset = $this->_select_nodes($doc, $path, $functions);
        } elseif (preg_match('/^xmlmem\(([^)]*)\)(.*)$/s', $path, $regs)) {
            /* PROCESSING FROM MEM */
            $source = str_replace('"', '', $regs[1]);
            $path = (substr($regs[2], 0, 1) == '/') ? $regs[2] : '/' . $regs[2];
            $name_var = substr($source, 1);
            // NOTE THAT THE XML STRING MUST BE GLOBAL
            if (!isset($GLOBALS[$name_var])) {
                trigger_error("$name_var is not visible from here plase use a global string for XML data", E_USER_WARNING);
                return false;
            }
            $data = $GLOBALS[$name_var];
            if (strlen($data) > 0) {
                $doc = xmldoc($data);
                if (!$doc) {
                    trigger_error("XML source was not well formed", E_USER_WARNING);
                    return false;
                }
                $nodeset = $this->_select_nodes($doc, $path, $functions);
            }
        } elseif (substr($path, 0, 1) == '$') {
            /* PROCESS FROM A VARIABLE */
            $parts = explode("/", $path, 2);
            $var_name = (string)substr($parts[0], 1);
            $subpath = (isset($parts[1]) && strlen($parts[1]) > 0) ? '/' . $parts[1] : '';
            $data = isset($this->bindings[$var_name]) ? $this->bindings[$var_name] : '';
            if (strlen($data) > 0) {
                $doc = xmldoc($data);
                if (!$doc) {
                    trigger_error("XML source variable $var_name was not well formed", E_USER_WARNING);
                    return false;
                }
                // Paths on a bound fragment are relative to its root element.
                $rootname = $this->_get_root_name($doc->document_element());
                $nodeset = $this->_select_nodes($doc, '/' . $rootname . $subpath, $functions);
            }
        } else {
            $parts = explode("/", (string)$path, 2);
            $xml_source = $parts[0];
            trigger_error("Invalid xml source $xml_source <br/>", E_USER_WARNING);
            return false;
        }

        $name_of_name = substr($name, 1);
        $this->result_sets[$name_of_name] = $nodeset;
        return $name_of_name;
    }

    /**
     * Implements distinct-values: removes nodes whose normalised
     * representation has already been seen, keeping the first occurrence.
     *
     * @param array $nodeset DOM XML nodes
     * @return array filtered nodes, original order preserved
     */
    function _distinct($nodeset) {
        $new_nodeset = Array();
        $seen = Array();
        foreach ($nodeset as $node) {
            $normalized = (string)$this->_normalize_elements($node);
            if (!isset($seen[$normalized])) {
                $seen[$normalized] = true;
                $new_nodeset[] = $node;
            }
        }
        return $new_nodeset;
    }

    /**
     * Normalises a node to a string.
     *
     * Attribute nodes yield their value. If the dump contains exactly one
     * "<tag>text</tag>" construct the tags are stripped and the text is
     * returned; otherwise element dumps get newlines/tabs replaced by spaces
     * and whitespace between tags removed.
     *
     * @param object $node DOM XML node
     * @return string
     */
    function _normalize_elements($node) {
        if ($node->node_type() == XML_ATTRIBUTE_NODE) {
            return $node->value;
        }
        $data = trim($node->dump_node($node));
        preg_match_all("/<([^>]*)>[^<]*<\/[^>]*>/", $data, $foo);
        if (count($foo[1]) == 1) {
            return trim(preg_replace("/<.*>(.*)<\/.*>/", '$1', $data));
        }
        if ($node->node_type() == XML_ELEMENT_NODE) {
            $data = preg_replace("/\n/", " ", $data);
            $data = preg_replace("/\t/", " ", $data);
            $data = preg_replace("/\>\s*\</", "><", $data);
        }
        return $data;
    }

    /**
     * Evaluates "$name" or "$name/xpath_expression" outside of a FOR clause
     * and always returns a string.
     *
     * @param string $expr variable reference, starting with "$"
     * @param mixed  $norm when true, matched nodes are normalised with
     *                     _normalize_elements(); otherwise they are dumped
     * @return string '' when the variable is unbound, empty or not XML
     */
    function _parse_var($expr, $norm) {
        $parts = explode("/", $expr, 2);
        $var_name = (string)substr($parts[0], 1);
        $subpath = (isset($parts[1]) && strlen($parts[1]) > 0) ? "/" . $parts[1] : '';
        $data = isset($this->bindings[$var_name]) ? $this->bindings[$var_name] : '';
        if (strlen($data) == 0) {
            return '';
        }
        if (strlen($subpath) == 0) {
            // A bare variable evaluates to its bound value.
            return $data;
        }
        $nodes = $this->_var_nodes(
            $data,
            $subpath,
            "cannot evaluate a xpath expression because data is not xml "
        );
        if ($nodes === false) {
            return '';
        }
        $result = '';
        foreach ($nodes as $a_node) {
            if ($norm) {
                $result .= $this->_normalize_elements($a_node);
            } else {
                $result .= $this->_node_to_string($a_node);
            }
        }
        return $result;
    }

    /**
     * Like _parse_var(), but returns the number of nodes selected instead of
     * their content.
     *
     * @param string $expr variable reference, starting with "$"
     * @return mixed '' when the variable is unbound or empty, 1 for a bare
     *               variable, otherwise the node count (0 if the bound data
     *               is not well-formed XML)
     */
    function _count_var($expr) {
        $parts = explode("/", $expr, 2);
        $var_name = (string)substr($parts[0], 1);
        $subpath = (isset($parts[1]) && strlen($parts[1]) > 0) ? "/" . $parts[1] : '';
        $data = isset($this->bindings[$var_name]) ? $this->bindings[$var_name] : '';
        if (strlen($data) == 0) {
            return '';
        }
        if (strlen($subpath) == 0) {
            return 1;
        }
        $nodes = $this->_var_nodes(
            $data,
            $subpath,
            "cannot evaluate a xpath expression because data is not xml "
        );
        return ($nodes === false) ? 0 : count($nodes);
    }

    /**
     * Evaluates a WHERE clause and returns its truth value.
     *
     * Steps: "and"/"or" become "&&"/"||", a single "=" becomes "==",
     * count($var/path) is replaced by the node count, every $variable
     * reference is replaced by its normalised value as a PHP string literal,
     * and the resulting text is evaluated with eval().
     *
     * Limitations kept from the original: the textual rewrites are applied
     * to the whole clause, including string literals and XPath predicates,
     * and a variable reference ends at whitespace or at an operator
     * character (= > < + - * ;) outside of [ ].
     *
     * SECURITY: the clause text itself is executed as PHP code. Bound values
     * are escaped below, but the query must never come from an untrusted
     * source.
     *
     * @param string $expr WHERE clause, including the keyword
     * @return mixed result of the evaluated condition
     */
    function _parse_where($expr) {
        $expr = ltrim($expr);
        $wexpr = (string)substr($expr, 5); // drop the WHERE keyword
        $wexpr = preg_replace("/([^A-Za-z0-9])and([^A-Za-z0-9])/", '$1&&$2', $wexpr);
        $wexpr = preg_replace("/([^A-Za-z0-9])or([^A-Za-z0-9])/", '$1||$2', $wexpr);
        $wexpr = preg_replace("/([^=><!])=([^=])/", '$1==$2', $wexpr);

        // Replace count(...) calls by the number of selected nodes.
        if (preg_match_all("/count\(([^)]*)\)/", $wexpr, $counts)) {
            for ($i = 0; $i < count($counts[1]); $i++) {
                // Cast so that an unbound variable counts as 0 rather than
                // leaving a hole in the generated code.
                $cant = (int)$this->_count_var(trim($counts[1][$i]));
                $wexpr = str_replace($counts[0][$i], "$cant", $wexpr);
            }
        }

        // Collect the variable references ($name or $name/path).
        $vars = Array();
        $is_a_var = false;
        $predicate = false;
        $a_var = '';
        $length = strlen($wexpr);
        for ($i = 0; $i < $length; $i++) {
            $chr = substr($wexpr, $i, 1);
            if ($chr == "$") {
                $a_var = '';
                $is_a_var = true;
            }
            if ($is_a_var) {
                if (in_array($chr, Array(' ', "\t", "\n", ';'))) {
                    $is_a_var = false;
                }
                if ($chr == "[") { $predicate = true; }
                if ($chr == "]") { $predicate = false; }
                if (!$predicate && in_array($chr, Array('=', '>', '<', '+', '-', '*', ';', "\n"))) {
                    $is_a_var = false;
                }
                if (!$is_a_var) {
                    $vars[] = $a_var;
                    $a_var = '';
                }
            }
            if ($is_a_var && $chr <> "$") {
                $a_var .= $chr;
            }
        }
        if ($is_a_var) {
            $vars[] = $a_var;
        }

        // Substitute every reference by its value. var_export() yields a
        // properly escaped literal, and strtr() replaces the longest keys
        // first without rescanning substituted text, so "$b" cannot clobber
        // "$b/title" and values cannot inject code.
        $map = Array();
        foreach ($vars as $exp) {
            if (strlen($exp) == 0) {
                continue;
            }
            $exp = '$' . $exp;
            $map[$exp] = var_export((string)$this->_parse_var($exp, 1), true);
        }
        if (count($map) > 0) {
            $wexpr = strtr($wexpr, $map);
        }

        $php_code = 'return(' . $wexpr . ');';
        return eval($php_code);
    }

    /**
     * Evaluates a RETURN clause: the text following the keyword is itself a
     * template that may contain further "{ query }" sections.
     *
     * @param string $expr RETURN clause, including the keyword
     * @return string
     */
    function _parse_return($expr) {
        $expr = ltrim($expr);
        $retexp = (string)substr($expr, 6); // drop the RETURN keyword
        return $this->evaluate_xqueryl($retexp);
    }

    /**
     * Evaluates a LET clause of the form "LET $name := value" and stores the
     * binding. Path expressions are not normalised.
     *
     * @param string $expr LET clause, including the keyword
     */
    function parse_let($expr) {
        $expr = ltrim($expr);
        $letexpr = (string)substr($expr, 4); // drop the LET keyword
        $tokens = explode(":=", $letexpr, 2);
        if (count($tokens) < 2) {
            trigger_error("Invalid LET expression $expr <br/>", E_USER_WARNING);
            return;
        }
        $var_name = trim($tokens[0]);
        $var_value = trim($tokens[1]);
        if (substr($var_value, 0, 1) == '$') {
            // The value is a reference to another variable (with optional path).
            $var_value = $this->_parse_var($var_value, false);
        }
        $var_name = substr($var_name, 1);
        $this->bindings[$var_name] = $var_value;
    }

    /**
     * Splits "FOR $a IN ..., $b IN ..." on the commas that separate the
     * bindings, ignoring commas inside [ ] or ( ).
     *
     * @param string $expr FOR clause
     * @return array the individual bindings (empty pieces are dropped)
     */
    function _split_fors($expr) {
        $fors = Array();
        $afor = '';
        $level = 0;
        $length = strlen($expr);
        for ($i = 0; $i < $length; $i++) {
            $chr = substr($expr, $i, 1);
            if ($chr == "[" || $chr == "(") { $level++; }
            if ($chr == "]" || $chr == ")") { $level--; }
            if (($chr == ',') && ($level == 0)) {
                if (strlen($afor) > 0) {
                    $fors[] = $afor;
                    $afor = '';
                }
            } else {
                $afor .= $chr;
            }
        }
        if (strlen($afor) > 0) {
            $fors[] = $afor;
        }
        return $fors;
    }

    /**
     * Evaluates a flwr-lite query (the text found between braces) clause by
     * clause and returns the generated output.
     *
     * @param string $query query text
     * @return string
     */
    function _parse_query($query) {
        $result = '';
        $exprs = $this->_tokenize($query);
        $expr = array_shift($exprs);
        // Nothing left to evaluate; without this guard the default branch
        // below recursed forever on an empty query.
        if ($expr === null || trim($expr) == '') {
            return $result;
        }
        $expr = trim($expr);
        $tokens = explode(" ", $expr);
        $what = trim($tokens[0]);

        if (substr($what, 0, 1) == "$") {
            return $result . $this->_parse_var($what, 0);
        }

        switch (strtoupper($what)) {
            case "FOR":
                // A FOR may bind several variables separated by commas;
                // each extra binding becomes its own FOR clause, queued in
                // front of the remaining clauses.
                $multi_fors = $this->_split_fors($expr);
                if (count($multi_fors) > 1) {
                    for ($i = count($multi_fors) - 1; $i > 0; $i--) {
                        $afor = trim($multi_fors[$i]);
                        if (!preg_match('/^for\s/i', $afor)) {
                            $afor = 'FOR ' . $afor;
                        }
                        array_unshift($exprs, $afor);
                    }
                    $expr = $multi_fors[0];
                }
                $name = $this->_parse_for($expr);
                if ($name === false) {
                    break; // a warning has already been raised
                }
                // The rest of the query is evaluated once per node.
                $nodes = $this->result_sets[$name];
                $query = implode("\n", $exprs);
                foreach ($nodes as $node) {
                    $this->bindings[$name] = $node;
                    $result .= $this->_parse_query($query);
                }
                break;

            case "WHERE":
                // The rest of the query is evaluated only if the condition holds.
                if ($this->_parse_where($expr)) {
                    $result .= $this->_parse_query(implode("\n", $exprs));
                }
                break;

            case "RETURN":
                // Nothing can follow a RETURN.
                $result .= $this->_parse_return($expr);
                break;

            case "LET":
                // Bind the variable and continue with the rest of the query.
                $this->parse_let($expr);
                $result .= $this->_parse_query(implode("\n", $exprs));
                break;

            default:
                // Unknown leading text: skip it and process what follows.
                $result .= $this->_parse_query(implode("\n", $exprs));
        }
        return $result;
    }
}

/*
Usage example:

$case1='
<bib>
 {
  for $b in xmlmem($bib)/bib/book
  where $b/publisher = "Addison-Wesley" and $b/@year > 1991
  return <book year="{ $b/@year }"> { $b/title } </book>
 }
</bib>
';

$bib='<bib>
 <book year="1994">
  <title>TCP/IP Illustrated</title>
  <author><last>Stevens</last><first>W.</first></author>
  <publisher>Addison-Wesley</publisher>
  <price> 65.95</price>
 </book>
 <book year="1992">
  <title>Advanced XML Programming in the Unix environment</title>
  <author><last>Stevens</last><first>W.</first></author>
  <publisher>Addison-Wesley</publisher>
  <price>65.95</price>
 </book>
 <book year="2000">
  <title>Data on the Web</title>
  <author><last>Abiteboul</last><first>Serge</first></author>
  <author><last>Buneman</last><first>Peter</first></author>
  <author><last>Suciu</last><first>Dan</first></author>
  <publisher>Morgan Kaufmann Publishers</publisher>
  <price> 39.95</price>
 </book>
 <book year="1999">
  <title>The Economics of Technology and Content for Digital TV</title>
  <editor>
   <last>Gerbarg</last><first>Darcy</first>
   <affiliation>CITI</affiliation>
  </editor>
  <publisher>Kluwer Academic Publishers</publisher>
  <price>129.95</price>
 </book>
</bib>';

$xq=new XqueryLite();
$result=$xq->evaluate_xqueryl($case1);
print("Result:<br />");
print("<textarea rows='20' cols='50'>$result</textarea>");
*/
?>


(C) Æliens 20/2/2008

You may not copy or print any of this material without explicit permission of the author or the publisher. In case of other copyright issues, contact the author.