topical media & game development
server-php-xml-class-rss-parser-class-rdf-parser.php / php
<?php
// # # # # # # # # # # # # # # # # # # # ###
// Title : Class Rdf_parser
// Version : 1.0
// Author : Jason Diammond -repat RDF parser-
// : Luis Argerich -PHP version of repat- (lrargerich@yahoo.com)
// Last modification date : 06-13-2002
// Description : A PHP port of Repat, an RDF parser.
// This expat-based parser parses RDF files and produces events
// specific to RDF documents.
// # # # # # # # # # # # # # # # # # # # ###
// History:
// 06-13-2002 : First version of this class.
// 07-17-2002 Minor bugfix (Leandro Mariano Lopez)
// # # # # # # # # # # # # # # # # # # # ###
// To-Dos:
//
// # # # # # # # # # # # # # # # # # # # ###
// How to use it:
// Read the documentation in rdf_parser.html
// # # # # # # # # # # # # # # # # # # # ###
if(defined("_class_rdf_is_included")) {
// do nothing since the class is already included
} else {
// Include-guard marker: once defined, a later include of this file skips
// everything in this branch.
define('_class_rdf_is_included', 1);

/* ---- XML namespace ---- */
define('XML_NAMESPACE_URI', 'http://www.w3.org/XML/1998/namespace');
define('XML_LANG', 'lang');

/* ---- RDF syntax namespace and the local names defined in it ---- */
define('RDF_NAMESPACE_URI', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
define('RDF_RDF', 'RDF');
define('RDF_DESCRIPTION', 'Description');
define('RDF_ID', 'ID');
define('RDF_ABOUT', 'about');
define('RDF_ABOUT_EACH', 'aboutEach');
define('RDF_ABOUT_EACH_PREFIX', 'aboutEachPrefix');
define('RDF_BAG_ID', 'bagID');
define('RDF_RESOURCE', 'resource');
define('RDF_VALUE', 'value');
define('RDF_PARSE_TYPE', 'parseType');
define('RDF_PARSE_TYPE_LITERAL', 'Literal');
define('RDF_PARSE_TYPE_RESOURCE', 'Resource');
define('RDF_TYPE', 'type');
define('RDF_BAG', 'Bag');
define('RDF_SEQ', 'Seq');
define('RDF_ALT', 'Alt');
define('RDF_LI', 'li');
define('RDF_STATEMENT', 'Statement');
define('RDF_SUBJECT', 'subject');
define('RDF_PREDICATE', 'predicate');
define('RDF_OBJECT', 'object');

/* ---- Separator joining a namespace URI and a local name ---- */
// Both forms hold the same single character; the string form is the one
// concatenated between RDF_NAMESPACE_URI and RDF_RDF further down the file.
define('NAMESPACE_SEPARATOR_CHAR', '^');
define('NAMESPACE_SEPARATOR_STRING', '^');

/* ---- Parser states, stored in the "state" entry of the current element ---- */
define('IN_TOP_LEVEL', 0);
define('IN_RDF', 1);
define('IN_DESCRIPTION', 2);
define('IN_PROPERTY_UNKNOWN_OBJECT', 3);
define('IN_PROPERTY_RESOURCE', 4);
define('IN_PROPERTY_EMPTY_RESOURCE', 5);
define('IN_PROPERTY_LITERAL', 6);
define('IN_PROPERTY_PARSE_TYPE_LITERAL', 7);
define('IN_PROPERTY_PARSE_TYPE_RESOURCE', 8);
define('IN_XML', 9);
define('IN_UNKNOWN', 10);

/* ---- Subject type codes, stored in the "subject_type" entry ---- */
define('RDF_SUBJECT_TYPE_URI', 0);
define('RDF_SUBJECT_TYPE_DISTRIBUTED', 1);
define('RDF_SUBJECT_TYPE_PREFIX', 2);
define('RDF_SUBJECT_TYPE_ANONYMOUS', 3);

/* ---- Object type codes, passed along when a statement is reported ---- */
define('RDF_OBJECT_TYPE_RESOURCE', 0);
define('RDF_OBJECT_TYPE_LITERAL', 1);
define('RDF_OBJECT_TYPE_XML', 2);
class Rdf_parser {
var e["parent"]=Array(); // Parent is a blank Array
//e["parent"]);
e["has_property_atributes"]=0;
e["subject_type"]=0;
e["predicate"]='';
e["members"]=0;
e["xml_lang"]='';
e["statements"]=0;
e;
}
function _copy_element(destination )
{
if( destination["parent"] = destination["state"] = destination["xml_lang"] = e)
{
e["predicate"]='';
e["bag_id"]='';
e["parent"])) {
if( e["parent"]["xml_lang"] != e["xml_lang"]='';
}
}
else
{
e["xml_lang"]='';
}
//memset( e, 0, strlen( _rdf_element ) );
e["state"]=0;
e["has_member_attributes"]=0;
e["subject"]='';
e["ordinal"]=0;
e["data"]='';
e["bag_id"]='';
e["statement_id"]='';
}
function _push_element()
{
if(!isset(this->rdf_parser["free"]=Array();
}
if(count(e = e["parent"])) {
e["parent"];
} else {
this->_new_element();
}
}
else
{
this->_new_element();
}
if(!isset(this->rdf_parser["top"]=Array();
}
this->rdf_parser["top"], this->rdf_parser["top"] = e = this->rdf_parser["top"] = this->_clear_element( this->rdf_parser["free"])) {
this->rdf_parser["free"];
} else {
this->rdf_parser["free"] = local_name )
{
return ( local_name )
{
return ( local_name )
{
local_name{0}=='_')
if( ordinal = substr(ordinal > 0 ) ? local_name )
{
return local_name )
|| local_name );
}
function _is_rdf_property_element( local_name == RDF_TYPE )
|| ( local_name == RDF_PREDICATE )
|| ( local_name == RDF_VALUE )
|| ( local_name{0} == '_' );
}
function _istalnum(val);
}
function _istalpha(val);
}
function _is_absolute_uri(result = false;
uri && uri{uri_p;
while( (uri))
&& ( uri{uri{uri{uri{uri_p;
}
uri{result;
}
/*
This function returns an associative array containing whichever of the various components of the URL are present. These may include:
scheme - e.g. http
host
port
user
pass
path
query - after the question mark ?
fragment - after the hashmark #
*/
function _parse_uri(buffer,scheme,&path,&fragment ) {
uri);
if(isset(scheme=scheme='';
}
if(isset(host=host='';
}
if(isset(authority=authority='';
}
if(isset(path=path='';
}
if(isset(query=query='';
}
if(isset(fragment=fragment='';
}
}
function _resolve_uri_reference(reference_uri,&length )
{
reference_buffer='';
buffer = '';
reference_uri,reference_buffer ),reference_authority,
reference_query,reference_scheme == ''
&& reference_path == ''
&& buffer=reference_fragment != '' )
{
buffer.=reference_scheme != '' )
{
reference_uri;
}
else
{
base_uri,
base_buffer ),
base_authority,
base_query,
result_scheme = reference_authority != '' )
{
reference_authority;
}
else
{
base_authority;
if( reference_path{0} == '/')
|| (result_path = p = '';
path_buffer;
p = strstr( p )
{
base_path, '\\' );
}
if( path_buffer.=reference_path != '' )
{
reference_path;
}
{
//remove all occurrences of "./"
//print(path_buffer=preg_replace("/\/\.\//","/",path_buffer=preg_replace("/\/([^\/\.])*\/..path_buffer);
while(preg_match("/\.\./",path_buffer=preg_replace("/\/([^\/\.]*)\/..\//","/",path_buffer=preg_replace("/\.path_buffer);
}
}
}
// This replaces the C pointer assignment
path_buffer;
if( buffer=buffer.=":";
}
if( buffer.="//";
result_authority;
}
if( buffer.=reference_query != '' )
{
buffer.=reference_fragment != '' )
{
buffer.=id )
{
p = p_p=0;
if( this->_istalpha( p{0} == '_'
|| result = true;
while( p{++this->_istalnum( p_p} )
|| p_p} == '.'
|| p_p} == '-'
|| p_p} == '_'
|| p_p} == ':' ) )
{
result;
}
function _resolve_id(buffer,id_buffer='';
if( id ) == true )
{
id";
}
else
{
id_buffer."#_bad_ID_attribute_");
}
this->rdf_parser["base_uri"], buffer, name, &len,&local_name )
{
static buffer=buffer, NAMESPACE_SEPARATOR_CHAR ) )
{
buffer);
cosas[0];
cosas[1];
}
else
{
if( ( buffer{ 1 } == 'm' )
&& ( buffer{ 3 } == ':' ) )
{
local_name = substr(namespace_uri = '';
buffer;
}
}
}
function _generate_anonymous_uri(&len )
{
this->rdf_parser["anonymous_id"])) {
this->rdf_parser["anonymous_id"]++;
this->rdf_parser["anonymous_id"];
this->rdf_parser["base_uri"], buf, subject_type, predicate, object_type, xml_lang, statements, statement_id_type = RDF_SUBJECT_TYPE_URI;
predicate_buffer='';
if( this->rdf_parser["statement_handler"](subject_type,predicate,object_type,xml_lang );
if( statements == '' )
{
bag_id,
RDF_NAMESPACE_URI.RDF_TYPE,
0,
RDF_OBJECT_TYPE_RESOURCE,
RDF_NAMESPACE_URI.RDF_BAG,
'',
'',
'',
'' );
}
if( ! statement_id_type = RDF_SUBJECT_TYPE_ANONYMOUS;
statement_id_buffer,
strlen( statement_id = statements++;
statements;
bag_id,
statements,
RDF_OBJECT_TYPE_RESOURCE,
statement_id )
{
// rdf:type = rdf:Statement
statement_id_type,
this->_report_statement(
statement_id,
RDF_NAMESPACE_URI.RDF_SUBJECT,
0,
RDF_OBJECT_TYPE_RESOURCE,
this->_report_statement(
statement_id,
RDF_NAMESPACE_URI.RDF_PREDICATE,
0,
RDF_OBJECT_TYPE_RESOURCE,
this->_report_statement(
statement_id,
RDF_NAMESPACE_URI.RDF_OBJECT,
0,
object,
'',
'',
'',
'' );
}
}
}
function _report_start_parse_type_literal()
{
if( this->rdf_parser["start_parse_type_literal_handler"](
this->rdf_parser["end_parse_type_literal_handler"] )
{
this->rdf_parser["user_data"] );
}
}
function _handle_property_attributes(subject, xml_lang, statements )
{
attribute='';
attribute_namespace_uri='';
attribute_value='';
i = 0; isset(i ]); this->_split_name(
i ],
attribute ),
attribute_local_name );
attributes[ predicate=predicate.=attribute_namespace_uri )
{
if( attribute_local_name ) )
{
subject_type,
predicate,
0,
RDF_OBJECT_TYPE_LITERAL,
xml_lang,
statements,
'' );
}
else if( attribute_local_name ) )
{
subject_type,
predicate,
0,
RDF_OBJECT_TYPE_RESOURCE,
bag_id,
ordinal = attribute_local_name ) ) != 0 )
{
subject_type,
predicate,
attribute_value,
bag_id,
attribute_namespace_uri )
{
//do nothing
}
else if( this->_report_statement(
subject,
attribute_value,
bag_id,
name, this->rdf_parser["start_element_handler"]) )
{
this->rdf_parser["user_data"],
attributes );
}
}
function _report_end_element( this->rdf_parser["end_element_handler"]) )
{
this->rdf_parser["user_data"],
s,this->rdf_parser["character_data_handler"]) )
{
this->rdf_parser["user_data"],
len );
}
}
function _report_warning( this->rdf_parser["warning_handler"]) )
{
warning);
}
}
function _handle_resource_element( local_name, parent )
{
aux=aux2=Array();
foreach(atkey=>aux2[]=aux2[]=attributes=id = '';
about_each = '';
bag_id = '';
attribute='';
attribute_local_name='';
id_buffer='';
this->rdf_parser["top"]["has_property_attributes"] = false;
i = 0; isset(i]); this->_split_name(
i ],
attribute ),
attribute_local_name );
attributes[ attribute_namespace_uri == '' )
|| ( attribute_local_name == RDF_ID )
{
attribute_value;
++attribute_local_name == RDF_ABOUT )
{
attribute_value;
++attribute_local_name == RDF_ABOUT_EACH )
{
attribute_value;
++attribute_local_name == RDF_ABOUT_EACH_PREFIX )
{
attribute_value;
++attribute_local_name == RDF_BAG_ID)
{
attribute_value;
}
else if( attribute_local_name ) )
{
this->_is_rdf_ordinal( this->rdf_parser["top"]["has_property_attributes"] = true;
this->_report_warning(
"unknown or out of context rdf attribute:".attribute_namespace_uri == XML_NAMESPACE_URI )
{
if( this->rdf_parser["top"]["xml_lang"] = attribute_namespace_uri )
{
subjects_found == 0 )
{
id_buffer, strlen( this->rdf_parser["top"]["subject"]=this->rdf_parser["top"]["subject_type"] = RDF_SUBJECT_TYPE_ANONYMOUS;
}
else if( this->_report_warning(
"ID, about, aboutEach, and aboutEachPrefix are mutually exclusive" );
return;
}
else if( this->_resolve_id( id_buffer, strlen( this->rdf_parser["top"]["subject_type"] = RDF_SUBJECT_TYPE_URI;
id_buffer;
}
else if( this->_resolve_uri_reference( about, id_buffer ) );
this->rdf_parser["top"]["subject"]=about_each )
{
this->rdf_parser["top"]["subject"]=about_each_prefix )
{
this->rdf_parser["top"]["subject"]=this->rdf_parser["top"]["subject"] == '' )
{
this->rdf_parser["top"]["subject"]=len = strlen( len > 0 )
{
//bag_id )
{
bag_id, id_buffer ) );
id_buffer;
}
// only report the type for non-rdf:Description elements.
if( (namespace_uri != RDF_NAMESPACE_URI ) )
{
namespace_uri;
local_name;
this->rdf_parser["top"]["subject_type"],
type,
'',
this->rdf_parser["top"]["statements"],
'' );
}
// if this element is the child of some property,
// report the appropriate statement.
if( this->_report_statement(
parent["parent"]["subject"],
parent["ordinal"],
RDF_OBJECT_TYPE_RESOURCE,
parent["parent"]["bag_id"],
parent["statement_id"] );
}
if( this->_handle_property_attributes(
this->rdf_parser["top"]["subject"],
this->rdf_parser["top"]["xml_lang"],
this->rdf_parser["top"]["statements"] );
}
}
function _handle_property_element( &local_name, &buffer='';
aux=aux2=Array();
foreach(atkey=>aux2[]=aux2[]=attributes=attribute_namespace_uri='';
attribute_value = '';
statement_id = '';
parse_type = '';
namespace_uri == RDF_NAMESPACE_URI )
{
if( (this->_is_rdf_ordinal( this->rdf_parser["top"]["ordinal"] > this->rdf_parser["top"]["parent"]["members"] = this->_is_rdf_property_element( this->_report_warning(
"unknown or out of context rdf property element: ".buffer=namespace_uri == RDF_NAMESPACE_URI )
&& ( ordinal='';
this->rdf_parser["top"]["ordinal"] = this->rdf_parser["top"]["ordinal"]=ordinal{ 0 } = '_' ;
this->rdf_parser["top"]["ordinal"];
}
else
{
local_name;
}
buffer;
this->rdf_parser["top"]["has_member_attributes"] = false;
for( attributes[i += 2 )
{
attributes[buffer,
strlen( attribute_namespace_uri,
attribute_value = i + 1];
// if the attribute is not in any namespace
// or the attribute is in the RDF namespace
if( ( attribute_namespace_uri == RDF_NAMESPACE_URI ) )
{
if( ( statement_id = attribute_local_name == RDF_PARSE_TYPE )
{
attribute_value;
}
else if( resource = attribute_local_name == RDF_BAG_ID )
{
attribute_value;
}
else if( attribute_local_name ) )
{
this->_report_warning(
"unknown rdf attribute: ".attribute_namespace_uri == XML_NAMESPACE_URI )
{
if( this->rdf_parser["top"]["xml_lang"] = attribute_namespace_uri )
{
statement_id && this->_report_warning(
"rdf:ID and rdf:resource are mutually exclusive" );
return;
}
if( this->_resolve_id(buffer, strlen( this->rdf_parser["top"]["statement_id"]=parse_type )
{
if( this->_report_warning(
"property elements with rdf:parseType do not allow rdf:resource" );
return;
}
if( this->_report_warning(
"property elements with rdf:parseType do not allow rdf:bagID" );
return;
}
if( this->_report_warning(
"property elements with rdf:parseType do not allow property attributes");
return;
}
if( this->_generate_anonymous_uri( buffer ) );
// since we are sure that this is now a resource property we can report it
this->rdf_parser["top"]["parent"]["subject_type"],
this->rdf_parser["top"]["predicate"],
0,
RDF_OBJECT_TYPE_RESOURCE,
this->rdf_parser["top"]["parent"]["bag_id"],
statement_id );
this->rdf_parser["top"]["state"] = IN_PROPERTY_PARSE_TYPE_RESOURCE;
this->rdf_parser["top"]["subject"]=this->rdf_parser["top"]["bag_id"]='';
}
else
{
this->rdf_parser["top"]["parent"]["subject_type"],
this->rdf_parser["top"]["predicate"],
0,
RDF_OBJECT_TYPE_XML,
'',
'',
this->rdf_parser["top"]["parent"]["statements"],
this->rdf_parser["top"]["state"] = IN_PROPERTY_PARSE_TYPE_LITERAL;
resource || this->rdf_parser["top"]["has_property_attributes"] )
{
if( subject_type = RDF_SUBJECT_TYPE_URI;
this->rdf_parser["base_uri"], buffer, strlen( subject_type = RDF_SUBJECT_TYPE_ANONYMOUS;
buffer ) );
}
this->_report_statement(
this->rdf_parser["top"]["parent"]["subject"],
this->rdf_parser["top"]["ordinal"],
RDF_OBJECT_TYPE_RESOURCE,
this->rdf_parser["top"]["parent"]["bag_id"],
bag_id )
{
bag_id, buffer ) );
buffer;
}
if( this->_handle_property_attributes(
buffer,
this->rdf_parser["top"]["xml_lang"],
this->rdf_parser["top"]["statements"] );
}
}
}
function _start_element_handler(name, buffer='';
local_name='';
/*
if( rdf_parser->top != '' && rdf_parser->top->state != IN_TOP_LEVEL )
{
++rdf_parser->anonymous_id;
}
*/
this->_split_name(
buffer,
strlen( namespace_uri,
this->rdf_parser["top"]["state"] )
{
case IN_TOP_LEVEL:
if( RDF_NAMESPACE_URI.NAMESPACE_SEPARATOR_STRING.RDF_RDF == this->rdf_parser["top"]["state"] = IN_RDF;
}
else
{
name, this->rdf_parser["top"]["state"] = IN_DESCRIPTION;
namespace_uri, attributes, '' );
break;
case IN_DESCRIPTION:
case IN_PROPERTY_PARSE_TYPE_RESOURCE:
this->_handle_property_element( local_name, this->rdf_parser["top"]["data"]='';
this->rdf_parser["top"]["state"] = IN_DESCRIPTION;
namespace_uri,
attributes,
this->_report_warning( "no markup allowed in literals" );
break;
case IN_PROPERTY_PARSE_TYPE_LITERAL:
this->_report_start_element( attributes );
break;
case IN_PROPERTY_RESOURCE:
this->_report_warning(
"no content allowed in property with rdf:resource, rdf:bagID, or property attributes" );
break;
case IN_UNKNOWN:
break;
}
}
/*
this is only called when we're in the IN_PROPERTY_UNKNOWN_OBJECT state.
the only time we won't know what type of object a statement has is
when we encounter property statements without property attributes or
content:
<foo:property />
<foo:property ></foo:property>
<foo:property> </foo:property>
notice that the state doesn't switch to IN_PROPERTY_LITERAL when
there is only whitespace between the start and end tags. this isn't
a very useful statement since the object is anonymous and can't
have any statements with it as the subject but it is allowed.
*/
function _end_empty_resource_property()
{
this->_generate_anonymous_uri(buffer ) );
this->rdf_parser["top"]["parent"]["subject_type"],
this->rdf_parser["top"]["predicate"],
buffer,
this->rdf_parser["top"]["parent"]["bag_id"],
this->rdf_parser["top"]["statement_id"] );
}
/*
property elements with text only as content set the state to
IN_PROPERTY_LITERAL. as character data is received from expat,
it is saved in a buffer and reported when the end tag is
received.
*/
function _end_literal_property()
{
if(!isset(this->rdf_parser["top"]["statement_id"]='';
}
if(!isset(this->rdf_parser["top"]["parent"]["subject_type"]='';
}
if(!isset(this->rdf_parser["top"]["parent"]["subject"]='';
}
if(!isset(this->rdf_parser["top"]["parent"]["bag_id"]='';
}
if(!isset(this->rdf_parser["top"]["parent"]["statements"]=0;
}
if(!isset(this->rdf_parser["top"]["predicate"]='';
}
if(!isset(this->rdf_parser["top"]["ordinal"]=0;
}
this->rdf_parser["top"]["parent"]["subject_type"],
this->rdf_parser["top"]["predicate"],
this->rdf_parser["top"]["data"],
this->rdf_parser["top"]["parent"]["bag_id"],
this->rdf_parser["top"]["statement_id"] );
}
function _end_element_handler( name )
{
switch( this->_report_end_element( this->_end_empty_resource_property();
break;
case IN_PROPERTY_LITERAL:
this->_pop_element( );
break;
case IN_PROPERTY_PARSE_TYPE_LITERAL:
this->_pop_element();
}
function _character_data_handler( s)
{
s);
switch( this->rdf_parser["top"]["data"]) )
{
this->rdf_parser["top"]["data"] );
s;
}
else
{
s;
}
if( i = 0; (( len ) && ( ereg(" |\n|\t",i }) )); i++;
/* if we found non-whitespace, this is a literal */
if( len )
{
this->_report_character_data(
s) );
break;
case IN_RDF:
case IN_DESCRIPTION:
case IN_PROPERTY_RESOURCE:
case IN_PROPERTY_EMPTY_RESOURCE:
case IN_PROPERTY_PARSE_TYPE_RESOURCE:
case IN_UNKNOWN:
break;
}
}
/* public functions */
function rdf_parser_create( parser = xml_parser_create_ns( parser,XML_OPTION_CASE_FOLDING,0);
parser;
xml_set_object(this);
xml_set_element_handler( this->rdf_parser["xml_parser"], "_character_data_handler" );
return z=3;
// xml_parser_free(
this->rdf_parser["base_uri"]='';
this->rdf_parser );
unset( user_data )
{
user_data;
}
function rdf_get_user_data( )
{
return ( user_data"] );
}
function rdf_set_statement_handler(this->rdf_parser["statement_handler"] = start,this->rdf_parser["start_parse_type_literal_handler"] = this->rdf_parser["end_parse_type_literal_handler"] = start,this->rdf_parser["_start_element_handler"] = this->rdf_parser["_end_element_handler"] = handler)
{
handler;
}
function rdf_set_warning_handler(this->rdf_parser["warning_handler"] = s, is_final )
{
return XML_Parse( s, this->rdf_parser["xml_parser"]);
}
function rdf_set_base(this->rdf_parser["base_uri"]=this->rdf_parser["base_uri"];
}
function rdf_resolve_uri(buffer)
{
_resolve_uri_reference( uri_reference, buffer) );
}
}
}
?>
(C) Æliens
20/2/2008
You may not copy or print any of this material without explicit permission of the author or the publisher.
In case of other copyright issues, contact the author.