server-webapi-code-03-fixingBrokenHTML.php / php
<?php config = array('indent' => TRUE, 'output-html' => TRUE, 'wrap' => 200, 'clean' => TRUE); brokenHTML, tidy); echo tidy_get_output(xml, updatedStories = 0; foreach(story) { story->children( "http://purl.org/rss/1.0/modules/content/" ); content->encoded; if (saveFeed(source, story->pubDate, story->link) == 2) { break; } updatedStories; } function saveFeed(source, date, link) { if (strlen(pk = md5(guid); }else { source . linkID = db_connect(); //We still don't want any HTML tags in the title of the item title)); //Clean broken HTML first, to avoid problems with other steps tidy = tidy_parse_string(config, 'UTF8'); tidy_clean_repair(content = tidy_get_output(//Confirm HTML links are absolute, and append the url to the link content = preg_replace('/<a\s+.*?href=[\"\']?([^\"\'>]*)[\"\']?\s?(title=[\"\']?([^\"\'>]*)[\"\']?)?[^>]*>(.*?)<\/a>/ie', "cleanAndDisplayHREF('content); //Display images as images, but load from local server <img\s+.*?src="([^\"\' >]*)"\s?(width="([0-9]*)")?\s?(height="([0-9]*)")?[^>]*>/ie', "retreiveImages('source', '\\0','\\1','\\2','\\3','\\4', '\\5')", content = mysql_real_escape_string(strip_tags(<p><img><a>")); link = mysql_real_escape_string(source = mysql_real_escape_string(date = strtotime(date == -1) { query = "REPLACE INTO 03_feed_raw (`id`, `source`, `title`, `date`, `content`, `link`) VALUES ('source', 'date'), 'link')"; return replaceQuery(linkID); } ?>
(C) Æliens 20/2/2008
You may not copy or print any of this material without explicit permission of the author or the publisher. In case of other copyright issues, contact the author.