mashup-delicious-06-example6-8-Snoopy.class.php / php
***********************************************Snoopy - the PHP net client Author: Monte Ohrt <monte@ispi.net> Copyright (c): 1999-2000 ispi, all rights reserved Version: 1.01 * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA You may contact the author of Snoopy by e-mail at: monte@ispi.net Or, write to: Monte Ohrt CTO, ispi 237 S. 70th suite 220 Lincoln, NE 68510 The latest version of Snoopy can be obtained from: http://snoopy.sourceforge.net/ *************************************************/ class Snoopy { ** Public variables ***/* user definable vars */ var port = 80; // port we are connecting to var proxy_port = ""; // proxy port to use var proxy_pass = ""; // proxy password to use var referer = ""; // referer info to pass var cookies["username"]="joe"; var rawheaders["Content-type"]="text/html"; var lastredirectaddr = ""; // contains address of last redirected address var maxframes = 0; // frame content depth maximum. 0 = disallow var passcookies = true; // pass set cookies back through redirects // NOTE: this currently does not respect // dates, domains or paths. var pass = ""; // password for http authentication // http accept types var results = ""; // where the content is put var response_code = ""; // response code returned from server var maxlength = 500000; // max return data length (body) var timed_out = false; // if a read operation timed out var temp_dir = "/tmp"; // temporary directory that the webserver // has permission to write to. // under Windows, this should be C:\temp var ** Private variables ***var _maxlinelen = 4096; // max line length (headers) var _httpversion = "HTTP/1.0"; // default http request version var _submit_type = "application/x-www-form-urlencoded"; // default submit type var _redirectaddr = false; // will be set if page fetched is a redirect var _frameurls = array(); // frame src urls var _isproxy = false; // set if using a proxy server var URI the location of the page to fetch Output: URI) { //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",URI_PARTS); URI); if (!empty(this->user = URI_PARTS["pass"])) URI_PARTS["pass"]; if (empty(URI_PARTS["query"] = ''; if (empty(URI_PARTS["path"] = ''; switch(strtolower(this->host = URI_PARTS["port"])) URI_PARTS["port"]; if(fp)) { if(this->_httprequest(fp,this->_httpmethod); } else { URI_PARTS["path"].(URI_PARTS["query"] : ""); // no proxy, send only the path path, URI, this->_disconnect(this->_redirectaddr) { /* url was redirected, check if we've hit the max depth */ if(this->_redirectdepth) { // only follow redirect if it's on this site, or offsiteok is true if(preg_match("|^http://".preg_quote(this->_redirectaddr) || this->_redirectdepth++; this->_redirectaddr; this->_redirectaddr); } } } if(this->maxframes && count(frameurls = this->_frameurls = array(); while(list(,frameurls)) { if(this->maxframes) { frameurl); this->curl_path) return false; if(function_exists("is_executable")) if (!is_executable(this->host = URI_PARTS["port"])) URI_PARTS["port"]; if(this->_httpsrequest(URI,path = URI_PARTS["query"] ? "?".this->_httpsrequest(URI, this->_redirectaddr) { /* url was redirected, check if we've hit the max depth */ if(this->_redirectdepth) { // only follow redirect if it's on this site, or offsiteok is true if(preg_match("|^http://".preg_quote(this->_redirectaddr) || this->_redirectdepth++; this->_redirectaddr; this->_redirectaddr); } } } if(this->maxframes && count(frameurls = this->_frameurls = array(); while(list(,frameurls)) { if(this->maxframes) { frameurl); this->error = 'Invalid protocol "'.URI the location to post the data formvars["var"] = "val"; formfiles["var"] = "/dir/filename.ext"; Output: URI, formfiles="") { unset(postdata = formvars, URI_PARTS = parse_url(URI_PARTS["user"])) URI_PARTS["user"]; if (!empty(this->pass = URI_PARTS["query"])) URI_PARTS["path"])) URI_PARTS["scheme"])) { case "http": URI_PARTS["host"]; if(!empty(this->port = this->_connect(this->_isproxy) { // using proxy, send entire URI URI,URI,this->_submit_type,path = URI_PARTS["query"] ? "?".this->_httprequest(fp, this->_submit_method, postdata); } fp); if(this->maxredirs > URI_PARTS["scheme"]."://|", this->_redirectaddr = this->_redirectaddr,URI_PARTS["host"]); // only follow redirect if it's on this site, or offsiteok is true if(preg_match("|^http://".preg_quote(this->_redirectaddr) || this->_redirectdepth++; this->_redirectaddr; if( strpos( this->fetch(this->submit(formvars, this->_framedepth < this->_frameurls) > 0) { this->_frameurls; frameurl) = each(this->_framedepth < this->fetch(this->_framedepth++; } else break; } } } else { return false; } return true; break; case "https": if(!this->curl_path)) return false; URI_PARTS["host"]; if(!empty(this->port = this->_isproxy) { // using proxy, send entire URI URI, this->_submit_method, postdata); } else { URI_PARTS["path"].(URI_PARTS["query"] : ""); // no proxy, send only the path path, this->_submit_method, postdata); } if(this->maxredirs > URI_PARTS["scheme"]."://|", this->_redirectaddr = this->_redirectaddr,URI_PARTS["host"]); // only follow redirect if it's on this site, or offsiteok is true if(preg_match("|^http://".preg_quote(this->_redirectaddr) || this->_redirectdepth++; this->_redirectaddr; if( strpos( this->fetch(this->submit(formvars, this->_framedepth < this->_frameurls) > 0) { this->_frameurls; frameurl) = each(this->_framedepth < this->fetch(this->_framedepth++; } else break; } } return true; break; default: // not a valid protocol URI_PARTS["scheme"].'"\n'; return false; break; } return true; } /*======================================================================*\ Function: fetchlinks Purpose: fetch the links from a web page Input: this->results an array of the URLs \*======================================================================*/ function fetchlinks(this->fetch(this->lastredirectaddr) this->lastredirectaddr; if(is_array(x=0;this->results);this->results[this->_striplinks(x]); } else this->_striplinks(this->expandlinks) this->_expandlinks(URI); return true; } else return false; } /*======================================================================*\ Function: fetchform Purpose: fetch the form elements from a web page Input: this->results the resulting html form \*======================================================================*/ function fetchform(this->fetch(this->results)) { for(x<count(x++) x] = this->results[this->results = this->results); return true; } else return false; } /*======================================================================*\ Function: fetchtext Purpose: fetch the text from a web page, stripping the links Input: this->results the text from the web page \*======================================================================*/ function fetchtext(this->fetch(this->results)) { for(x<count(x++) x] = this->results[this->results = this->results); return true; } else return false; } /*======================================================================*\ Function: submitlinks Purpose: grab links from a form submission Input: this->results an array of the links from the post \*======================================================================*/ function submitlinks(formvars="", this->submit(formvars, this->lastredirectaddr) this->lastredirectaddr; if(is_array(x=0;this->results);this->results[this->_striplinks(x]); if(this->results[this->_expandlinks(x],this->results = this->results); if(this->results = this->results,URI where you are submitting from Output: URI, formfiles = "") { if(URI,formfiles)) { if(URI = this->results)) { for(x<count(x++) { x] = this->results[this->expandlinks) x] = this->results[URI); } } else { this->_striptext(this->expandlinks) this->_expandlinks(URI); } return true; } else return false; } /*======================================================================*\ Function: set_submit_multipart Purpose: Set the form submission content type to multipart/form-data \*======================================================================*/ function set_submit_multipart() { this->_submit_type = "application/x-www-form-urlencoded"; } /*======================================================================*\ Private functions \*======================================================================*/ /*======================================================================*\ Function: _striplinks Purpose: strip the hyperlinks from an html document Input: match an array of the links \*======================================================================*/ function _striplinks(document,key,links[2])) { if(!empty(match[] = key,links[3])) { if(!empty(match[] = match; } /*======================================================================*\ Function: _stripform Purpose: strip the form elements from an html document Input: match an array of the links \*======================================================================*/ function _stripform(document,match = implode("\r\n",match; } /*======================================================================*\ Function: _striptext Purpose: strip the text from an html document Input: text the resulting text \*======================================================================*/ function _striptext(search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript "'<[\/\!]*?[^<>]*?>'si", // strip out html tags "'([\r\n])[\s]+'", // strip out white space "'&(quot|#34|#034|#x22);'i", // replace html entities "'&(amp|#38|#038|#x26);'i", // added hexadecimal values "'&(lt|#60|#060|#x3c);'i", "'&(gt|#62|#062|#x3e);'i", "'&(nbsp|#160|#xa0);'i", "'&(iexcl|#161);'i", "'&(cent|#162);'i", "'&(pound|#163);'i", "'&(copy|#169);'i", "'&(reg|#174);'i", "'&(deg|#176);'i", "'&(#39|#039|#x27);'", "'&(euro|#8364);'i", // europe "'&a(uml|UML);'", // german "'&o(uml|UML);'", "'&u(uml|UML);'", "'&A(uml|UML);'", "'&O(uml|UML);'", "'&U(uml|UML);'", "'ß'i", ); text = preg_replace(replace,text; } /*======================================================================*\ Function: _expandlinks Purpose: expand each link into a fully qualified URL Input: URI the full URI to get the base from Output: function _expandlinks(links,URI,match = preg_replace("|/[^\/\.]+\.[^\/\.]+match[0]); |","",match_part = parse_url(match_root = match_part["host"]; this->host)."|i", "|^(\/)|i", "|^(?!http://)(?!mailto:)|i", "|/\./|", "|/[^\/]+/\.\./|" ); match_root."/", expandedLinks = preg_replace(replace,expandedLinks; } /*======================================================================*\ Function: _httprequest Purpose: go get the http data from the server Input: fp the current open file pointer body body contents to send if any (POST) Output: \*======================================================================*/ function _httprequest(fp,http_method,body="") { this->passcookies && this->setcookies(); URI); if(empty(url = "/"; http_method." ".this->_httpversion."\r\n"; if(!empty(headers .= "User-Agent: ".this->host) && !isset(headers .= "Host: ".this->port)) this->port; this->accept)) this->accept."\r\n"; if(!empty(headers .= "Referer: ".this->cookies)) { if(!is_array(this->cookies = (array)this->cookies); if ( count(cookie_headers .= 'Cookie: '; foreach ( cookieKey => cookie_headers .= cookieVal)."; "; } cookie_headers,0,-2) . "\r\n"; } } if(!empty(this->rawheaders)) this->rawheaders; while(list(headerVal) = each(headers .= headerVal."\r\n"; } if(!empty(headers .= "Content-type: content_type == "multipart/form-data") this->_mime_boundary; body)) body)."\r\n"; if(!empty(this->pass)) this->user.":".this->proxy_user)) this->proxy_user . ':' . headers .= "\r\n"; // set the read timeout if needed if (fp, this->timed_out = false; fwrite(headers.headers.this->_redirectaddr = false; unset(currentHeader = fgets(this->_maxlinelen)) { if (this->_check_timeout(this->status=-100; return false; } if(currentHeader)) { // get URL portion of the redirect preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop(matches); // look for :// in the Location header to see if hostname is included if(!preg_match("| \/\/|",this->_redirectaddr = this->host.":".matches[2])) matches[2]; else matches[2]; } else matches[2]; } if(preg_match("|^HTTP/|",currentHeader, this->status= this->response_code = this->headers[] = results = ''; do { fp, _data) == 0) { break; } _data; } while(true); if (this->_check_timeout(this->status=-100; return false; } // check if there is a a redirect meta tag if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",match)) { this->_expandlinks(URI); } // have we hit our frame depth and is there frame src to fetch? if((this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",match)) { results; for(x<count(x++) this->_expandlinks(x],this->host); } // have we already fetched framed content? elseif(is_array(this->results[] = this->results = url the url to fetch body body contents to send if any (POST) Output: \*======================================================================*/ function _httpsrequest(URI,content_type="",this->passcookies && this->setcookies(); URI_PARTS = parse_url(url)) headers[] = url." ".this->agent)) this->agent; if(!empty(this->port)) this->host.":".headers[] = "Host: ".this->accept)) this->accept; if(!empty(headers[] = "Referer: ".this->cookies)) { if(!is_array(this->cookies = (array)this->cookies); if ( count(cookie_str = 'Cookie: '; foreach ( cookieKey => cookie_str .= cookieVal)."; "; } cookie_str,0,-2); } } if(!empty(this->rawheaders)) this->rawheaders; while(list(headerVal) = each(headers[] = headerVal; } if(!empty(content_type == "multipart/form-data") content_type; boundary=".headers[] = "Content-type: body)) body); if(!empty(this->pass)) this->user.":".curr_header = 0; headers); safer_header = strtr( curr_header], "\"", " " ); safer_header."\""; } if(!empty(cmdline_params .= " -d \"this->read_timeout > 0) this->read_timeout; temp_dir, "sno"); URI, "\"", " " ); // strip quotes from the URI to avoid shell access exec(headerfile\"".safer_URI."\"",return); if(this->error = "Error: cURL could not retrieve the document, error results = implode("\r\n",result_headers = file("this->_redirectaddr = false; unset(currentHeader = 0; result_headers); result_headers[result_headers[matches); // look for :// in the Location header to see if hostname is included if(!preg_match("| \/\/|",this->_redirectaddr = this->host.":".matches[2])) matches[2]; else matches[2]; } else matches[2]; } if(preg_match("|^HTTP/|",currentHeader])) result_headers[this->headers[] = currentHeader]; } // check if there is a a redirect meta tag if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",match)) { this->_expandlinks(URI); } // have we hit our frame depth and is there frame src to fetch? if((this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",match)) { results; for(x<count(x++) this->_expandlinks(x],this->host); } // have we already fetched framed content? elseif(is_array(this->results[] = this->results = headerfile"); return true; } /*======================================================================*\ Function: setcookies() Purpose: set cookies for a redirection \*======================================================================*/ function setcookies() { for(x<count(x++) { if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', x],this->cookies[match[2]); } } /*======================================================================*\ Function: _check_timeout Purpose: checks whether timeout has occurred Input: function _check_timeout(fp) { if (fp_status = socket_get_status(fp_status["timed_out"]) { fp file pointer \*======================================================================*/ function _connect(&this->proxy_host) && !empty(this->_isproxy = true; this->proxy_host; this->proxy_port; } else { this->host; this->port; } fp = fsockopen( port, errstr, this->status = errno) { case -3: this->error="dns lookup failure (-4)"; case -5: this->error="connection failed (".fp file pointer \*======================================================================*/ function _disconnect(fp)); } /*======================================================================*\ Function: _prepare_post_body Purpose: Prepare post body according to encoding type Input: formfiles - form upload files Output: post body \*======================================================================*/ function _prepare_post_body(formfiles) { settype(formfiles, "array"); formvars) == 0 && count(this->_submit_type) { case "application/x-www-form-urlencoded": reset(key,formvars)) { if (is_array(val)) { while (list(cur_val) = each(postdata .= urlencode(cur_val)."&"; } } else key)."=".urlencode(this->_mime_boundary = "Snoopy".md5(uniqid(microtime())); reset(key,formvars)) { if (is_array(val)) { while (list(cur_val) = each(postdata .= "--".postdata .= "Content-Disposition: form-data; name=\"postdata .= "postdata .= "--".postdata .= "Content-Disposition: form-data; name=\"postdata .= "formfiles); while (list(file_names) = each(file_names, "array"); while (list(, file_names)) { if (!is_readable(fp = fopen(file_content = fread(file_name)); fclose(base_name = basename(postdata .= "--".postdata .= "Content-Disposition: form-data; name=\"base_name\"\r\n\r\n"; file_content\r\n"; } } this->_mime_boundary."--\r\n"; break; } return
[]readme course(s) prefaceI 1 2II 3 4III 5 6 7IV 8 9 10V 11 12 afterthought(s)appendix reference(s) example(s)resource(s) _![]()
(C) Æliens 20/2/2008
You may not copy or print any of this material without explicit permission of the author or the publisher. In case of other copyright issues, contact the author.