mashup-delicious-11-delcache-lib-extlib-Snoopy.class.inc / inc
***********************************************Snoopy - the PHP net client Author: Monte Ohrt <monte@ispi.net> Copyright (c): 1999-2000 ispi, all rights reserved Version: 1.0 * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA You may contact the author of Snoopy by e-mail at: monte@ispi.net Or, write to: Monte Ohrt CTO, ispi 237 S. 70th suite 220 Lincoln, NE 68510 The latest version of Snoopy can be obtained from: http://snoopy.sourceforge.com *************************************************/ class Snoopy { ** Public variables ***/* user definable vars */ var port = 80; // port we are connecting to var proxy_port = ""; // proxy port to use var referer = ""; // referer info to pass var cookies["username"]="joe"; var rawheaders["Content-type"]="text/html"; var lastredirectaddr = ""; // contains address of last redirected address var maxframes = 0; // frame content depth maximum. 0 = disallow var passcookies = true; // pass set cookies back through redirects // NOTE: this currently does not respect // dates, domains or paths. var pass = ""; // password for http authentication // http accept types var results = ""; // where the content is put var response_code = ""; // response code returned from server var maxlength = 500000; // max return data length (body) var timed_out = false; // if a read operation timed out var curl_path = "/usr/bin/curl"; // Snoopy will use cURL for fetching // SSL content if a full system path to // the cURL binary is supplied here. // set to false if you do not have // cURL installed. See http://curl.haxx.se // for details on installing cURL. // Snoopy does *not* use the cURL // library functions built into php, // as these functions are not stable // as of this Snoopy release. // send Accept-encoding: gzip? var ** Private variables ***var _maxlinelen = 4096; // max line length (headers) var _httpversion = "HTTP/1.0"; // default http request version var _submit_type = "application/x-www-form-urlencoded"; // default submit type var _redirectaddr = false; // will be set if page fetched is a redirect var _frameurls = array(); // frame src urls var _isproxy = false; // set if using a proxy server var URI the location of the page to fetch Output: URI) { //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",URI_PARTS); URI); if (!empty(this->user = URI_PARTS["pass"])) URI_PARTS["pass"]; switch(this->host = URI_PARTS["port"])) URI_PARTS["port"]; if(fp)) { if(this->_httprequest(fp,this->_httpmethod); } else { URI_PARTS["path"].(isset(URI_PARTS["query"] : ""); // no proxy, send only the path path, URI, this->_disconnect(this->_redirectaddr) { /* url was redirected, check if we've hit the max depth */ if(this->_redirectdepth) { // only follow redirect if it's on this site, or offsiteok is true if(preg_match("|^http://".preg_quote(this->_redirectaddr) || this->_redirectdepth++; this->_redirectaddr; this->_redirectaddr); } } } if(this->maxframes && count(frameurls = this->_frameurls = array(); while(list(,frameurls)) { if(this->maxframes) { frameurl); this->curl_path || (!is_executable(this->error = "Bad curl (this->host = URI_PARTS["port"])) URI_PARTS["port"]; if(this->_httpsrequest(URI,path = URI_PARTS["query"] ? "?".this->_httpsrequest(URI, this->_redirectaddr) { /* url was redirected, check if we've hit the max depth */ if(this->_redirectdepth) { // only follow redirect if it's on this site, or offsiteok is true if(preg_match("|^http://".preg_quote(this->_redirectaddr) || this->_redirectdepth++; this->_redirectaddr; this->_redirectaddr); } } } if(this->maxframes && count(frameurls = this->_frameurls = array(); while(list(,frameurls)) { if(this->maxframes) { frameurl); this->error = 'Invalid protocol "'.document document to strip. Output: document) { preg_match_all("'<\s*a\s+.*href\s*=\s* # find <a href= ([\"\'])? # find single or double quote (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching # quote, otherwise match up to next space 'isx",links); // catenate the non-empty matches from the conditional subpattern while(list(val) = each(val)) val; } while(list(val) = each(val)) val; } // return the links return document document to strip. Output: document) { preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",elements); // catenate the matches elements[0]); // return the links return document document to strip. Output: document) { // I didn't use preg eval (//e) since that is only available in PHP 4.0. // so, list your entities one by one here. I included some of the // more common ones. replace = array( "", "", "\\1", "\"", "&", "<", ">", " ", chr(161), chr(162), chr(163), chr(169)); search,document); return links the links to qualify expandedLinks the expanded links \*======================================================================*/ function _expandlinks(URI) { preg_match("/^[^\?]+/",match); |","",search = array( "|^http://".preg_quote(replace = array( "", expandedLinks = preg_replace(replace,expandedLinks; } /*======================================================================*\ Function: _httprequest Purpose: go get the http data from the server Input: fp the current open file pointer body body contents to send if any (POST) Output: \*======================================================================*/ function _httprequest(fp,http_method,body="") { if(this->_redirectaddr) URI_PARTS = parse_url(url)) headers = url." ".this->agent)) this->agent."\r\n"; if(!empty(this->rawheaders['Host'])) this->host."\r\n"; if(!empty(headers .= "Accept: ".this->use_gzip) { // make sure PHP was built with --with-zlib // and we can handle gzipp'ed data if ( function_exists(gzinflate) ) { this->referer)) this->referer."\r\n"; if(!empty(this->cookies)) this->cookies; reset(this->cookies) > 0 ) { this->cookies as cookieVal ) { cookieKey."=".urlencode(headers .= substr(this->rawheaders)) { if(!is_array(this->rawheaders = (array)headerKey,this->rawheaders)) headerKey.": ".content_type)) { content_type"; if (headers .= "; boundary=".headers .= "\r\n"; } if(!empty(headers .= "Content-length: ".strlen(this->user) || !empty(headers .= "Authorization: BASIC ".base64_encode(this->pass)."\r\n"; this->read_timeout > 0) socket_set_timeout(this->read_timeout); fp,body,strlen(body)); this->headers); // content was returned gzip encoded? currentHeader = fgets(this->_maxlinelen)) { if (this->_check_timeout(this->status=-100; return false; } // if(/", currentHeader)) { // get URL portion of the redirect preg_match("/^(Location:|URI:)\s+(.*)/",chop(matches); // look for :// in the Location header to see if hostname is included if(!preg_match("| \/\/|",this->_redirectaddr = this->host.":".matches[2])) matches[2]; else matches[2]; } else matches[2]; } if(preg_match("|^HTTP/|",currentHeader, this->status= this->response_code = currentHeader) ) { this->headers[] = results = fread(this->maxlength); data = fread(this->maxlength) ) { data; if ( strlen(this->maxlength ) { break; } } // gunzip if ( results = substr(results = gzinflate(this->read_timeout > 0 && fp)) { results,this->_redirectaddr = match[1],this->_framedepth < results,this->results[] = x=0; match[1]); this->_frameurls[] = match[1][URI_PARTS["scheme"]."://".this->results)) results; // no framed content else results; return true; } /*======================================================================*\ Function: _httpsrequest Purpose: go get the https data from the server using curl Input: URI the full URI function _httpsrequest(url,http_method,body="") { if(this->_redirectaddr) headers = array(); URI); if(empty(url = "/"; // GET ... header not needed for curl //http_method." ".this->_httpversion; if(!empty(headers[] = "User-Agent: ".this->host)) this->host; if(!empty(headers[] = "Accept: ".this->referer)) this->referer; if(!empty(this->cookies)) this->cookies; reset(this->cookies) > 0 ) { this->cookies as cookieVal ) { cookieKey."=".urlencode(headers[] = substr(this->rawheaders)) { if(!is_array(this->rawheaders = (array)headerKey,this->rawheaders)) headerKey.": ".content_type)) { if (headers[] = "Content-type: this->_mime_boundary; else content_type"; } if(!empty(headers[] = "Content-length: ".strlen(this->user) || !empty(headers[] = "Authorization: BASIC ".base64_encode(this->pass); for(curr_header < count(curr_header++) { headers[body)) body\""; if(cmdline_params .= " -m ".headerfile = uniqid(time()); # accept self-signed certs this->curl_path." -D \"/tmp/cmdline_params)." ".escapeshellcmd(results,return) { return."; return false; } results); headerfile"); this->headers); for(currentHeader < count(currentHeader++) { // if a header begins with Location: or URI:, set the redirect if(preg_match("/^(Location: |URI: )/i",currentHeader])) { // get URL portion of the redirect preg_match("/^(Location: |URI:)(.*)/",chop(currentHeader]),matches[2])) { // no host in the path, so prepend URI_PARTS["scheme"]."://".this->port; // eliminate double slash if(!preg_match("|^/|",this->_redirectaddr .= "/".this->_redirectaddr .= this->_redirectaddr = result_headers[this->response_code = currentHeader]; if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",match)) { match[1]; } } result_headers[results,this->_redirectaddr = match[1],this->_framedepth < results,this->results[] = x=0; match[1]); this->_frameurls[] = match[1][URI_PARTS["scheme"]."://".this->results)) results; // no framed content else results; unlink("/tmp/x=0; this->headers); this->headers[match)) match[1]] = fp file pointer \*======================================================================*/ function _check_timeout(this->read_timeout > 0) { fp); if (this->timed_out = true; return true; } } return false; } /*======================================================================*\ Function: _connect Purpose: make a socket connection Input: function _connect(&fp) { if(!empty(this->proxy_port)) { host = port = host = port = this->status = 0; if(host, errno, this->_fp_timeout )) { // socket connection succeeded return true; } else { // socket connection failed errno; switch(this->error="socket creation failed (-3)"; case -4: this->error="connection refused or timed out (-5)"; default: errno.")"; } return false; } } /*======================================================================*\ Function: _disconnect Purpose: disconnect a socket connection Input: function _disconnect(fp) { return(fclose(formvars - form variables formvars, formvars, "array"); settype(formvars) == 0 && count(this->_submit_type) { case "application/x-www-form-urlencoded": reset(key,formvars)) { if (is_array(val)) { while (list(cur_val) = each(postdata .= urlencode(cur_val)."&"; } } else key)."=".urlencode(this->_mime_boundary = "Snoopy".md5(uniqid(microtime())); reset(key,formvars)) { if (is_array(val)) { while (list(cur_val) = each(postdata .= "--".postdata .= "Content-Disposition: form-data; name=\"postdata .= "postdata .= "--".postdata .= "Content-Disposition: form-data; name=\"postdata .= "formfiles); while (list(file_names) = each(file_names, "array"); while (list(, file_names)) { if (!is_readable(fp = fopen(file_content = fread(file_name)); fclose(base_name = basename(postdata .= "--".postdata .= "Content-Disposition: form-data; name=\"base_name\"\r\n\r\n"; file_content\r\n"; } } this->_mime_boundary."--\r\n"; break; } return
[]readme course(s) prefaceI 1 2II 3 4III 5 6 7IV 8 9 10V 11 12 afterthought(s)appendix reference(s) example(s)resource(s) _![]()
(C) Æliens 20/2/2008
You may not copy or print any of this material without explicit permission of the author or the publisher. In case of other copyright issues, contact the author.