I am trying to write a tool to check the availability of a proxy server. So far I have come up with the two methods in the class below (I removed the setters and getters that are not relevant to this question).
The first method uses cURL and tries to request a page through the proxy; the second method uses fsockopen and just tries to open a connection to the proxy.
/**
 * Holds a list of proxy addresses ("host:port" strings) and filters it down
 * to the entries that pass a liveness check.
 *
 * Check results are remembered per proxy string in a static cache shared by
 * every instance. Only positive results short-circuit a later check: a proxy
 * recorded as invalid is tested again the next time it is seen.
 */
class ProxyList {
    /** URL requested through each proxy by validate(). */
    const VALIDATION_URL = "http://m.www.yahoo.com/robots.txt";

    /** Timeout, in seconds, for one cURL transfer or one socket connect. */
    const TIMEOUT = 3;

    /** Result cache: proxy string => bool (true when the last check passed). */
    private static $valid = array();

    /** Proxies still considered usable; overwritten by validate()/validate2(). */
    private $proxies = array();

    /**
     * Checks every proxy in parallel by sending a HEAD request for
     * VALIDATION_URL through it; a proxy passes when the final HTTP status
     * is 2xx.
     *
     * The instance's proxy list is replaced by the proxies that passed
     * (cached ones first, then freshly checked ones).
     *
     * @param bool $useCache When true, proxies already cached as valid are
     *                       accepted without a new request.
     * @return array The proxies that passed.
     */
    public function validate($useCache=true) {
        $proxies = array();
        $pending = array();

        foreach ( $this->proxies as $p ) {
            if ( $useCache && !empty(self::$valid[$p]) ) {
                $proxies[] = $p;
                continue;
            }
            // Keyed by proxy so a duplicated entry is only requested once.
            $pending[$p] = true;
        }

        // Nothing left to test: do not touch cURL at all.
        if ( !$pending ) {
            return $this->proxies = $proxies;
        }

        $mh = curl_multi_init();
        $handles = array();

        foreach ( array_keys($pending) as $p ) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
            curl_setopt($ch, CURLOPT_URL, self::VALIDATION_URL);
            // NOTE(review): tunnelling is requested even though the target is
            // plain HTTP; proxies that refuse to tunnel to it will be reported
            // as invalid. Confirm this is the intended test.
            curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
            curl_setopt($ch, CURLOPT_PROXY, $p);
            curl_setopt($ch, CURLOPT_NOBODY, true);
            curl_setopt($ch, CURLOPT_HEADER, false);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, self::TIMEOUT);
            curl_multi_add_handle($mh, $ch);
            $handles[$p] = $ch;
        }

        // Drive all transfers. Block on socket activity instead of sleeping
        // for a period proportional to the list size, so the wait no longer
        // grows with the number of proxies.
        $running = 0;
        do {
            $status = curl_multi_exec($mh, $running);
            if ( $status === CURLM_OK && $running > 0 && curl_multi_select($mh, 1.0) === -1 ) {
                // select() could not wait on anything; back off briefly
                // rather than spinning.
                usleep(10000);
            }
        } while ( $running > 0 && ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM) );

        foreach ( $handles as $proxy => $ch ) {
            $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $ok = ( $code >= 200 && $code < 300 );
            self::$valid[$proxy] = $ok;
            if ( $ok ) {
                $proxies[] = $proxy;
            }
            curl_multi_remove_handle($mh, $ch);
        }
        curl_multi_close($mh);

        return $this->proxies = $proxies;
    }

    /**
     * Checks every proxy sequentially by opening a TCP connection to its
     * host and port; a proxy passes when the connection succeeds.
     *
     * Entries that are not of the form "host:port" with a numeric port in
     * 1..65535 are recorded as invalid without attempting a connection.
     * The instance's proxy list is replaced by the proxies that passed.
     *
     * @param bool $useCache When true, proxies already cached as valid are
     *                       accepted without a new connection attempt.
     * @return array The proxies that passed.
     */
    public function validate2($useCache=true) {
        $proxies = array();

        foreach ( $this->proxies as $proxy ) {
            if ( $useCache && !empty(self::$valid[$proxy]) ) {
                $proxies[] = $proxy;
                continue;
            }

            $endpoint = self::splitHostPort($proxy);
            $conn = false;
            if ( $endpoint !== null ) {
                // "@": a failed connect is an expected outcome here, not an error.
                $conn = @fsockopen($endpoint[0], $endpoint[1], $errno, $error, self::TIMEOUT);
            }

            if ( $conn ) {
                self::$valid[$proxy] = true;
                $proxies[] = $proxy;
                fclose($conn);
            } else {
                self::$valid[$proxy] = false;
            }
        }

        return $this->proxies = $proxies;
    }

    /**
     * Splits "host:port" on its last colon.
     *
     * @param string $proxy Proxy address to parse.
     * @return array|null array(host, int port), or null when the host is
     *                    empty or the port is not an integer in 1..65535.
     */
    private static function splitHostPort($proxy) {
        $proxy = (string)$proxy;
        $colon = strrpos($proxy, ":");
        if ( $colon === false || $colon === 0 ) {
            return null;
        }

        $host = substr($proxy, 0, $colon);
        $port = (string)substr($proxy, $colon + 1);
        if ( !preg_match('/^[0-9]{1,5}$/', $port) ) {
            return null;
        }

        $port = (int)$port;
        if ( $port < 1 || $port > 65535 ) {
            return null;
        }

        return array($host, $port);
    }
}
So far, I prefer the cURL method, as it lets me check large batches of proxies in parallel, which is extremely fast, rather than one at a time as with fsockopen.
I have not done much work with proxies, so it’s hard for me to say if one of these methods is enough to check the availability of the proxy server or if there is a better method that I am missing.