Added more timestamp measurements

This commit is contained in:
Michael Vogel 2015-03-07 23:14:26 +01:00
parent 1cf3cb493f
commit dbe8275ae5
4 changed files with 211 additions and 188 deletions

View file

@ -24,30 +24,30 @@ if(!function_exists('deletenode')) {
}
/**
 * Turns a possibly relative URL into an absolute one, using another URL as base.
 *
 * If $url already carries a scheme it is returned unchanged. Otherwise the
 * scheme, host and (if present) port are taken from $scheme, while path,
 * query and fragment are taken from $url.
 *
 * The path is always anchored at the host root: a path without a leading
 * slash gets one prepended. It is NOT resolved relative to the directory of
 * the base URL.
 *
 * @param string $url    URL to complete (absolute or relative)
 * @param string $scheme Absolute URL that provides scheme, host and port
 * @return string The completed URL; $url itself when it is already absolute
 *                or when $scheme provides no scheme/host to build on
 */
function completeurl($url, $scheme) {
	$urlarr = parse_url($url);

	// parse_url() returns false on seriously malformed input
	if (!is_array($urlarr))
		$urlarr = array();

	// Already absolute - nothing to do
	if (isset($urlarr["scheme"]))
		return($url);

	$schemearr = parse_url($scheme);

	// Without scheme and host in the base there is nothing to complete with
	if (!is_array($schemearr) OR !isset($schemearr["scheme"]) OR !isset($schemearr["host"]))
		return($url);

	$complete = $schemearr["scheme"]."://".$schemearr["host"];

	if (isset($schemearr["port"]) AND ($schemearr["port"] != ""))
		$complete .= ":".$schemearr["port"];

	// The path key is missing for query-only or fragment-only relative URLs
	$path = (isset($urlarr["path"]) ? $urlarr["path"] : "");

	// Make sure that host and path are separated by exactly one slash
	if (strpos($path, '/') !== 0)
		$complete .= '/';

	$complete .= $path;

	if (isset($urlarr["query"]) AND ($urlarr["query"] != ""))
		$complete .= "?".$urlarr["query"];

	if (isset($urlarr["fragment"]) AND ($urlarr["fragment"] != ""))
		$complete .= "#".$urlarr["fragment"];

	return($complete);
}
function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1) {
@ -70,6 +70,8 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
$siteinfo["url"] = $url;
$siteinfo["type"] = "link";
$stamp1 = microtime(true);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 1);
@ -81,9 +83,11 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
$http_code = $curl_info['http_code'];
curl_close($ch);
$a->save_timestamp($stamp1, "network");
if ((($curl_info['http_code'] == "301") OR ($curl_info['http_code'] == "302") OR ($curl_info['http_code'] == "303") OR ($curl_info['http_code'] == "307"))
AND (($curl_info['redirect_url'] != "") OR ($curl_info['location'] != ""))) {
if ($curl_info['redirect_url'] != "")
@ -110,6 +114,8 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
if (($curl_info["content_type"] != "") AND !strstr(strtolower($curl_info["content_type"]),"html"))
return($siteinfo);
$stamp1 = microtime(true);
// Now fetch the body as well
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
@ -121,9 +127,11 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
$header = curl_exec($ch);
$curl_info = @curl_getinfo($ch);
$http_code = $curl_info['http_code'];
$http_code = $curl_info['http_code'];
curl_close($ch);
$a->save_timestamp($stamp1, "network");
// Fetch the first mentioned charset. Can be in body or header
$charset = "";
if (preg_match('/charset=(.*?)['."'".'"\s\n]/', $header, $matches))
@ -165,25 +173,25 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
$xpath = new DomXPath($doc);
$list = $xpath->query("//meta[@content]");
foreach ($list as $node) {
$attr = array();
if ($node->attributes->length)
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
foreach ($list as $node) {
$attr = array();
if ($node->attributes->length)
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
if (@$attr["http-equiv"] == 'refresh') {
$path = $attr["content"];
$pathinfo = explode(";", $path);
$content = "";
foreach ($pathinfo AS $value) {
if (substr(strtolower($value), 0, 4) == "url=")
$content = substr($value, 4);
}
if ($content != "") {
$siteinfo = parseurl_getsiteinfo($content, $no_guessing, $do_oembed, ++$count);
return($siteinfo);
}
}
if (@$attr["http-equiv"] == 'refresh') {
$path = $attr["content"];
$pathinfo = explode(";", $path);
$content = "";
foreach ($pathinfo AS $value) {
if (substr(strtolower($value), 0, 4) == "url=")
$content = substr($value, 4);
}
if ($content != "") {
$siteinfo = parseurl_getsiteinfo($content, $no_guessing, $do_oembed, ++$count);
return($siteinfo);
}
}
}
//$list = $xpath->query("head/title");
@ -196,8 +204,8 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
foreach ($list as $node) {
$attr = array();
if ($node->attributes->length)
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
$attr["content"] = trim(html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8"));
@ -256,8 +264,8 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
foreach ($list as $node) {
$attr = array();
if ($node->attributes->length)
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
$attr["content"] = trim(html_entity_decode($attr["content"], ENT_QUOTES, "UTF-8"));
@ -285,12 +293,12 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
}
if ((@$siteinfo["image"] == "") AND !$no_guessing) {
$list = $xpath->query("//img[@src]");
foreach ($list as $node) {
$attr = array();
if ($node->attributes->length)
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
$list = $xpath->query("//img[@src]");
foreach ($list as $node) {
$attr = array();
if ($node->attributes->length)
foreach ($node->attributes as $attribute)
$attr[$attribute->name] = $attribute->value;
$src = completeurl($attr["src"], $url);
$photodata = @getimagesize($src);
@ -309,7 +317,7 @@ function parseurl_getsiteinfo($url, $no_guessing = false, $do_oembed = true, $co
"height"=>$photodata[1]);
}
}
}
} else {
$src = completeurl($siteinfo["image"], $url);