Links

Data scraping with cURL and PHP 5


<?php
/**
 * Bootstrap and helper functions for the artist-search scraper.
 *
 * Opens a persistent MySQL connection, selects the `crawl` database, lifts the
 * execution time limit, defines the HTTP helpers get_via() and get_file1(),
 * and prepares the DOMDocument and search term used by the crawl loop below.
 *
 * NOTE(review): this script relies on the legacy mysql_* extension, which only
 * exists on PHP 5. It is kept here because the rest of the script calls
 * mysql_query()/mysql_insert_id() on the implicit connection opened below.
 */

$db = mysql_pconnect('127.0.0.1', 'root', '');
if ($db === false) {
    // Fail early with the connection error instead of a warning from mysql_select_db().
    die("Error In database : " . mysql_error());
}
mysql_select_db('crawl', $db) or die("Error In database : " . mysql_error());

set_time_limit(999999999);
//error_reporting(0);

if (!function_exists('array_combine')) {
    /**
     * Fallback for runtimes that lack the native array_combine().
     *
     * @param array $keys   Values to use as keys.
     * @param array $values Values to pair with the keys, in order.
     * @return array|false  The combined array, or false when the counts differ.
     */
    function array_combine($keys, $values)
    {
        if (count($keys) != count($values)) {
            return false;
        }

        // Initialise explicitly so two empty inputs yield an empty array
        // rather than an undefined variable.
        $array = array();
        foreach ($keys as $key) {
            $array[$key] = array_shift($values);
        }

        return $array;
    }
}

/**
 * Send a POST request and return the response body.
 *
 * @param mixed  $query POST payload handed to CURLOPT_POSTFIELDS.
 * @param string $url   Target URL.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function get_via($query, $url)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0");
    // CURLOPT_ENCODING sets the Accept-Encoding header; "UTF-8" is a charset,
    // not a content-coding. The empty string advertises every coding cURL
    // supports and lets it decode the response transparently.
    curl_setopt($ch, CURLOPT_ENCODING, "");
    $content = curl_exec($ch);
    curl_close($ch);

    return $content;
}

/**
 * Download a URL straight into a local file.
 *
 * Prints "File not opened " and terminates the script when the destination
 * cannot be opened for writing.
 *
 * @param string $file        Source URL.
 * @param string $newfilename Destination path; opened in 'wb' mode, so an
 *                            existing file is truncated.
 * @return void
 */
function get_file1($file, $newfilename)
{
    $out = fopen($newfilename, 'wb');
    if ($out === false) {
        print "File not opened ";
        exit;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_FILE, $out);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_URL, $file);
    curl_exec($ch);
    curl_close($ch);

    // Release the handle so the data is flushed and descriptors are not
    // leaked when many images are downloaded in one run.
    fclose($out);
}

$dom = new DOMDocument();
// Default to an empty search term instead of raising an undefined-index notice.
$alpha = isset($_REQUEST['searchInput']) ? $_REQUEST['searchInput'] : '';
// Main crawl loop: fetch result pages and store every artist link found on them.
// NOTE(review): the statement below is corrupted in this copy and does not parse.
// The opening brace of the for-loop body (its closing brace is the last `}` of the
// script), the assignment that builds $page, and the receiver of loadhtml() are missing.
// The surviving fragments (`query=$alpha&searchtype=`, `pagestart=`) suggest $page was a
// search URL built from $alpha and the offset $i, and the receiver was presumably $dom.
// Restore this line from the original source before running.
// As written, $i steps through 0, 10, ..., 90.
for($i=0;$i<100;$i=$i+10) page="($i=" query="$alpha&searchtype=" query="$alpha&searchtype=" pagestart="">loadhtml(get_via("",$page));
// Collect every <div> in the parsed document.
$nodes = $dom->getElementsByTagName('div');
foreach($nodes as $P)
{
// Keep only divs whose class attribute is exactly this two-class string.
if($P->getAttribute('class')=='searchResultsItemHolder searchResultsArtistItemHolder')
{
// Inspect the direct children of the result holder.
$insideDiv=$P->childNodes;
foreach($insideDiv as $divElement)
{
if($divElement->nodeName=="a")
{
// Only anchors with class "artistLink" are treated as artist entries.
if($divElement->getAttribute('class')=='artistLink')
{
// The link target is stored under "details" and the title attribute under "name".
$data["details"]=$divElement->getAttribute('href');
$data["name"]=$divElement->getAttribute('title');
// Every child node of the anchor is processed.
// NOTE(review): nothing checks that a child is an <img> element, so each child
// produces its own INSERT, and a text-node child would reach the getAttribute('src')
// call further down, which text nodes do not provide. Verify against the real markup.
$image=$divElement->childNodes;
foreach($image as $img)
{
// Build a local file name "GsT<random>.jpg". rand() does not guarantee uniqueness,
// so two images could receive the same name.
$data["imagename"]="GsT".rand(0,time()).".jpg";
// Insert one row into `tN`.`tA` with uid fixed to '1'; the name, generated image
// file name and link href (stored in `Track`) are escaped with mysql_escape_string().
// NOTE(review): on the continuation line there is no operator between the closing
// parenthesis of mysql_query(...) and print(...), which is a parse error as written.
// Presumably `or` was intended. Confirm against the original source.
mysql_query("INSERT INTO `tN`.`tA` (`uid` ,`art_name` ,`art_pic` ,`Track`)
VALUES ('1', '".mysql_escape_string($data["name"])."', '".mysql_escape_string($data["imagename"])."', '".mysql_escape_string($data["details"])."')") print(mysql_error());
// Download the image only when the insert reported a positive auto-increment id.
if(mysql_insert_id()>0)
{
// Save the file referenced by the child's src attribute under images/artist/.
get_file1($img->getAttribute('src'), "images/artist/".$data["imagename"]);
}
}
}
}
}
}
}
}
?>