home
/news
chat
photolog
sitemap
Sitemap Code
Snippet
// Configure the initial page here:
$url = "http://www.ingmardeboer.nl/test.php";

// Visited-URL list, shared with SiteMap() through `global`. It is seeded with
// the start page so that a link pointing back to it is not crawled again.
$links = array($url);

/**
 * Crawls one page: echoes its title and the file names of its local links,
 * and recurses into every not-yet-visited local link that looks like a page
 * (its URL contains ".php" or ".htm").
 *
 * NOTE(review): in the version under review the echoed strings contain only
 * newlines and text; if they once carried list markup it is not visible
 * here, so they are reproduced unchanged rather than guessed at.
 * NOTE(review): the fetched title and file names are echoed unescaped --
 * confirm that only trusted pages are crawled.
 *
 * @param string $url   Address of the page to read.
 * @param int    $level Recursion depth; passed along, not otherwise read.
 * @return bool|null    false when the page could not be read, otherwise
 *                      nothing (null).
 */
function SiteMap($url, $level)
{
    global $links;

    // Report fatal errors only, so a failed fetch emits no warning.
    // This setting stays in effect for the rest of the request.
    error_reporting(E_ERROR);
    $store = file_get_contents($url);
    if ($store === false || $store === '') {
        echo "\ndead link\n";
        return false;
    }

    // Split off scheme and host. https is accepted next to http, and the
    // scheme of the current page is reused for root-relative links.
    preg_match('@^(https?://)?([^/]+)@i', $url, $urlParts);
    $scheme = (isset($urlParts[1]) && $urlParts[1] !== '') ? $urlParts[1] : 'http://';
    $host = isset($urlParts[2]) ? $urlParts[2] : '';
    $prefixLength = isset($urlParts[0]) ? strlen($urlParts[0]) : 0;

    // Directory of the current page: everything before the last slash, but
    // only when that slash lies in the path and not inside "scheme://host".
    $lastSlash = strrpos($url, '/');
    if ($lastSlash !== false && $lastSlash >= $prefixLength) {
        $root = substr($url, 0, $lastSlash);
    } else {
        $root = $url;
    }

    // Extract the page title; stays empty when the page has no title tag.
    $title = '';
    if (preg_match('/<TITLE\b[^>]*>(.*)<\/TITLE>/isU', $store, $titles)) {
        $title = $titles[1];
    }
    $title = str_replace(array("Ingmar's Website: ", 'Ingmars Website: '), '', $title);
    if ($title === '') {
        $title = "[Title not found]";
    }

    // Extract all links from the page.
    preg_match_all('/href="([^"]*)"/iU', $store, $anchors, PREG_SET_ORDER);

    // A link containing any of these fragments is not a local page link.
    $skipMarkers = array(
        '#', 'http://', 'https://', '@', 'ftp://',
        'javascript', 'favicon.ico', 'sitemap', '.css',
    );

    // Loop over the links and rebuild the corresponding full URLs.
    $first = true;
    foreach ($anchors as $anchor) {
        $href = trim($anchor[1]);

        // Protocol-relative links ("//host/...") point to another host too.
        $skip = (strpos($href, '//') === 0);
        foreach ($skipMarkers as $marker) {
            if (strpos($anchor[1], $marker) !== false) {
                $skip = true;
                break;
            }
        }
        if ($skip) {
            continue;
        }

        if (strpos($href, '/') === 0) {
            // Root-relative link: resolve against the host.
            $h = $scheme . $host . $href;
        } else {
            // Relative link: resolve against the directory of this page.
            $h = $root . '/' . $href;
        }

        // Normalize "/xyz/.." (single pass, as before).
        $h = preg_replace('/\/[^\/]*\/\.\./U', '', $h);

        // Extract the file name of the link; empty when there is none.
        $file2 = preg_match('/[^.\/]+\.[^\/]+$/', $h, $files) ? $files[0] : '';

        // Echo the result; the page title precedes the first listed file.
        if ($file2 !== '') {
            if ($first) {
                echo "\n$title\n$file2\n";
                $first = false;
            } else {
                echo "\n$file2\n";
            }
        }

        // Follow the link once, and only when it looks like a page.
        if (!in_array($h, $links, true)) {
            $links[] = $h;
            if (strpos($h, '.php') !== false || strpos($h, '.htm') !== false) {
                echo "\n";
                SiteMap($h, $level + 1);
                echo "\n";
            }
        }
    }
}

// Start recursion
echo "\n";
SiteMap($url, 0);
echo "\n";
© 2008-2011 -
Ingmar de Boer
- Updated: 09/20/09