#!/usr/bin/perl use HTTP::Request; use LWP::UserAgent; use HTML::LinkExtor; use URI::URL; use File::Find; use strict; my $reposit = "/tmp/test" . $$ . ".txt"; # This is only a temporary file. my $base_url = "http://www.brie.com"; my $base_dir = "/home/www/htdocs"; if (! -f "$base_dir/testlinks.html") { open (OUTFILE , ">$base_dir/testlinks.html"); } else { die "$base_dir/testlinks.html already exists.\n Remove or move it before running this\n"; } open (REPOSIT,">$reposit"); finddepth(\&wanted, $base_dir); close (REPOSIT); open (INREPOSIT,"<$reposit"); # Pages to evaluate my $oldhandle = select(OUTFILE); $~ = "BEGIN_HTML"; select ($oldhandle); write OUTFILE; my $i=0; while (my $test_page = ) { chomp $test_page; my $request = new HTTP::Request("GET","$test_page"); my $ua = new LWP::UserAgent; my $response = $ua -> request($request); if ($i % 5 == 0) { print OUTFILE "\n"}; $i++; printf OUTFILE ("
%5d",$i); if ($response->is_success) { print OUTFILE qq{ },$test_page,"\n"; my $parser = HTML::LinkExtor->new(\&test_urls_cb,"$test_page"); $parser->parse($response->content); } else { print OUTFILE "--NOT--$test_page\n"; } } $oldhandle = select(OUTFILE); $~ = "END_HTML"; select ($oldhandle); write OUTFILE; unlink ($reposit) or warn "Could not unlink $reposit: $!\n"; sub get_test_pages_cb { my ($tag, %attributes) = @_; return unless ($tag =~ m/a/i); foreach my $name (sort keys %attributes) { my $url = $attributes{$name}; next unless ($url =~ m/^http:/i); print REPOSIT $url,"\n"; } } sub test_urls_cb { my ($tag, %attributes) = @_; return unless ($tag =~ m/a/i); foreach my $name (sort keys %attributes) { my $url = $attributes{$name}; next unless ($url =~ m/^http:/i); # print $url,"\n"; my $request = new HTTP::Request("GET","$url"); my $ua = new LWP::UserAgent; my $response = $ua -> request($request); if ($response->is_success) { # Add commands here is link is successful # print OUTFILE "
$url\n"; } else { print OUTFILE "
--BAD--$url\n"; } } } sub wanted { if (/\.html$/) { my $temp = $File::Find::name; $temp =~ s/\Q$base_dir//; print REPOSIT $base_url , $temp , "\n"; } } format BEGIN_HTML = Dead Links

@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Fix Your Links Map

$base_url Lynx Rules!
.

format END_HTML =

Originally developed by:

Brian Lavender "@"
.