Sometimes I have felt the need to look ahead at websites, to see what I can expect there. So today I have been playing with Perl again:
#!d:/perl/bin/perl -w
use HTML::Parser;
use LWP::Simple;
use URI::Escape;
use strict;
# Entry point: fetch the page named by the first command-line argument and
# run every parse event through check_attrs, which prints the report.
my $fn = shift;
defined $fn && length $fn
    or die "You must give an URL to parse\n";

# get() yields undef when the fetch fails; test definedness so that an
# empty (but successfully fetched) page is not mistaken for a failure.
my $f = get($fn);
defined $f
    or die "Can not find $fn\n";

# The handler has to be handed over as a code reference (\&name).  A bare
# &check_attrs would call the sub right here and put its (empty) return
# value into the handler spec instead of the sub itself.
my $parser = HTML::Parser->new(
    default_h => [ \&check_attrs, 'text, tagname, attr' ],
);
$parser->parse($f);
$parser->eof;    # signal end of document so buffered input is processed
# Parse-event handler: prints a report entry for every construct in the
# page that is considered worth a closer look.
#
# Arguments:
#   $line    - source text of the event, echoed after each finding
#   $tagname - name of the tag; events without a tag name are ignored
#   $attr    - hash reference (attribute name => value); may be undef, in
#              which case only the tag-name checks are performed
#
# Reports, in this order:
#   * tags from a fixed watch list ("FOUND <tag>")
#   * script tags
#   * attribute names containing on / nosave / type / content / act...
#   * attribute values containing one of the forbidden protocols
#   * attribute values containing a "&{" JavaScript entity
#
# Returns nothing useful; all output goes to the currently selected handle.
sub check_attrs {
    my ($line, $tagname, $attr) = @_;
    return unless $tagname;

    my @forbiddenprotos = qw(javascript mocha data vbscript jscript perlscript);
    my %is_watched_tag  = map { $_ => 1 } qw(
        object img frame noscript link xsl applet embed bgsound iframe ilayer
    );

    print "FOUND $tagname\n$line\n" if $is_watched_tag{$tagname};

    # Only the script check looks at the unescaped form of the tag name.
    $tagname = uri_unescape($tagname);
    print "Found script tag.\n$line\n" if $tagname eq "script";

    # No attribute hash means there is nothing further to inspect.
    return unless ref($attr) eq 'HASH';

    my $attrs    = uri_unescape(join " ", keys %$attr);
    my $attrvals = uri_unescape(join " ", values %$attr);

    # Deliberately unanchored: any attribute name containing one of these
    # fragments is reported.  "act\w+" covers names such as "action".
    if ($attrs =~ /(on|nosave|type|content|act\w+)/) {
        print "\nHas $1 funnet.\n$line\n";
    }

    # Protocol names are matched case-insensitively so that variants such
    # as "JavaScript:" are caught; \Q..\E keeps the name a literal string.
    for my $proto (@forbiddenprotos) {
        print "$proto protocol found.\n$line\n"
            if $attrvals =~ /\Q$proto\E:/i;
    }

    # The brace must be escaped to be matched literally.
    print "javascript entity found.\n$line\n" if $attrvals =~ /&\{/;

    return;
}
The script can be altered for your own needs and is run from DOS like this (if you saved it as analyze.pl):
perl analyze.pl http://www.somesite.com/index.html > report.txt
The result is then found in the text file report.txt.
Enjoy
Best wishes
Arne
Imici username: Arne