#!/usr/bin/perl # Author: Gavin Hanover (gavin@subnets.org) # Date: 9/28/2001 # Purpose: make urls worksafe # # if( msg =~ /complaint/ ) { me{'response'} = "it's beta"; } use LWP::UserAgent; # declare crap my( $UA, $req, $url, $domain, $path, $data, @data, $max, @pairs ); my( %badwords, $word ); $badwords{'fuck'} = 'fudge'; $badwords{'fucker'} = 'fudger'; $badwords{'ass'} = 'bum'; $badwords{'asshole'} = 'bumhole'; $badwords{'shit'} = 'poop'; $badwords{'dick'} = 'penis'; $badwords{'pussy'} = 'vagina'; $badwords{'gay'} = 'homosexual'; $badwords{'porn'} = 'adult material'; $badwords{'porno'} = 'adult material'; $badwords{'cock'} = 'penis'; $badwords{'bitch'} = 'female dog'; # get environment (we want a url, dammit!) @pairs = split(/&/, $ENV{'QUERY_STRING'}); # forms just *have* to be difficult.. foreach $pair (@pairs) { local($name, $value) = split(/=/, $pair); $name =~ tr/+/ /; $name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $name =~ tr/\0//d; $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $value =~ tr/\0//d; if( $name =~ "url" ) { $url = $value; } } # url == full url (ie - http://www.site.com/path/page.html) # path == path part (ie - http://www.site.com/path) # domain == domain part (ie - www.site.com) @data = split("/", $url); $domain = $data[2]; # start with this $path = "http://$domain"; # how many parts are there that we need? if( $url =~ /html$/ ) { $max = $#data; } else { $max = $#data + 1; } for( $i = 3 ; $i < $max ; $i += 1 ) { $path .= "/" . $data[$i]; } # make this an html page print "Content-type: text/html\n\n"; # if we got nothin, give em something to submit to if( $url =~ /^$/ ) { print "\n"; print "
\n"; print "enter url to check:\n"; print ""; print "
\n"; print "source
\n"; print "todo\n"; print ""; exit; } print "$url


\n\n\n"; # get the page $UA ||= LWP::UserAgent->new(timeout => 15) or exit; $req = $UA->get("$url") or exit; if( !$req->is_success ) { print "error loading page"; } #exit unless ($req->is_success); $data = $req->content; @data = split("\n", $data); # parse me, baby foreach $line ( @data ) { chomp $line; ############################## # A HREF so we don't break links #(do this first so we don't match the href's we create later) if( $line =~ /a.*href=([^>]*)>/i ) { # http://... if( $1 =~ /(\"?)http(.*)/i ) { # nothing needs to be done } # /... if( $1 =~ /(\"?)\/(.*)/ ) { $line =~ s/]*)>//ig; } # ... else { $line =~ s/]*)>//ig; } } ############################## # IMG if( $line =~ /img.*src=([^>]*)>.*/i ) { # http://.../image.jpg if( $1 =~ /(\"?)http(.*)/i ) { $line =~ s/img.*src=([^>]*)>/a href=$1>$1<\/a>
/ig; } # /.../image.jpg elsif( $1 =~ /(\"?)\/(.*)/ ) { $line =~ s/img.*src=(\"?)([^>]*)>/a href=$1http:\/\/$domain$2>$2<\/a>
/ig; } # image.jpg else { $line =~ s/img.*src=(\"?)([^>]*)>/a href=$1$path\/$2>$2<\/a>
/ig; } } ###################### # BGSOUND if( $line =~ /bgsound.*src=([^>]*)>/i ) { # http://.../sound.wav if( $1 =~ /(\"?)http/i ) { $line =~ s/bgsound.*src=([^>]*)>/a href=$1>$1<\/a>
/ig; } # /.../sound.wav elsif( $1 =~ /(\"?)\// ) { $line =~ s/bgsound.*src=(\"?)([^>]*)>/a href=$1http:\/\/$domain$2>$2<\/a>
/ig; } # sound.wav else { $line =~ s/bgsound.*src=(\"?)([^>]*)>/a href=$1$path\/$2>$2<\/a>
/ig; } } ######################### # SCRIPT if( $line =~ /<(\/?)script([^>]*)>/i ) { $line =~ s/<(\/?)script([^>]*)>/<$1pre$2>/ig; } ######################### # ONLOAD if( $line =~ /onload="?[^">]*"?/i ) { $line =~ s/onload=("?)([^">]*)("?)([^>]*)>/$4>onload=$1$2$3
/ig; } ######################### # ONCLOSE if( $line =~ /onclose="?[^"]*"?/i ) { $line =~ s/onclose="([^"]*)"([^>]*)>/$2>onclose="$1"
/ig; } ######################### # REMOVE COMMENTS if( $line =~ /<\!--/ ) { $line =~ s/<\!--/<!--/g; } ######################### # BAD WORDS foreach $word ( sort keys %badwords ) { $line =~ s/(([<][^>]*[>])*)\b$word\b/$1$badwords{$word}/ig; } ######################### # OBJECT if( $line =~ /