#!/usr/bin/perl
# Author: Gavin Hanover (gavin@subnets.org)
# Date: 9/28/2001
# Purpose: make urls worksafe
#
# if( msg =~ /complaint/ ) { me{'response'} = "it's beta"; }
use LWP::UserAgent;
# declare crap
my( $UA, $req, $url, $domain, $path, $data, @data, $max, @pairs );
my( %badwords, $word );
$badwords{'fuck'} = 'fudge';
$badwords{'fucker'} = 'fudger';
$badwords{'ass'} = 'bum';
$badwords{'asshole'} = 'bumhole';
$badwords{'shit'} = 'poop';
$badwords{'dick'} = 'penis';
$badwords{'pussy'} = 'vagina';
$badwords{'gay'} = 'homosexual';
$badwords{'porn'} = 'adult material';
$badwords{'porno'} = 'adult material';
$badwords{'cock'} = 'penis';
$badwords{'bitch'} = 'female dog';
# get environment (we want a url, dammit!)
@pairs = split(/&/, $ENV{'QUERY_STRING'});
# forms just *have* to be difficult..
foreach $pair (@pairs)
{
local($name, $value) = split(/=/, $pair);
$name =~ tr/+/ /;
$name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$name =~ tr/\0//d;
$value =~ tr/+/ /;
$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$value =~ tr/\0//d;
if( $name =~ "url" )
{ $url = $value; }
}
# url == full url (ie - http://www.site.com/path/page.html)
# path == path part (ie - http://www.site.com/path)
# domain == domain part (ie - www.site.com)
@data = split("/", $url);
$domain = $data[2];
# start with this
$path = "http://$domain";
# how many parts are there that we need?
if( $url =~ /html$/ )
{ $max = $#data; }
else
{ $max = $#data + 1; }
for( $i = 3 ; $i < $max ; $i += 1 )
{ $path .= "/" . $data[$i]; }
# make this an html page
print "Content-type: text/html\n\n";
# if we got nothin, give em something to submit to
if( $url =~ /^$/ )
{
print "
\n";
print "\n";
print "source
\n";
print "todo\n";
print "";
exit;
}
print "$url
\n\n\n";
# get the page
$UA ||= LWP::UserAgent->new(timeout => 15) or exit;
$req = $UA->get("$url") or exit;
if( !$req->is_success )
{
print "error loading page";
}
#exit unless ($req->is_success);
$data = $req->content;
@data = split("\n", $data);
# parse me, baby
foreach $line ( @data )
{
chomp $line;
##############################
# A HREF so we don't break links
#(do this first so we don't match the href's we create later)
if( $line =~ /a.*href=([^>]*)>/i )
{
# http://...
if( $1 =~ /(\"?)http(.*)/i )
{
# nothing needs to be done
}
# /...
if( $1 =~ /(\"?)\/(.*)/ )
{
$line =~ s/]*)>//ig;
}
# ...
else
{
$line =~ s/]*)>//ig;
}
}
##############################
# IMG
if( $line =~ /img.*src=([^>]*)>.*/i )
{
# http://.../image.jpg
if( $1 =~ /(\"?)http(.*)/i )
{
$line =~ s/img.*src=([^>]*)>/a href=$1>$1<\/a>
/ig;
}
# /.../image.jpg
elsif( $1 =~ /(\"?)\/(.*)/ )
{
$line =~ s/img.*src=(\"?)([^>]*)>/a href=$1http:\/\/$domain$2>$2<\/a>
/ig;
}
# image.jpg
else
{
$line =~ s/img.*src=(\"?)([^>]*)>/a href=$1$path\/$2>$2<\/a>
/ig;
}
}
######################
# BGSOUND
if( $line =~ /bgsound.*src=([^>]*)>/i )
{
# http://.../sound.wav
if( $1 =~ /(\"?)http/i )
{
$line =~ s/bgsound.*src=([^>]*)>/a href=$1>$1<\/a>
/ig;
}
# /.../sound.wav
elsif( $1 =~ /(\"?)\// )
{
$line =~ s/bgsound.*src=(\"?)([^>]*)>/a href=$1http:\/\/$domain$2>$2<\/a>
/ig;
}
# sound.wav
else
{
$line =~ s/bgsound.*src=(\"?)([^>]*)>/a href=$1$path\/$2>$2<\/a>
/ig;
}
}
#########################
# SCRIPT
if( $line =~ /<(\/?)script([^>]*)>/i )
{
$line =~ s/<(\/?)script([^>]*)>/<$1pre$2>/ig;
}
#########################
# ONLOAD
if( $line =~ /onload="?[^">]*"?/i )
{
$line =~ s/onload=("?)([^">]*)("?)([^>]*)>/$4>onload=$1$2$3
/ig;
}
#########################
# ONCLOSE
if( $line =~ /onclose="?[^"]*"?/i )
{
$line =~ s/onclose="([^"]*)"([^>]*)>/$2>onclose="$1"
/ig;
}
#########################
# REMOVE COMMENTS
if( $line =~ /<\!--/ )
{
$line =~ s/<\!--/<!--/g;
}
#########################
# BAD WORDS
foreach $word ( sort keys %badwords )
{
$line =~ s/(([<][^>]*[>])*)\b$word\b/$1$badwords{$word}/ig;
}
#########################
# OBJECT
if( $line =~ /