#
# [PassMonger V0.2]
#
# This script takes in a list of webpages from the file "sites";
# the pages contain passworded links in the form "http://name:pass@host".
# It then extracts, combines, sorts, kills dupes, and spits out a file
# "list.htm" with the list of links.
#
# It also notes the hosts that are linked from the page and
# contain "cgi-bin" in the directory part of the url, and the hosts
# referenced by <img> tags; password links targeting those hosts are
# erased because the script (and I) assume those are fake make-me-money
# links to sponsors.
#
# Author: Anon.
#
# There's a deeper meaning behind this, read the text.
#
# 23/1/99
# Enjoy.
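#
# usage (as far as the code shows): put the page urls, one per line, in
# a file named "sites" in the current directory, run the script, then
# open the generated list.htm in a browser.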
$debug = 1;
use LWP::Simple;
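# LWP::Simple exports get(), which fetches a url and returns the page
# body as a single string, or undef if the request fails.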
# open sites and out files.
open (SITE_FILE,"sites") || die "Couldn't open Sites File";
open (OUT_FILE,">list.htm") || die "Couldn't open Output File";
# write HTML header for outfile
print OUT_FILE "<html>\n<head></head>\n<body>\n";
# read the urls from the sites file, one per line, and collect the
# passes found on each page
while(<SITE_FILE>) {
@current_list = get_page_passes();
push @total_site_list,@current_list;
}
# sort url's
@total_site_list = sort @total_site_list;
# kill dupes
@total_site_list = kill_dupes(@total_site_list);
if( $debug ) {
print "complete sorted duped sitelist:\n";
foreach(@total_site_list) {
print "$_\n";
}
}
print OUT_FILE "<hr>\n";
# print passwords to file by alphabet of hostname
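# perl's magic string auto-increment steps $i through 'a'..'z';
# 'z'++ rolls over to 'aa', and the "cmp" test (nonzero while the
# strings differ) then stops the loop, so $i runs exactly 'a' to 'z'.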
for( $i = 'a' ; $i cmp 'aa' ; $i++ ) {
print OUT_FILE "<hr>\n<a name=\"$i\"></a>\n";
foreach( grep(/(http\:\/\/)?(.*\:.*@)(((www[\d\w]*\.?)$i)|((?!www.*\.)$i))/,@total_site_list) ) {
print OUT_FILE "<a href=\"$_\">$_</a><br>\n";
}
}
# print out hosts not beginning with the alphabet (mostly numerical ip's)
print OUT_FILE "<hr>\n<br>\n";
foreach( grep(/(http\:\/\/)?(.*\:.*@)((?!www\.?)[^a-z])/,@total_site_list) ) {
print OUT_FILE "<a href=\"$_\">$_</a><br>\n";
}
print OUT_FILE "</body>\n</html>\n";
print scalar(@total_site_list) . " Passes Written Out\n";
sub get_page_passes {
my $user_chars = "a-z0-9\.\!\#\$\%\^\&\*\(\)";
my $site_chars = "a-z0-9\-\_\&\/\.";
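# these character classes define what the matches below accept in the
# name:pass part and in the host part of a link.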
my @page;
my @my_passes;
# chomp the trailing newline off the url
chomp;
print "Connecting to $_...";
# use LWP to GET the file, and split it by lines into the @page list
$page = get($_);
# get() returns undef on failure; skip this page instead of parsing nothing
unless ( defined $page ) {
print "couldn't fetch, skipping\n";
return ();
}
@page = split(/\n/,$page);
# get all links in correct form
@my_passes = $page =~ m/<[\s\n\r]*a[\s\n\r]+href[\s\n\r]*=[\s\n\r]*\"?(http\:\/\/?[$user_chars]+\:[$user_chars]+\@[$site_chars]+)/isg;
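# matched against the whole page at once: /i ignores case, /g collects
# every captured link into @my_passes, and the [\s\n\r]* runs let the
# match cross tags that are broken over several lines.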
print scalar(@my_passes) . " Passes Found\n";
# get all hosts that are referenced by <img> tags
@banner = $page =~ m/<[\s\n\r]*img[\s\n\r]+src[\s\n\r]*=[\s\n\r]*\"?(?:http:\/\/)([\d\w\-\_\.]+)/isg;
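# only the hostname gets captured: the character class has no '/', so
# the match stops where the path part of the url would begin.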
# print "!";
if ( $debug ) {
print "img tag banner sites collected:\n";
foreach(@banner) {
print "$_\n";
}
}
# get all hosts that are referenced by links and contain *cgi* in the dir
# add to @banner.
push @banner,$page =~ m/<[\s\n\r]*a\s+href[\s\n\r]*=[\s\n\r]*(?:http:\/\/)?([\d\w\-\_\.]+?)\/[\w\_\-]*cgi[\w\_\-]*\/.+?>/isg;
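# here the capture is just the host of any link whose path goes through
# a directory with "cgi" in its name (e.g. "cgi-bin").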
# good thing i have a blacklist, but i need to make sure there are no dupes
@banner=kill_dupes(sort @banner);
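# kill_dupes only compares neighbouring entries, which is why the list
# gets sorted first.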
if ( $debug == 1) {
print "final blacklist:\n";
foreach(@banner) {
print "$_\n";
}
}
# i go through all the passes from this page, and kill off the ones
# whose host matches an entry of the blacklist.
foreach (@banner) {
$banner=$_;
for($i=0;$i<=$#my_passes;){
if ($my_passes[$i] =~ /(http\:\/\/)?(.*\:.*@)(\Q$banner\E)/i) {
print "Deleted Probable Fake Link:\n";
print "\t$my_passes[$i]\n";
# don't advance $i after a splice: the next entry slides into slot $i
splice @my_passes,$i,1;
}
else {
$i++;
}
}
}
return @my_passes;
}
sub kill_dupes {
my $i;
my (@total_site_list) = @_;
# just an easy func. if an entry equals the next one, splice off the
# next entry and recheck from the same spot
for($i=0;$i<$#total_site_list;){
if( $total_site_list[$i] eq $total_site_list[$i+1] ) {
if ( $debug ) {
print "Dupe:\n$total_site_list[$i]\n$total_site_list[$i+1]\n"
}
splice @total_site_list,$i+1,1;
}
else {
$i++;
}
}
return @total_site_list;
}
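# for reference: the same dedupe can be done in one pass with a hash,
# with no need to sort first (an alternative sketch, not what the
# script uses above):
#
# my %seen;
# @list = grep { !$seen{$_}++ } @list;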