### wordlistgenerator.py by blerbl import re, sys, os, urllib #### custom useragent class AppURLopener(urllib.FancyURLopener): version = "Mozilla/5.0(compatable;MSIE 9.0; Windows NT 6.1; Trident/5.0)" urllib._urlopener = AppURLopener() uopen = urllib.urlopen uencode = urllib.urlencode ############################################################### ### ### Helper Function ### def ls(file): print(open(file,'rb').read()) def google(query,numget=10,verbose=0): numget = int(numget) start = 0 results = [] if verbose == 2: print "[+]Getting " + str(numget) + " results" while len(results) < numget: print "[+]"+str(len(results)) + " so far..." data = uopen("https://www.google.com/search?q="+query+"&start="+str(start)) if data.code != 200: print "Error " + str(data.code) break results.extend(re.findall(")"," ",data) # keep comments as normal text data = re.sub("]+>"," ",data) # remove the html tags data = re.sub("\r|\n"," ",data) # make it a strait file ### ### Add the new words ### allwords = word_reg.findall(data) allquotes = quote_reg.findall(data) for quote in allquotes: allwords.append(quote) allwords.append(quote.replace(" ","")) #flw = '' #for each in quote.split(' '): # if len(each) > 0: flw += each[0] #if flw: allwords.append(flw) for word in allwords: ### ### Mangle ### if( word.endswith('.') or word.endswith(',') or word.endswith('!') or word.endswith('?') or word.endswith(';') or word.endswith('"') or word.endswith('\'')): allwords.append(word.strip('.,!?;"\'')) if re.match("\A.*\.(jpg|png|txt|com|html)\Z",word): allwords.append(word.rsplit('.',1)[0]) ### ### Add ### if not word in words: words.append(word) total_wa = len(words) total_s = len(targetlist) words.sort() of = open(outfile,'w') for word in words: of.write(word+"\n") of.close() if verbose: print "[+]Complete!" print "[+]"+ str(total_wa) + " words in the list." if append: print "[+]"+str(total_wa - total_wb)+" are new." print "[+]Collected from " + str(total_s - dircount) + " sources." 
if __name__ == "__main__":
    ###
    ### User input
    ###
    # These stay module-level globals on purpose: only some of them are
    # passed to genWordlist() explicitly, so the rest may be read as globals.
    verbose = 2          # 2 = chatty progress output
    minlen = 6           # shortest word to collect
    maxlen = None        # None = no upper bound on word length
    find_quotes = True
    # Character-class bodies offered to the user; they are spliced into
    # "[...]" when the word regex is built below.
    # NOTE(review): inside a character class "A-z" also spans the ASCII
    # punctuation between 'Z' and 'a' ([ \ ] ^ _ `). Left unchanged because
    # these strings are also the menu text shown to the user.
    wordrules = ["A-z","A-z0-9","A-z0-9*-.!$#@%"]
    custom_choice = len(wordrules) + 1   # menu number of the "Custom" entry

    wordrule = None
    while not wordrule:
        print("Select a word rule:")
        for i, rule in enumerate(wordrules):
            print(str(i + 1) + " -- " + rule)
        print(str(custom_choice) + " Custom (WARNING: ADVANCED!! not validation)")
        que = raw_input("Rule[1-" + str(custom_choice) + "]:")
        try:
            que = int(que.strip())
        except ValueError:
            # Non-numeric input is handled as an invalid selection below.
            que = -1

        if que == custom_choice:
            # An empty custom rule is falsy, so the menu is shown again.
            wordrule = raw_input("Wordrule:").strip()
        elif que < 1 or que > custom_choice:
            print("Not a valid selection")
        else:
            # Index by the user's choice. The previous code used the leftover
            # loop variable and therefore always picked the same rule.
            wordrule = wordrules[que - 1]

    if not minlen:
        minlen = 3

    outfile = raw_input("Filename:")
    # A leading "+" skips the overwrite question; how the prefix is
    # interpreted is up to genWordlist().
    if os.path.exists(outfile) and not outfile.startswith("+"):
        que = raw_input("[?]This file exists! Overwrite[y|N]:")
        if 'y' not in que.lower():
            raise SystemExit(0)

    targetlist = raw_input("Input target list, separate by ';' no space or quote\n"+
                           "Use %g% to use google query sites\n"+
                           "Targets:")
    targetlist = targetlist.split(';')

    # Expand "%g<query>%<count>" tokens into search results. The expanded list
    # is built separately instead of removing/extending targetlist while
    # iterating over it, which used to skip the entry after each token.
    plain_targets = []
    google_targets = []
    for target in targetlist:
        if re.match("%g[^%]+%[0-9]+",target):
            fields = target[2:].split('%')
            query, count = fields[0], fields[1]
            if verbose == 2:
                print("[+]Google sources: " + query)
            new_targets = google(query, count, verbose)
            google_targets.extend(new_targets)
        else:
            plain_targets.append(target)
    # Same final ordering as before: literal targets first, search hits last.
    targetlist = plain_targets + google_targets

    if verbose == 2:
        print("[+]Gathering data from the following targets:")
        for target in targetlist:
            print("[+]"+target)
        print("=============================================")

    ###
    ### Prepare and call
    ###
    # "{m,}" is the open-ended repeat. Writing str(None) into the braces gave
    # "{6,None}", which the regex engine treats as literal text.
    if maxlen is None:
        max_repeat = ""
    else:
        max_repeat = str(maxlen)
    word_reg = re.compile("(["+wordrule+"]{"+str(minlen)+","+max_repeat+"})")
    genWordlist(targetlist,word_reg,outfile,verbose)