From eb71ca4352540db3fbbe68482a9f83ceb91a0810 Mon Sep 17 00:00:00 2001 From: iphelix Date: Sat, 27 Jul 2013 23:20:15 -0700 Subject: [PATCH] Implemented multiprocessing for rulegen. --- maskgen.py | 20 +--- policygen.py | 4 +- rulegen.py | 322 +++++++++++++++++++++++++++++++++------------------ 3 files changed, 213 insertions(+), 133 deletions(-) diff --git a/maskgen.py b/maskgen.py index 370e2d4..a2dfdc1 100755 --- a/maskgen.py +++ b/maskgen.py @@ -8,25 +8,7 @@ # Copyright (C) 2013 Peter Kacherginsky # All rights reserved. # -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# Please see the attached LICENSE file for additional licensing information. import sys import csv diff --git a/policygen.py b/policygen.py index e8a9666..e96d5cf 100755 --- a/policygen.py +++ b/policygen.py @@ -6,9 +6,9 @@ # VERSION 0.0.2 # # Copyright (C) 2013 Peter Kacherginsky -# All rights reserved.# +# All rights reserved. # -# Please see the attached LICENSE file for additiona licensing information. +# Please see the attached LICENSE file for additional licensing information. import sys, string, random import datetime diff --git a/rulegen.py b/rulegen.py index a88163d..0dff074 100755 --- a/rulegen.py +++ b/rulegen.py @@ -10,7 +10,7 @@ # Copyright (C) 2013 Peter Kacherginsky # All rights reserved. # -# Please see the attached LICENSE file for additiona licensing information. +# Please see the attached LICENSE file for additional licensing information. import sys import re @@ -22,6 +22,10 @@ from optparse import OptionParser, OptionGroup from collections import Counter +import subprocess + +import multiprocessing + VERSION = "0.0.3" # Testing rules with hashcat --stdout @@ -31,10 +35,7 @@ HASHCAT_PATH = "hashcat/" class RuleGen: # Initialize Rule Generator class - def __init__(self,language="en",providers="aspell,myspell",basename='analysis'): - - ####################################################################### - # Multiprocessing + def __init__(self,language="en",providers="aspell,myspell",basename='analysis',threads=4): self.enchant_broker = enchant.Broker() self.enchant_broker.set_ordering("*",providers) @@ -43,8 +44,6 @@ class RuleGen: # Output options self.basename = basename - self.output_rules_f = open("%s.rule" % basename, 'w') - self.output_words_f = open("%s.word" % basename, 'w') # Finetuning word generation self.max_word_dist = 10 @@ -194,6 +193,28 @@ class RuleGen: return matrix + def levenshtein_distance(self, s1, s2): + """Calculate the Levenshtein distance between two strings. + + This is straight from Wikipedia. + """ + if len(s1) < len(s2): + return self.levenshtein_distance(s2, s1) + if not s1: + return len(s2) + + previous_row = xrange(len(s2) + 1) + for i, c1 in enumerate(s1): + current_row = [i + 1] + for j, c2 in enumerate(s2): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + + return previous_row[-1] + def levenshtein_print(self,matrix,word,password): """ Print word X password matrix """ print " %s" % " ".join(list(word)) @@ -302,7 +323,7 @@ class RuleGen: for suggestion in suggestions: - distance = enchant.utils.levenshtein(suggestion,pre_password) + distance = self.levenshtein_distance(suggestion,pre_password) word = dict() word["suggestion"] = suggestion @@ -698,67 +719,6 @@ class RuleGen: if self.debug: print "[!] Advanced Processing FAILED: %s => %s => %s (%s)" % (word," ".join(hashcat_rules),password,word_rules) return None - ############################################################################ - def print_hashcat_rules(self, words, password): - - # sorted(self.masks.keys(), key=lambda mask: self.masks[mask][sorting_mode], reverse=True): - - best_found_rule_length = 9999 - - # Sorted list based on rule length - for word in sorted(words, key=lambda word: len(word["hashcat_rules"][0])): - - for hashcat_rule in word["hashcat_rules"]: - - rule_length = len(hashcat_rule) - - if not self.more_rules: - if rule_length < best_found_rule_length: - best_found_rule_length = rule_length - - elif rule_length > best_found_rule_length: - if self.verbose: - print "[-] %s => {best rule length exceeded: %d (%d)} => %s" % \ - (word["suggestion"], rule_length, best_found_rule_length, password) - break - - if rule_length <= self.max_rule_len: - - hashcat_rule_str = " ".join(hashcat_rule + word["pre_rule"] or [':']) - if self.verbose: print "[+] %s => %s => %s" % (word["suggestion"], hashcat_rule_str, password) - - if self.hashcat: - self.verify_hashcat_rules(word["suggestion"], hashcat_rule + word["pre_rule"], password) - - # TODO: Collect statistics later - # if hashcat_rule_str in self.rule_stats: self.rule_stats[hashcat_rule_str] += 1 - # else: self.rule_stats[hashcat_rule_str] = 1 - - self.output_rules_f.write("%s\n" % hashcat_rule_str) - self.output_words_f.write("%s\n" % word["suggestion"]) - - ############################################################################ - def verify_hashcat_rules(self,word, rules, password): - import subprocess - - f = open("%s/test.rule" % HASHCAT_PATH,'w') - f.write(" ".join(rules)) - f.close() - - f = open("%s/test.word" % HASHCAT_PATH,'w') - f.write(word) - f.close() - - p = subprocess.Popen(["%s/hashcat-cli64.bin" % HASHCAT_PATH,"-r","%s/test.rule" % HASHCAT_PATH,"--stdout","%s/test.word" % HASHCAT_PATH], stdout=subprocess.PIPE) - out, err = p.communicate() - out = out.strip() - - if out == password: - hashcat_rules_str = " ".join(rules or [':']) - if self.verbose: print "[+] %s => %s => %s" % (word, hashcat_rules_str, password) - - else: - print "[!] Hashcat Verification FAILED: %s => %s => %s (%s)" % (word," ".join(rules or [':']),password,out) def check_reversible_password(self, password): """ Check whether the password is likely to be reversed successfuly. """ @@ -786,83 +746,194 @@ class RuleGen: else: return True - def analyze_password(self,password): + def analyze_password(self,password, rules_queue=multiprocessing.Queue(), words_queue=multiprocessing.Queue()): """ Analyze a single password. """ if self.verbose: print "[*] Analyzing password: %s" % password - if self.verbose: start_time = time.clock() - # Only process passwords likely to be dictionary based. - if self.check_reversible_password(password): + words = [] - # TODO: Collect statistics later - # if password in self.password_stats: self.password_stats[password] += 1 - # else: self.password_stats[password] = 1 + # Short-cut words in the dictionary + if self.enchant.check(password) and not self.word: - words = [] + word = dict() + word["password"] = password + word["suggestion"] = password + word["hashcat_rules"] = [[],] + word["pre_rule"] = [] + word["best_rule_length"] = 9999 - # Short-cut words in the dictionary - if self.enchant.check(password) and not self.word: + words.append(word) - # Record password as a source word for stats - # TODO: Collect statistics later - # if password in self.word_stats: self.word_stats[password] += 1 - # else: self.word_stats[password] = 1 + # Generate rules for words not in the dictionary + else: - word = dict() - word["password"] = password - word["suggestion"] = password - word["hashcat_rules"] = [[],] - word["pre_rule"] = [] - word["best_rule_length"] = 9999 + # Generate source words list + words = self.generate_words(password) - words.append(word) + # Generate levenshtein reverse paths for each suggestion + for word in words: - # Generate rules for words not in the dictionary - else: + # Generate a collection of hashcat_rules lists + word["hashcat_rules"] = self.generate_hashcat_rules(word["suggestion"],word["password"]) - # Generate source words list - words = self.generate_words(password) + self.print_hashcat_rules(words, password, rules_queue, words_queue) - # Generate levenshtein reverse paths for each suggestion - for word in words: + def print_hashcat_rules(self, words, password, rules_queue, words_queue): - # Generate a collection of hashcat_rules lists - word["hashcat_rules"] = self.generate_hashcat_rules(word["suggestion"],word["password"]) + best_found_rule_length = 9999 - self.print_hashcat_rules(words, password) + # Sorted list based on rule length + for word in sorted(words, key=lambda word: len(word["hashcat_rules"][0])): - if self.verbose: print "[*] Finished analysis in %.2f seconds" % (time.clock()-start_time) + words_queue.put(word["suggestion"]) + + for hashcat_rule in word["hashcat_rules"]: + + rule_length = len(hashcat_rule) + + if not self.more_rules: + if rule_length < best_found_rule_length: + best_found_rule_length = rule_length + + elif rule_length > best_found_rule_length: + if self.verbose: + print "[-] %s => {best rule length exceeded: %d (%d)} => %s" % \ + (word["suggestion"], rule_length, best_found_rule_length, password) + break + + if rule_length <= self.max_rule_len: + + hashcat_rule_str = " ".join(hashcat_rule + word["pre_rule"] or [':']) + if self.verbose: print "[+] %s => %s => %s" % (word["suggestion"], hashcat_rule_str, password) + + # Obtain a lock and write to files. + #if self.hashcat: + # self.verify_hashcat_rules(word["suggestion"], hashcat_rule + word["pre_rule"], password) + #self.output_rules_f.write("%s\n" % hashcat_rule_str) + #self.output_words_f.write("%s\n" % word["suggestion"]) + + rules_queue.put(hashcat_rule_str) + + + def password_worker(self,i, passwords_queue, rules_queue, words_queue): + print "[*] Password analysis worker [%d] started." % i + try: + while True: + password = passwords_queue.get() + + # Interrupted by a Death Pill + if password == None: break + + self.analyze_password(password, rules_queue, words_queue) + except (KeyboardInterrupt, SystemExit): + print "[*] Password analysis worker [%d] terminated." % i + + print "[*] Password analysis worker [%d] stopped." % i + + def rule_worker(self, rules_queue, output_rules_filename): + """ Worker to store generated rules. """ + + f = open(output_rules_filename, 'w') + print "[*] Rule worker started." + try: + while True: + rule = rules_queue.get() + + # Interrupted by a Death Pill + if rule == None: break + + f.write("%s\n" % rule) + f.flush() + + except (KeyboardInterrupt, SystemExit): + print "[*] Rule worker terminated." + + f.close() + print "[*] Rule worker stopped." + + def word_worker(self, words_queue, output_words_filename): + """ Worker to store generated rules. """ + + f = open(output_words_filename, 'w') + print "[*] Word worker started." + try: + while True: + word = words_queue.get() + + # Interrupted by a Death Pill + if word == None: break + + f.write("%s\n" % word) + f.flush() + + except (KeyboardInterrupt, SystemExit): + print "[*] Word worker terminated." + + f.close() + print "[*] Word worker stopped." # Analyze passwords file def analyze_passwords_file(self,passwords_file): """ Analyze provided passwords file. """ print "[*] Analyzing passwords file: %s:" % passwords_file + + # Setup queues + passwords_queue = multiprocessing.Queue(multiprocessing.cpu_count() * 100) + rules_queue = multiprocessing.Queue() + words_queue = multiprocessing.Queue() + + # Start workers + for i in range(multiprocessing.cpu_count()): + multiprocessing.Process(target=self.password_worker, args=(i, passwords_queue, rules_queue, words_queue)).start() + multiprocessing.Process(target=self.rule_worker, args=(rules_queue, "%s.rule" % self.basename)).start() + multiprocessing.Process(target=self.word_worker, args=(words_queue, "%s.word" % self.basename)).start() + + # Continue with the main thread + f = open(passwords_file,'r') password_count = 0 - analysis_start = time.clock() + analysis_start = time.time() try: for password in f: - password = password.strip() + password = password.rstrip('\r\n') if len(password) > 0: # Provide analysis time feedback to the user - if password_count != 0 and password_count % 10000 == 0: - current_analysis_time = time.clock() - analysis_start - if not self.quiet: print "[*] Processed %d passwords in %.2f seconds at the rate of %.2f p/sec" % (password_count, current_analysis_time, password_count/current_analysis_time ) - + if password_count != 0 and password_count % 5000 == 0: + current_analysis_time = time.time() - analysis_start + if not self.quiet: + print "[*] Processed %d passwords in %.2f seconds at the rate of %.2f p/sec" % \ + (password_count, current_analysis_time, password_count/current_analysis_time ) password_count += 1 - self.analyze_password(password) + + # Perform preliminary checks and add password to the queue + if self.check_reversible_password(password): + passwords_queue.put(password) except (KeyboardInterrupt, SystemExit): print "\n[!] Rulegen was interrupted." + + else: + # Signal workers to stop. + for i in range(multiprocessing.cpu_count()): + passwords_queue.put(None) + + # Wait for all of the queued passwords to finish. + while not passwords_queue.empty(): + time.sleep(1) + + # Signal writers to stop. + rules_queue.put(None) + words_queue.put(None) + f.close() - analysis_time = time.clock() - analysis_start - print "[*] Finished processing %d passwords in %.2f seconds at the rate of %.2f p/sec" % (password_count, analysis_time, float(password_count)/analysis_time ) + analysis_time = time.time() - analysis_start + print "[*] Finished processing %d passwords in %.2f seconds at the rate of %.2f p/sec" % (password_count, analysis_time, float(password_count)/analysis_time ) print "[*] Generating statistics for [%s] rules and words." % self.basename print "[-] Skipped %d all numeric passwords (%0.2f%%)" % \ @@ -872,6 +943,9 @@ class RuleGen: print "[-] Skipped %d passwords with non ascii characters (%0.2f%%)" % \ (self.foreign_stats_total, float(self.foreign_stats_total)*100.0/float(password_count)) + + # TODO: Counter breaks on large files. uniq -c | sort -rn is still the most + # optimal way. rules_file = open("%s.rule" % self.basename,'r') rules_sorted_file = open("%s-sorted.rule" % self.basename, 'w') rules_counter = Counter(rules_file) @@ -903,6 +977,28 @@ class RuleGen: words_file.close() words_sorted_file.close() + ############################################################################ + def verify_hashcat_rules(self,word, rules, password): + + f = open("%s/test.rule" % HASHCAT_PATH,'w') + f.write(" ".join(rules)) + f.close() + + f = open("%s/test.word" % HASHCAT_PATH,'w') + f.write(word) + f.close() + + p = subprocess.Popen(["%s/hashcat-cli64.bin" % HASHCAT_PATH,"-r","%s/test.rule" % HASHCAT_PATH,"--stdout","%s/test.word" % HASHCAT_PATH], stdout=subprocess.PIPE) + out, err = p.communicate() + out = out.strip() + + if out == password: + hashcat_rules_str = " ".join(rules or [':']) + if self.verbose: print "[+] %s => %s => %s" % (word, hashcat_rules_str, password) + + else: + print "[!] Hashcat Verification FAILED: %s => %s => %s (%s)" % (word," ".join(rules or [':']),password,out) + if __name__ == "__main__": header = " _ \n" @@ -921,6 +1017,7 @@ if __name__ == "__main__": parser.add_option("-b","--basename", help="Output base name. The following files will be generated: basename.words, basename.rules and basename.stats", default="analysis",metavar="rockyou") parser.add_option("-w","--wordlist", help="Use a custom wordlist for rule analysis.", metavar="wiki.dict") parser.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False, help="Don't show headers.") + parser.add_option("--threads", type="int", default=10, help="Parallel threads to use for processing.") wordtune = OptionGroup(parser, "Fine tune source word generation:") wordtune.add_option("--maxworddist", help="Maximum word edit distance (Levenshtein)", type="int", default=10, metavar="10") @@ -959,7 +1056,7 @@ if __name__ == "__main__": parser.error("no passwords file specified") exit(1) - rulegen = RuleGen(language="en", providers=options.providers, basename=options.basename) + rulegen = RuleGen(language="en", providers=options.providers, basename=options.basename, threads=options.threads) # Finetuning word generation rulegen.max_word_dist=options.maxworddist @@ -994,6 +1091,7 @@ if __name__ == "__main__": print "[*] Press Ctrl-C to end execution and generate statistical analysis." # Analyze a single password or several passwords in a file - if options.password: rulegen.analyze_password(args[0]) + if options.password: + rulegen.analyze_password(args[0]) else: - rulegen.analyze_passwords_file(args[0]) \ No newline at end of file + rulegen.analyze_passwords_file(args[0])