From eb71ca4352540db3fbbe68482a9f83ceb91a0810 Mon Sep 17 00:00:00 2001
From: iphelix <iphelix@debian.(none)>
Date: Sat, 27 Jul 2013 23:20:15 -0700
Subject: [PATCH] Implemented multiprocessing for rulegen.

---
 maskgen.py   |  20 +---
 policygen.py |   4 +-
 rulegen.py   | 322 +++++++++++++++++++++++++++++++++------------------
 3 files changed, 213 insertions(+), 133 deletions(-)

diff --git a/maskgen.py b/maskgen.py
index 370e2d4..a2dfdc1 100755
--- a/maskgen.py
+++ b/maskgen.py
@@ -8,25 +8,7 @@
 # Copyright (C) 2013 Peter Kacherginsky
 # All rights reserved.
 #
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met: 
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer. 
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution. 
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# Please see the attached LICENSE file for additional licensing information.
 
 import sys
 import csv
diff --git a/policygen.py b/policygen.py
index e8a9666..e96d5cf 100755
--- a/policygen.py
+++ b/policygen.py
@@ -6,9 +6,9 @@
 # VERSION 0.0.2
 #
 # Copyright (C) 2013 Peter Kacherginsky
-# All rights reserved.#
+# All rights reserved.
 #
-# Please see the attached LICENSE file for additiona licensing information.
+# Please see the attached LICENSE file for additional licensing information.
 
 import sys, string, random
 import datetime
diff --git a/rulegen.py b/rulegen.py
index a88163d..0dff074 100755
--- a/rulegen.py
+++ b/rulegen.py
@@ -10,7 +10,7 @@
 # Copyright (C) 2013 Peter Kacherginsky
 # All rights reserved.
 #
-# Please see the attached LICENSE file for additiona licensing information.
+# Please see the attached LICENSE file for additional licensing information.
 
 import sys
 import re
@@ -22,6 +22,10 @@ from optparse import OptionParser, OptionGroup
 
 from collections import Counter
 
+import subprocess
+
+import multiprocessing
+
 VERSION = "0.0.3"
 
 # Testing rules with hashcat --stdout
@@ -31,10 +35,7 @@ HASHCAT_PATH = "hashcat/"
 class RuleGen:
 
     # Initialize Rule Generator class
-    def __init__(self,language="en",providers="aspell,myspell",basename='analysis'):
-
-        #######################################################################
-        # Multiprocessing
+    def __init__(self,language="en",providers="aspell,myspell",basename='analysis',threads=4):
 
         self.enchant_broker = enchant.Broker()
         self.enchant_broker.set_ordering("*",providers)
@@ -43,8 +44,6 @@ class RuleGen:
 
         # Output options
         self.basename = basename
-        self.output_rules_f = open("%s.rule" % basename, 'w')
-        self.output_words_f = open("%s.word" % basename, 'w')
 
         # Finetuning word generation
         self.max_word_dist = 10
@@ -194,6 +193,28 @@ class RuleGen:
 
         return matrix
 
+    def levenshtein_distance(self, s1, s2):
+        """Calculate the Levenshtein distance between two strings.
+
+        This is straight from Wikipedia.
+        """
+        if len(s1) < len(s2):
+            return self.levenshtein_distance(s2, s1)
+        if not s1:
+            return len(s2)
+     
+        previous_row = xrange(len(s2) + 1)
+        for i, c1 in enumerate(s1):
+            current_row = [i + 1]
+            for j, c2 in enumerate(s2):
+                insertions = previous_row[j + 1] + 1
+                deletions = current_row[j] + 1
+                substitutions = previous_row[j] + (c1 != c2)
+                current_row.append(min(insertions, deletions, substitutions))
+            previous_row = current_row
+     
+        return previous_row[-1]
+
     def levenshtein_print(self,matrix,word,password):
         """ Print word X password matrix """
         print "      %s" % "  ".join(list(word))
@@ -302,7 +323,7 @@ class RuleGen:
 
             for suggestion in suggestions:
 
-                distance = enchant.utils.levenshtein(suggestion,pre_password)
+                distance = self.levenshtein_distance(suggestion,pre_password)
 
                 word = dict()
                 word["suggestion"] = suggestion
@@ -698,67 +719,6 @@ class RuleGen:
             if self.debug: print "[!] Advanced Processing FAILED: %s => %s => %s (%s)" % (word," ".join(hashcat_rules),password,word_rules)
             return None
 
-    ############################################################################
-    def print_hashcat_rules(self, words, password):
-
-        # sorted(self.masks.keys(), key=lambda mask: self.masks[mask][sorting_mode], reverse=True):
-
-        best_found_rule_length = 9999
-
-        # Sorted list based on rule length
-        for word in sorted(words, key=lambda word: len(word["hashcat_rules"][0])):
-
-            for hashcat_rule in word["hashcat_rules"]:
-
-                rule_length = len(hashcat_rule)
-
-                if not self.more_rules:
-                    if rule_length < best_found_rule_length:
-                        best_found_rule_length = rule_length
-
-                    elif rule_length > best_found_rule_length:
-                        if self.verbose: 
-                            print "[-] %s => {best rule length exceeded: %d (%d)} => %s" % \
-                            (word["suggestion"], rule_length, best_found_rule_length, password)
-                        break
-
-                if rule_length <= self.max_rule_len:
-
-                    hashcat_rule_str = " ".join(hashcat_rule + word["pre_rule"] or [':'])
-                    if self.verbose: print "[+] %s => %s => %s" % (word["suggestion"], hashcat_rule_str, password)
-
-                    if self.hashcat:
-                        self.verify_hashcat_rules(word["suggestion"], hashcat_rule + word["pre_rule"], password)
-
-                    # TODO: Collect statistics later                        
-                    # if hashcat_rule_str in self.rule_stats: self.rule_stats[hashcat_rule_str] += 1
-                    # else: self.rule_stats[hashcat_rule_str] = 1
-
-                    self.output_rules_f.write("%s\n" % hashcat_rule_str)
-                    self.output_words_f.write("%s\n" % word["suggestion"])
-
-    ############################################################################
-    def verify_hashcat_rules(self,word, rules, password):
-        import subprocess
-
-        f = open("%s/test.rule" % HASHCAT_PATH,'w')
-        f.write(" ".join(rules))
-        f.close()
-
-        f = open("%s/test.word" % HASHCAT_PATH,'w')
-        f.write(word)
-        f.close()
-
-        p = subprocess.Popen(["%s/hashcat-cli64.bin" % HASHCAT_PATH,"-r","%s/test.rule" % HASHCAT_PATH,"--stdout","%s/test.word" % HASHCAT_PATH], stdout=subprocess.PIPE)
-        out, err = p.communicate()
-        out = out.strip()
-
-        if out == password:
-            hashcat_rules_str = " ".join(rules or [':'])
-            if self.verbose: print "[+] %s => %s => %s" % (word, hashcat_rules_str, password)
-
-        else:
-            print "[!] Hashcat Verification FAILED: %s => %s => %s (%s)" % (word," ".join(rules or [':']),password,out)
 
     def check_reversible_password(self, password):
         """ Check whether the password is likely to be reversed successfuly. """
@@ -786,83 +746,194 @@ class RuleGen:
         else:
             return True
 
-    def analyze_password(self,password):
+    def analyze_password(self,password, rules_queue=multiprocessing.Queue(), words_queue=multiprocessing.Queue()):
         """ Analyze a single password. """
 
         if self.verbose: print "[*] Analyzing password: %s" % password
-        if self.verbose: start_time = time.clock()
 
-        # Only process passwords likely to be dictionary based.
-        if self.check_reversible_password(password):
+        words = []
 
-            # TODO: Collect statistics later
-            # if password in self.password_stats: self.password_stats[password] += 1
-            # else: self.password_stats[password] = 1
+        # Short-cut words in the dictionary
+        if self.enchant.check(password) and not self.word:
 
-            words = []
+            word = dict()
+            word["password"] = password
+            word["suggestion"] = password
+            word["hashcat_rules"] = [[],]
+            word["pre_rule"] = []
+            word["best_rule_length"] = 9999
 
-            # Short-cut words in the dictionary
-            if self.enchant.check(password) and not self.word:
+            words.append(word)
 
-                # Record password as a source word for stats
-                # TODO: Collect statistics later
-                # if password in self.word_stats: self.word_stats[password] += 1
-                # else: self.word_stats[password] = 1
+        # Generate rules for words not in the dictionary
+        else:
 
-                word = dict()
-                word["password"] = password
-                word["suggestion"] = password
-                word["hashcat_rules"] = [[],]
-                word["pre_rule"] = []
-                word["best_rule_length"] = 9999
+            # Generate source words list
+            words = self.generate_words(password)
 
-                words.append(word)
+            # Generate levenshtein reverse paths for each suggestion
+            for word in words:
 
-            # Generate rules for words not in the dictionary
-            else:
+                # Generate a collection of hashcat_rules lists
+                word["hashcat_rules"] = self.generate_hashcat_rules(word["suggestion"],word["password"])
 
-                # Generate source words list
-                words = self.generate_words(password)
+        self.print_hashcat_rules(words, password, rules_queue, words_queue)
 
-                # Generate levenshtein reverse paths for each suggestion
-                for word in words:
+    def print_hashcat_rules(self, words, password, rules_queue, words_queue):
 
-                    # Generate a collection of hashcat_rules lists
-                    word["hashcat_rules"] = self.generate_hashcat_rules(word["suggestion"],word["password"])
+        best_found_rule_length = 9999
 
-            self.print_hashcat_rules(words, password)
+        # Sorted list based on rule length
+        for word in sorted(words, key=lambda word: len(word["hashcat_rules"][0])):
 
-        if self.verbose: print "[*] Finished analysis in %.2f seconds" % (time.clock()-start_time)
+            words_queue.put(word["suggestion"])
+
+            for hashcat_rule in word["hashcat_rules"]:
+
+                rule_length = len(hashcat_rule)
+
+                if not self.more_rules:
+                    if rule_length < best_found_rule_length:
+                        best_found_rule_length = rule_length
+
+                    elif rule_length > best_found_rule_length:
+                        if self.verbose: 
+                            print "[-] %s => {best rule length exceeded: %d (%d)} => %s" % \
+                            (word["suggestion"], rule_length, best_found_rule_length, password)
+                        break
+
+                if rule_length <= self.max_rule_len:
+
+                    hashcat_rule_str = " ".join(hashcat_rule + word["pre_rule"] or [':'])
+                    if self.verbose: print "[+] %s => %s => %s" % (word["suggestion"], hashcat_rule_str, password)
+
+                    # File writes and hashcat verification are disabled here; the rule is queued below instead.
+                    #if self.hashcat:
+                    #    self.verify_hashcat_rules(word["suggestion"], hashcat_rule + word["pre_rule"], password)
+                    #self.output_rules_f.write("%s\n" % hashcat_rule_str)
+                    #self.output_words_f.write("%s\n" % word["suggestion"])
+
+                    rules_queue.put(hashcat_rule_str)
+                    
+
+    def password_worker(self,i, passwords_queue, rules_queue, words_queue):
+        print "[*] Password analysis worker [%d] started." % i
+        try:
+            while True:
+                password = passwords_queue.get()
+
+                # Interrupted by a Death Pill
+                if password == None: break
+
+                self.analyze_password(password, rules_queue, words_queue)
+        except (KeyboardInterrupt, SystemExit):
+            print "[*] Password analysis worker [%d] terminated." % i
+
+        print "[*] Password analysis worker [%d] stopped." % i
+
+    def rule_worker(self, rules_queue, output_rules_filename):
+        """ Worker to store generated rules. """
+
+        f = open(output_rules_filename, 'w')
+        print "[*] Rule worker started."
+        try:
+            while True:
+                rule = rules_queue.get()
+
+                # Interrupted by a Death Pill
+                if rule == None: break
+
+                f.write("%s\n" % rule)
+                f.flush()
+
+        except (KeyboardInterrupt, SystemExit):
+            print "[*] Rule worker terminated."
+
+        f.close()
+        print "[*] Rule worker stopped."
+
+    def word_worker(self, words_queue, output_words_filename):
+        """ Worker to store generated source words. """
+
+        f = open(output_words_filename, 'w')
+        print "[*] Word worker started."
+        try:
+            while True:
+                word = words_queue.get()
+
+                # Interrupted by a Death Pill
+                if word == None: break
+
+                f.write("%s\n" % word)
+                f.flush()
+
+        except (KeyboardInterrupt, SystemExit):
+            print "[*] Word worker terminated."
+
+        f.close()
+        print "[*] Word worker stopped."
 
     # Analyze passwords file
     def analyze_passwords_file(self,passwords_file):
         """ Analyze provided passwords file. """
 
         print "[*] Analyzing passwords file: %s:" % passwords_file
+
+        # Setup queues
+        passwords_queue = multiprocessing.Queue(multiprocessing.cpu_count() * 100)
+        rules_queue = multiprocessing.Queue()
+        words_queue = multiprocessing.Queue()
+
+        # Start workers
+        for i in range(multiprocessing.cpu_count()):
+            multiprocessing.Process(target=self.password_worker, args=(i, passwords_queue, rules_queue, words_queue)).start()
+        multiprocessing.Process(target=self.rule_worker, args=(rules_queue, "%s.rule" % self.basename)).start()
+        multiprocessing.Process(target=self.word_worker, args=(words_queue, "%s.word" % self.basename)).start()
+
+        # The main process reads the passwords file and feeds passwords_queue.
+
         f = open(passwords_file,'r')
 
         password_count = 0
-        analysis_start = time.clock()
+        analysis_start = time.time()
         try:        
             for password in f:
-                password = password.strip()
+                password = password.rstrip('\r\n')
                 if len(password) > 0:
 
                     # Provide analysis time feedback to the user
-                    if password_count != 0 and password_count % 10000 == 0:
-                        current_analysis_time = time.clock() - analysis_start
-                        if not self.quiet: print "[*] Processed %d passwords in %.2f seconds at the rate of %.2f p/sec" % (password_count, current_analysis_time, password_count/current_analysis_time )
-
+                    if password_count != 0 and password_count % 5000 == 0:
+                        current_analysis_time = time.time() - analysis_start
+                        if not self.quiet: 
+                            print "[*] Processed %d passwords in %.2f seconds at the rate of %.2f p/sec" % \
+                            (password_count, current_analysis_time, password_count/current_analysis_time )
                     password_count += 1
-                    self.analyze_password(password)
+
+                    # Perform preliminary checks and add password to the queue
+                    if self.check_reversible_password(password):
+                        passwords_queue.put(password)
 
         except (KeyboardInterrupt, SystemExit):
             print "\n[!] Rulegen was interrupted."
+
+        else:
+            # Signal workers to stop.
+            for i in range(multiprocessing.cpu_count()):
+                passwords_queue.put(None) 
+
+            # Wait until the workers have drained the passwords queue.
+            while not passwords_queue.empty():
+                time.sleep(1)
+
+            # Signal writers to stop.
+            rules_queue.put(None)
+            words_queue.put(None)
+
         f.close()
 
-        analysis_time = time.clock() - analysis_start
-        print "[*] Finished processing %d passwords in %.2f seconds at the rate of %.2f p/sec" % (password_count, analysis_time, float(password_count)/analysis_time )
 
+        analysis_time = time.time() - analysis_start
+        print "[*] Finished processing %d passwords in %.2f seconds at the rate of %.2f p/sec" % (password_count, analysis_time, float(password_count)/analysis_time )
 
         print "[*] Generating statistics for [%s] rules and words." % self.basename
         print "[-] Skipped %d all numeric passwords (%0.2f%%)" % \
@@ -872,6 +943,9 @@ class RuleGen:
         print "[-] Skipped %d passwords with non ascii characters (%0.2f%%)" % \
                     (self.foreign_stats_total, float(self.foreign_stats_total)*100.0/float(password_count))
 
+
+        # TODO: Counter breaks on large files; "uniq -c | sort -rn" is still the
+        #       best way to do this.
         rules_file = open("%s.rule" % self.basename,'r')
         rules_sorted_file = open("%s-sorted.rule" % self.basename, 'w')
         rules_counter = Counter(rules_file)
@@ -903,6 +977,28 @@ class RuleGen:
         words_file.close()
         words_sorted_file.close()
 
+    ############################################################################
+    def verify_hashcat_rules(self,word, rules, password):
+
+        f = open("%s/test.rule" % HASHCAT_PATH,'w')
+        f.write(" ".join(rules))
+        f.close()
+
+        f = open("%s/test.word" % HASHCAT_PATH,'w')
+        f.write(word)
+        f.close()
+
+        p = subprocess.Popen(["%s/hashcat-cli64.bin" % HASHCAT_PATH,"-r","%s/test.rule" % HASHCAT_PATH,"--stdout","%s/test.word" % HASHCAT_PATH], stdout=subprocess.PIPE)
+        out, err = p.communicate()
+        out = out.strip()
+
+        if out == password:
+            hashcat_rules_str = " ".join(rules or [':'])
+            if self.verbose: print "[+] %s => %s => %s" % (word, hashcat_rules_str, password)
+
+        else:
+            print "[!] Hashcat Verification FAILED: %s => %s => %s (%s)" % (word," ".join(rules or [':']),password,out)
+
 if __name__ == "__main__":
 
     header  = "                       _ \n"
@@ -921,6 +1017,7 @@ if __name__ == "__main__":
     parser.add_option("-b","--basename", help="Output base name. The following files will be generated: basename.words, basename.rules and basename.stats", default="analysis",metavar="rockyou")
     parser.add_option("-w","--wordlist", help="Use a custom wordlist for rule analysis.", metavar="wiki.dict")
     parser.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False, help="Don't show headers.")
+    parser.add_option("--threads", type="int", default=10, help="Parallel threads to use for processing.")
 
     wordtune = OptionGroup(parser, "Fine tune source word generation:")
     wordtune.add_option("--maxworddist", help="Maximum word edit distance (Levenshtein)", type="int", default=10, metavar="10")
@@ -959,7 +1056,7 @@ if __name__ == "__main__":
         parser.error("no passwords file specified")
         exit(1)
 
-    rulegen = RuleGen(language="en", providers=options.providers, basename=options.basename)
+    rulegen = RuleGen(language="en", providers=options.providers, basename=options.basename, threads=options.threads)
 
     # Finetuning word generation
     rulegen.max_word_dist=options.maxworddist
@@ -994,6 +1091,7 @@ if __name__ == "__main__":
         print "[*] Press Ctrl-C to end execution and generate statistical analysis."
 
     # Analyze a single password or several passwords in a file
-    if options.password: rulegen.analyze_password(args[0])
+    if options.password: 
+        rulegen.analyze_password(args[0])
     else: 
-        rulegen.analyze_passwords_file(args[0])
\ No newline at end of file
+        rulegen.analyze_passwords_file(args[0])