Subversion Repositories Code-Repo

Rev

Blame | Last modification | View Log | RSS feed

import string, re

# Mapping of letter frequencies
letter_freq = {'A':.082,'B':.015,'C':0.028,'D':.043,'E':.127,'F':.022,'G':.02,'H':.061,\
                'I':.07,'J':.002,'K':.008,'L':.04,'M':.024,'N':.067,'O':.075,'P':.019,'Q':.001,\
                'R':.06,'S':.063,'T':.091,'U':.028,'V':.01,'W':.023,'X':.001,'Y':.02,'Z':.001}
alphabet_length = 26 # Number of letters in the alphabet
alphabet_offset = 65 # Offset of 'A' for ASCII characters
english_IC = 0.065 # IC of english
key_lengths = 10 # Number of key lengths to test
# Encrypted message
input = 'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX \
        QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC \
        IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ \
        OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR \
        UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'

# letter_freq = {'A':0.7,'B':0.2,'C':0.1}
# alphabet_length = 3
# alphabet_offset = 65
# english_IC = 0.54
# key_lengths = 4
# input = 'ABCBABBBAC'

def calculate_IC(str):
        '''Calculates and returns the IC for a given string'''
        IC = 0
        for letter in letter_freq.keys():
                count = str.count(letter)       # count the frequency of every letter
                IC += float(count * (count-1))/float(len(str) * (len(str)-1))
        return IC

def get_substring(str, keylen):
        '''Returns a list of substrings of an offset of mod keylen'''
        ret = []
        for incr in range(keylen): # Iterate through the positions within each key
                substring = ''
                for char in range(len(str)): # Get every X characters from the input
                        if char % keylen == incr:
                                substring += str[char]
                ret.append(substring)
        return ret

def char_shift(char, x):
        '''Shifts a given character x amount and returns it'''
        i = ord(char) - alphabet_offset
        i += x
        while i < 0:
                i += alphabet_length
        return chr(i + alphabet_offset)

if __name__ == '__main__':
        # input = raw_input("Enter string: ")

        # Remove all whitespace from the input string and convert the string to uppercase
        input_stripped = re.sub('\s', '', input)
        input_stripped = input_stripped.upper()

        # Print out the sample size (length of input)
        sample_size = len(input_stripped)
        print "Sample Size: {0}\n".format(sample_size)

        # Calculate and print out the IC with keys of length 1-9
        print "Index of Coincidence (0.065 = IC of english):"
        freq_avg = dict()
        freq_diff = dict()

        # Create a mapping of IC to key lengths
        for length in range(1,key_lengths):
                avg_IC = 0
                substrings = get_substring(input_stripped, length) # Get a list of substrings
                print "M = {0}:".format(length),
                for str in substrings: # Calculate the average IC of all the substrings
                        IC = calculate_IC(str)
                        avg_IC += IC
                        print "{0:.2}".format(IC),
                print " : {0:.2}".format(avg_IC/length)
                freq_avg[avg_IC/length] = length # Store the key length and resulting IC

        # Create a mapping of |IC - 0.065| to key lengths
        for entry in freq_avg:
                freq_diff[abs(entry - english_IC)] = freq_avg[entry]

        # Sort this mapping and print out the most likely key lengths
        keys = freq_diff.keys()
        keys.sort()
        print "\nThe most likely key lengths are:"
        for entry in keys:
                print "{0} : {1:.3}".format(freq_diff[entry], entry)

        # Using the found keylength, calculate the most likely decrypting key
        print "\nAssuming the key has a length of {0} characters".format(freq_diff[keys[0]])
        key_length = freq_diff[keys[0]]

        # Now we shift each substring and try to find a shift where the IC is closest to 0.065
        print "The most likely key solutions are (read down on the left column):"
        substrings = get_substring(input_stripped, key_length) # First divide the input string by the key length
        key_final = ''

        for substr in substrings:
                freq_avg.clear() # First clear out both frequency dictionaries
                freq_diff.clear()

                # Create a mapping of IC to letters
                for g in range(alphabet_length): # Iterate through each letter + shifted value
                        IC = 0
                        for i in range(alphabet_length): # Iterate through each letter
                                # Calculate the IC using M_g = Sum(i=0-25) : p_i * f_(i+g) / n'
                                IC += float(letter_freq[chr(i+alphabet_offset)] * substr.count(chr((i+g)%alphabet_length + alphabet_offset))) / float(sample_size/key_length)
                        freq_avg[IC] = chr((g)%alphabet_length + alphabet_offset) # Store the letter and calculated IC

                # Create a mapping of |IC - 0.065| to letters
                for entry in freq_avg:
                        freq_diff[abs(entry - english_IC)] = freq_avg[entry]

                # Sort this mapping and print out the key
                keys = freq_diff.keys()
                keys.sort()
                output = ''
                key_final += freq_diff[keys[0]]
                for entry in keys:
                        output += freq_diff[entry] + ' '
                print output
                for i in range(3):
                        print "{0} : {1:.3}  ".format(freq_diff[keys[i]], keys[i])
                print

        # Decode the encrypted message using the key
        key_ind = 0
        output = ''
        for i in range(sample_size):
                output += char_shift(input_stripped[i], 0-ord(key_final[key_ind])-alphabet_offset)
                key_ind += 1
                key_ind %= key_length

        # Print the decoded message
        print "Encryption key: {0}, Decrypted message:".format(key_final)
        print output