Blame | Last modification | View Log | RSS feed
import string, re
# Mapping of letter frequencies
letter_freq = {'A':.082,'B':.015,'C':0.028,'D':.043,'E':.127,'F':.022,'G':.02,'H':.061,\
'I':.07,'J':.002,'K':.008,'L':.04,'M':.024,'N':.067,'O':.075,'P':.019,'Q':.001,\
'R':.06,'S':.063,'T':.091,'U':.028,'V':.01,'W':.023,'X':.001,'Y':.02,'Z':.001}
alphabet_length = 26 # Number of letters in the alphabet
alphabet_offset = 65 # Offset of 'A' for ASCII characters
english_IC = 0.065 # IC of english
key_lengths = 10 # Number of key lengths to test
# Encrypted message
input = 'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX \
QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC \
IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ \
OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR \
UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'
# letter_freq = {'A':0.7,'B':0.2,'C':0.1}
# alphabet_length = 3
# alphabet_offset = 65
# english_IC = 0.54
# key_lengths = 4
# input = 'ABCBABBBAC'
def calculate_IC(str):
'''Calculates and returns the IC for a given string'''
IC = 0
for letter in letter_freq.keys():
count = str.count(letter) # count the frequency of every letter
IC += float(count * (count-1))/float(len(str) * (len(str)-1))
return IC
def get_substring(str, keylen):
'''Returns a list of substrings of an offset of mod keylen'''
ret = []
for incr in range(keylen): # Iterate through the positions within each key
substring = ''
for char in range(len(str)): # Get every X characters from the input
if char % keylen == incr:
substring += str[char]
ret.append(substring)
return ret
def char_shift(char, x):
'''Shifts a given character x amount and returns it'''
i = ord(char) - alphabet_offset
i += x
while i < 0:
i += alphabet_length
return chr(i + alphabet_offset)
if __name__ == '__main__':
# input = raw_input("Enter string: ")
# Remove all whitespace from the input string and convert the string to uppercase
input_stripped = re.sub('\s', '', input)
input_stripped = input_stripped.upper()
# Print out the sample size (length of input)
sample_size = len(input_stripped)
print "Sample Size: {0}\n".format(sample_size)
# Calculate and print out the IC with keys of length 1-9
print "Index of Coincidence (0.065 = IC of english):"
freq_avg = dict()
freq_diff = dict()
# Create a mapping of IC to key lengths
for length in range(1,key_lengths):
avg_IC = 0
substrings = get_substring(input_stripped, length) # Get a list of substrings
print "M = {0}:".format(length),
for str in substrings: # Calculate the average IC of all the substrings
IC = calculate_IC(str)
avg_IC += IC
print "{0:.2}".format(IC),
print " : {0:.2}".format(avg_IC/length)
freq_avg[avg_IC/length] = length # Store the key length and resulting IC
# Create a mapping of |IC - 0.065| to key lengths
for entry in freq_avg:
freq_diff[abs(entry - english_IC)] = freq_avg[entry]
# Sort this mapping and print out the most likely key lengths
keys = freq_diff.keys()
keys.sort()
print "\nThe most likely key lengths are:"
for entry in keys:
print "{0} : {1:.3}".format(freq_diff[entry], entry)
# Using the found keylength, calculate the most likely decrypting key
print "\nAssuming the key has a length of {0} characters".format(freq_diff[keys[0]])
key_length = freq_diff[keys[0]]
# Now we shift each substring and try to find a shift where the IC is closest to 0.065
print "The most likely key solutions are (read down on the left column):"
substrings = get_substring(input_stripped, key_length) # First divide the input string by the key length
key_final = ''
for substr in substrings:
freq_avg.clear() # First clear out both frequency dictionaries
freq_diff.clear()
# Create a mapping of IC to letters
for g in range(alphabet_length): # Iterate through each letter + shifted value
IC = 0
for i in range(alphabet_length): # Iterate through each letter
# Calculate the IC using M_g = Sum(i=0-25) : p_i * f_(i+g) / n'
IC += float(letter_freq[chr(i+alphabet_offset)] * substr.count(chr((i+g)%alphabet_length + alphabet_offset))) / float(sample_size/key_length)
freq_avg[IC] = chr((g)%alphabet_length + alphabet_offset) # Store the letter and calculated IC
# Create a mapping of |IC - 0.065| to letters
for entry in freq_avg:
freq_diff[abs(entry - english_IC)] = freq_avg[entry]
# Sort this mapping and print out the key
keys = freq_diff.keys()
keys.sort()
output = ''
key_final += freq_diff[keys[0]]
for entry in keys:
output += freq_diff[entry] + ' '
print output
for i in range(3):
print "{0} : {1:.3} ".format(freq_diff[keys[i]], keys[i])
# Decode the encrypted message using the key
key_ind = 0
output = ''
for i in range(sample_size):
output += char_shift(input_stripped[i], 0-ord(key_final[key_ind])-alphabet_offset)
key_ind += 1
key_ind %= key_length
# Print the decoded message
print "Encryption key: {0}, Decrypted message:".format(key_final)
print output