| 140 |
Kevin |
1 |
import string, re
|
|
|
2 |
|
|
|
3 |
# Mapping of letter frequencies
|
|
|
4 |
letter_freq = {'A':.082,'B':.015,'C':0.028,'D':.043,'E':.127,'F':.022,'G':.02,'H':.061,\
|
|
|
5 |
'I':.07,'J':.002,'K':.008,'L':.04,'M':.024,'N':.067,'O':.075,'P':.019,'Q':.001,\
|
|
|
6 |
'R':.06,'S':.063,'T':.091,'U':.028,'V':.01,'W':.023,'X':.001,'Y':.02,'Z':.001}
|
|
|
7 |
alphabet_length = 26 # Number of letters in the alphabet
|
|
|
8 |
alphabet_offset = 65 # Offset of 'A' for ASCII characters
|
|
|
9 |
english_IC = 0.065 # IC of english
|
|
|
10 |
key_lengths = 10 # Number of key lengths to test
|
|
|
11 |
# Encrypted message
|
|
|
12 |
input = 'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX \
|
|
|
13 |
QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC \
|
|
|
14 |
IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ \
|
|
|
15 |
OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR \
|
|
|
16 |
UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'
|
|
|
17 |
|
|
|
18 |
# letter_freq = {'A':0.7,'B':0.2,'C':0.1}
|
|
|
19 |
# alphabet_length = 3
|
|
|
20 |
# alphabet_offset = 65
|
|
|
21 |
# english_IC = 0.54
|
|
|
22 |
# key_lengths = 4
|
|
|
23 |
# input = 'ABCBABBBAC'
|
|
|
24 |
|
|
|
25 |
def calculate_IC(str):
|
|
|
26 |
'''Calculates and returns the IC for a given string'''
|
|
|
27 |
IC = 0
|
|
|
28 |
for letter in letter_freq.keys():
|
|
|
29 |
count = str.count(letter) # count the frequency of every letter
|
|
|
30 |
IC += float(count * (count-1))/float(len(str) * (len(str)-1))
|
|
|
31 |
return IC
|
|
|
32 |
|
|
|
33 |
def get_substring(str, keylen):
|
|
|
34 |
'''Returns a list of substrings of an offset of mod keylen'''
|
|
|
35 |
ret = []
|
|
|
36 |
for incr in range(keylen): # Iterate through the positions within each key
|
|
|
37 |
substring = ''
|
|
|
38 |
for char in range(len(str)): # Get every X characters from the input
|
|
|
39 |
if char % keylen == incr:
|
|
|
40 |
substring += str[char]
|
|
|
41 |
ret.append(substring)
|
|
|
42 |
return ret
|
|
|
43 |
|
|
|
44 |
def char_shift(char, x):
|
|
|
45 |
'''Shifts a given character x amount and returns it'''
|
|
|
46 |
i = ord(char) - alphabet_offset
|
|
|
47 |
i += x
|
|
|
48 |
while i < 0:
|
|
|
49 |
i += alphabet_length
|
|
|
50 |
return chr(i + alphabet_offset)
|
|
|
51 |
|
|
|
52 |
if __name__ == '__main__':
|
|
|
53 |
# input = raw_input("Enter string: ")
|
|
|
54 |
|
|
|
55 |
# Remove all whitespace from the input string and convert the string to uppercase
|
|
|
56 |
input_stripped = re.sub('\s', '', input)
|
|
|
57 |
input_stripped = input_stripped.upper()
|
|
|
58 |
|
|
|
59 |
# Print out the sample size (length of input)
|
|
|
60 |
sample_size = len(input_stripped)
|
|
|
61 |
print "Sample Size: {0}\n".format(sample_size)
|
|
|
62 |
|
|
|
63 |
# Calculate and print out the IC with keys of length 1-9
|
|
|
64 |
print "Index of Coincidence (0.065 = IC of english):"
|
|
|
65 |
freq_avg = dict()
|
|
|
66 |
freq_diff = dict()
|
|
|
67 |
|
|
|
68 |
# Create a mapping of IC to key lengths
|
|
|
69 |
for length in range(1,key_lengths):
|
|
|
70 |
avg_IC = 0
|
|
|
71 |
substrings = get_substring(input_stripped, length) # Get a list of substrings
|
|
|
72 |
print "M = {0}:".format(length),
|
|
|
73 |
for str in substrings: # Calculate the average IC of all the substrings
|
|
|
74 |
IC = calculate_IC(str)
|
|
|
75 |
avg_IC += IC
|
|
|
76 |
print "{0:.2}".format(IC),
|
|
|
77 |
print " : {0:.2}".format(avg_IC/length)
|
|
|
78 |
freq_avg[avg_IC/length] = length # Store the key length and resulting IC
|
|
|
79 |
|
|
|
80 |
# Create a mapping of |IC - 0.065| to key lengths
|
|
|
81 |
for entry in freq_avg:
|
|
|
82 |
freq_diff[abs(entry - english_IC)] = freq_avg[entry]
|
|
|
83 |
|
|
|
84 |
# Sort this mapping and print out the most likely key lengths
|
|
|
85 |
keys = freq_diff.keys()
|
|
|
86 |
keys.sort()
|
|
|
87 |
print "\nThe most likely key lengths are:"
|
|
|
88 |
for entry in keys:
|
|
|
89 |
print "{0} : {1:.3}".format(freq_diff[entry], entry)
|
|
|
90 |
|
|
|
91 |
# Using the found keylength, calculate the most likely decrypting key
|
|
|
92 |
print "\nAssuming the key has a length of {0} characters".format(freq_diff[keys[0]])
|
|
|
93 |
key_length = freq_diff[keys[0]]
|
|
|
94 |
|
|
|
95 |
# Now we shift each substring and try to find a shift where the IC is closest to 0.065
|
|
|
96 |
print "The most likely key solutions are (read down on the left column):"
|
|
|
97 |
substrings = get_substring(input_stripped, key_length) # First divide the input string by the key length
|
|
|
98 |
key_final = ''
|
|
|
99 |
|
|
|
100 |
for substr in substrings:
|
|
|
101 |
freq_avg.clear() # First clear out both frequency dictionaries
|
|
|
102 |
freq_diff.clear()
|
|
|
103 |
|
|
|
104 |
# Create a mapping of IC to letters
|
|
|
105 |
for g in range(alphabet_length): # Iterate through each letter + shifted value
|
|
|
106 |
IC = 0
|
|
|
107 |
for i in range(alphabet_length): # Iterate through each letter
|
|
|
108 |
# Calculate the IC using M_g = Sum(i=0-25) : p_i * f_(i+g) / n'
|
|
|
109 |
IC += float(letter_freq[chr(i+alphabet_offset)] * substr.count(chr((i+g)%alphabet_length + alphabet_offset))) / float(sample_size/key_length)
|
|
|
110 |
freq_avg[IC] = chr((g)%alphabet_length + alphabet_offset) # Store the letter and calculated IC
|
|
|
111 |
|
|
|
112 |
# Create a mapping of |IC - 0.065| to letters
|
|
|
113 |
for entry in freq_avg:
|
|
|
114 |
freq_diff[abs(entry - english_IC)] = freq_avg[entry]
|
|
|
115 |
|
|
|
116 |
# Sort this mapping and print out the key
|
|
|
117 |
keys = freq_diff.keys()
|
|
|
118 |
keys.sort()
|
|
|
119 |
output = ''
|
|
|
120 |
key_final += freq_diff[keys[0]]
|
|
|
121 |
for entry in keys:
|
|
|
122 |
output += freq_diff[entry] + ' '
|
|
|
123 |
print output
|
|
|
124 |
for i in range(3):
|
|
|
125 |
print "{0} : {1:.3} ".format(freq_diff[keys[i]], keys[i])
|
|
|
126 |
print
|
|
|
127 |
|
|
|
128 |
# Decode the encrypted message using the key
|
|
|
129 |
key_ind = 0
|
|
|
130 |
output = ''
|
|
|
131 |
for i in range(sample_size):
|
|
|
132 |
output += char_shift(input_stripped[i], 0-ord(key_final[key_ind])-alphabet_offset)
|
|
|
133 |
key_ind += 1
|
|
|
134 |
key_ind %= key_length
|
|
|
135 |
|
|
|
136 |
# Print the decoded message
|
|
|
137 |
print "Encryption key: {0}, Decrypted message:".format(key_final)
|
|
|
138 |
print output
|
|
|
139 |
|