Subversion Repositories Code-Repo

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
140 Kevin 1
import string, re
2
 
3
# Mapping of letter frequencies
4
letter_freq = {'A':.082,'B':.015,'C':0.028,'D':.043,'E':.127,'F':.022,'G':.02,'H':.061,\
5
		'I':.07,'J':.002,'K':.008,'L':.04,'M':.024,'N':.067,'O':.075,'P':.019,'Q':.001,\
6
		'R':.06,'S':.063,'T':.091,'U':.028,'V':.01,'W':.023,'X':.001,'Y':.02,'Z':.001}
7
alphabet_length = 26 # Number of letters in the alphabet
8
alphabet_offset = 65 # Offset of 'A' for ASCII characters
9
english_IC = 0.065 # IC of english
10
key_lengths = 10 # Number of key lengths to test
11
# Encrypted message
12
input = 'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX \
13
	QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC \
14
	IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ \
15
	OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR \
16
	UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'
17
 
18
# letter_freq = {'A':0.7,'B':0.2,'C':0.1}
19
# alphabet_length = 3
20
# alphabet_offset = 65
21
# english_IC = 0.54
22
# key_lengths = 4
23
# input = 'ABCBABBBAC'
24
 
25
def calculate_IC(str):
26
	'''Calculates and returns the IC for a given string'''
27
	IC = 0
28
	for letter in letter_freq.keys():
29
		count = str.count(letter)	# count the frequency of every letter
30
		IC += float(count * (count-1))/float(len(str) * (len(str)-1))
31
	return IC
32
 
33
def get_substring(str, keylen):
34
	'''Returns a list of substrings of an offset of mod keylen'''
35
	ret = []
36
 	for incr in range(keylen): # Iterate through the positions within each key
37
 		substring = ''
38
 		for char in range(len(str)): # Get every X characters from the input
39
 			if char % keylen == incr:
40
 				substring += str[char]
41
 		ret.append(substring)
42
 	return ret
43
 
44
def char_shift(char, x):
45
	'''Shifts a given character x amount and returns it'''
46
	i = ord(char) - alphabet_offset
47
	i += x
48
	while i < 0:
49
		i += alphabet_length
50
	return chr(i + alphabet_offset)
51
 
52
if __name__ == '__main__':
53
	# input = raw_input("Enter string: ")
54
 
55
	# Remove all whitespace from the input string and convert the string to uppercase
56
	input_stripped = re.sub('\s', '', input)
57
	input_stripped = input_stripped.upper()
58
 
59
	# Print out the sample size (length of input)
60
	sample_size = len(input_stripped)
61
	print "Sample Size: {0}\n".format(sample_size)
62
 
63
	# Calculate and print out the IC with keys of length 1-9
64
	print "Index of Coincidence (0.065 = IC of english):"
65
	freq_avg = dict()
66
	freq_diff = dict()
67
 
68
	# Create a mapping of IC to key lengths
69
	for length in range(1,key_lengths):
70
		avg_IC = 0
71
		substrings = get_substring(input_stripped, length) # Get a list of substrings
72
		print "M = {0}:".format(length),
73
		for str in substrings: # Calculate the average IC of all the substrings
74
			IC = calculate_IC(str)
75
			avg_IC += IC
76
			print "{0:.2}".format(IC),
77
		print " : {0:.2}".format(avg_IC/length)
78
		freq_avg[avg_IC/length] = length # Store the key length and resulting IC
79
 
80
	# Create a mapping of |IC - 0.065| to key lengths
81
	for entry in freq_avg:
82
		freq_diff[abs(entry - english_IC)] = freq_avg[entry]
83
 
84
	# Sort this mapping and print out the most likely key lengths
85
	keys = freq_diff.keys()
86
	keys.sort()
87
	print "\nThe most likely key lengths are:"
88
	for entry in keys:
89
		print "{0} : {1:.3}".format(freq_diff[entry], entry)
90
 
91
	# Using the found keylength, calculate the most likely decrypting key
92
	print "\nAssuming the key has a length of {0} characters".format(freq_diff[keys[0]])
93
	key_length = freq_diff[keys[0]]
94
 
95
	# Now we shift each substring and try to find a shift where the IC is closest to 0.065
96
	print "The most likely key solutions are (read down on the left column):"
97
	substrings = get_substring(input_stripped, key_length) # First divide the input string by the key length
98
	key_final = ''
99
 
100
	for substr in substrings:
101
		freq_avg.clear() # First clear out both frequency dictionaries
102
		freq_diff.clear()
103
 
104
		# Create a mapping of IC to letters
105
		for g in range(alphabet_length): # Iterate through each letter + shifted value
106
			IC = 0
107
			for i in range(alphabet_length): # Iterate through each letter
108
				# Calculate the IC using M_g = Sum(i=0-25) : p_i * f_(i+g) / n'
109
				IC += float(letter_freq[chr(i+alphabet_offset)] * substr.count(chr((i+g)%alphabet_length + alphabet_offset))) / float(sample_size/key_length)
110
			freq_avg[IC] = chr((g)%alphabet_length + alphabet_offset) # Store the letter and calculated IC
111
 
112
		# Create a mapping of |IC - 0.065| to letters
113
		for entry in freq_avg:
114
			freq_diff[abs(entry - english_IC)] = freq_avg[entry]
115
 
116
		# Sort this mapping and print out the key
117
		keys = freq_diff.keys()
118
		keys.sort()
119
		output = ''
120
		key_final += freq_diff[keys[0]]
121
		for entry in keys:
122
			output += freq_diff[entry] + ' '
123
		print output
124
		for i in range(3):
125
			print "{0} : {1:.3}  ".format(freq_diff[keys[i]], keys[i])
126
		print
127
 
128
	# Decode the encrypted message using the key
129
	key_ind = 0
130
	output = ''
131
	for i in range(sample_size):
132
		output += char_shift(input_stripped[i], 0-ord(key_final[key_ind])-alphabet_offset)
133
		key_ind += 1
134
		key_ind %= key_length
135
 
136
	# Print the decoded message
137
	print "Encryption key: {0}, Decrypted message:".format(key_final)
138
	print output
139