Subversion Repositories Code-Repo

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
140 Kevin 1
import string, re
2
 
3
# Two-letter sequences (digrams) treated as common, in their original order.
# NOTE(review): presumably ranked for English text - confirm the source.
common_digram = (
	'TH HE IN ER AN RE ED ON ES ST EN AT TO NT HA ND OU EA NG AS '
	'OR TI IS ET IT AR TE SE HI OF'
).split()
4
# Three-letter sequences (trigrams) treated as common, in their original order.
# NOTE(review): presumably ranked for English text - confirm the source.
common_trigram = 'THE ING AND HER ERE ENT THA NTH WAS ETH FOR DTH'.split()
5
 
6
def print_frequency(str, num_chars):
	"""Print how often each 'num_chars'-character substring occurs in 'str'.

	Every overlapping window of length 'num_chars' in 'str' is counted.
	One line is printed per distinct count, highest count first, in the
	form "<count> : <substring> <substring> ...". Substrings sharing a
	count are listed in sorted order so the output is reproducible.
	Nothing is printed when 'str' is shorter than 'num_chars'.

	The parameter keeps its historical name 'str' (which shadows the
	builtin inside this function) so existing callers are unaffected.
	"""
	# Calculate the frequency of 'num_chars' characters in 'str'.
	# Only the argument is read here, never a module-level variable.
	frequency = dict()
	for i in range(0, len(str) + 1 - num_chars):
		ngram = str[i:i + num_chars]
		frequency[ngram] = frequency.get(ngram, 0) + 1

	# Group the substrings by the number of times they occur
	by_count = dict()
	for ngram, count in frequency.items():
		by_count.setdefault(count, []).append(ngram)

	# Print out the frequency in decreasing order. Each line is built as a
	# single string so the call behaves the same whether 'print' is a
	# statement or a function. An empty table simply prints nothing.
	for count in sorted(by_count, reverse=True):
		print('%d : %s' % (count, ' '.join(sorted(by_count[count]))))
27
 
28
if __name__ == '__main__':
	# Ciphertext to analyse. To read it interactively instead, use:
	# ciphertext = raw_input("Enter string: ")
	ciphertext = (
		'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX'
		'QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC'
		'IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ'
		'OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR'
		'UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'
	)
	# Remove all whitespace from the input string. The name
	# 'input_stripped' stays at module level because other code in this
	# file may refer to it.
	input_stripped = re.sub(r'\s', '', ciphertext)

	# Print out the sample size
	sample_size = len(input_stripped)
	print("Sample Size: {0}".format(sample_size))

	# Print out the letter frequency and % for each letter.
	# ascii_uppercase is used so the alphabet does not depend on the locale.
	print("Letter Frequency Count:")
	for letter in string.ascii_uppercase:
		count = input_stripped.count(letter)
		# Guard against an empty sample so the share is never a division by zero
		share = float(count) / sample_size if sample_size else 0.0
		print("{0} : {1:<2} : {2:.2%}".format(letter, count, share))

	# Print out the sorted frequency for letters
	print("Letter Frequency Count (Sorted):")
	print_frequency(input_stripped, 1)

	# Print out the sorted frequency for digrams
	print("Digram Frequency Count:")
	print_frequency(input_stripped, 2)

	# Print out the sorted frequency for trigrams
	print("Trigram Frequency Count:")
	print_frequency(input_stripped, 3)