Blame | Last modification | View Log | Download | RSS feed
# Frequency analysis helper for a ciphertext sample: prints the sample size,
# per-letter counts, and the letter / digram / trigram counts grouped by how
# often each one occurs.
import re
import string

# Reference lists of common digrams and trigrams (presumably typical English
# plaintext) for eyeballing against the counts printed below.  Nothing in
# this file reads them.
common_digram = ['TH', 'HE', 'IN', 'ER', 'AN', 'RE', 'ED', 'ON', 'ES', 'ST',
                 'EN', 'AT', 'TO', 'NT', 'HA', 'ND', 'OU', 'EA', 'NG', 'AS',
                 'OR', 'TI', 'IS', 'ET', 'IT', 'AR', 'TE', 'SE', 'HI', 'OF']
common_trigram = ['THE', 'ING', 'AND', 'HER', 'ERE', 'ENT', 'THA', 'NTH',
                  'WAS', 'ETH', 'FOR', 'DTH']


def count_ngrams(text, num_chars):
    """Count every overlapping run of ``num_chars`` characters in ``text``.

    Args:
        text: The string to analyse.
        num_chars: Length of each run (1 = letters, 2 = digrams, ...).

    Returns:
        A dict mapping each run that occurs in ``text`` to the number of
        positions at which it starts.  Empty when ``text`` is shorter than
        ``num_chars``.

    Raises:
        ValueError: If ``num_chars`` is less than 1.
    """
    if num_chars < 1:
        raise ValueError("num_chars must be at least 1")
    frequency = {}
    # One pass over the sliding window; each window is tallied directly
    # instead of re-scanning the whole text with str.count().
    for start in range(len(text) - num_chars + 1):
        ngram = text[start:start + num_chars]
        frequency[ngram] = frequency.get(ngram, 0) + 1
    return frequency


def group_by_count(frequency):
    """Group the keys of ``frequency`` by their count, highest count first.

    Args:
        frequency: A dict mapping items to integer counts.

    Returns:
        A list of ``(count, items)`` tuples in decreasing ``count`` order,
        where ``items`` is the sorted list of keys having that count.
    """
    groups = {}
    for ngram, count in frequency.items():
        groups.setdefault(count, []).append(ngram)
    # Sorting the entries keeps the output stable between runs, whatever
    # order the dict happens to iterate in.
    return [(count, sorted(groups[count]))
            for count in sorted(groups, reverse=True)]


def print_frequency(str, num_chars):
    """Print the ``num_chars``-character runs of ``str`` grouped by count.

    One line is printed per distinct count, in decreasing order, formatted
    as ``<count> : <run> <run> ...``.  Nothing is printed when ``str`` is
    shorter than ``num_chars``.

    Args:
        str: The text to analyse.  (The name shadows the builtin; it is kept
            so existing keyword callers continue to work.)
        num_chars: Length of the runs to count.

    Raises:
        ValueError: If ``num_chars`` is less than 1.
    """
    # The counts come from the argument itself, so the function works on any
    # text and does not depend on a module-level variable being defined.
    for count, ngrams in group_by_count(count_ngrams(str, num_chars)):
        print("{0} : {1}".format(count, " ".join(ngrams)))


def main():
    """Run the frequency analysis on the built-in ciphertext sample."""
    # To analyse other text, replace this sample (for example with a value
    # read from the user).
    ciphertext = (
        'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX '
        'QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC '
        'IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ '
        'OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR '
        'UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'
    )

    # Remove all whitespace from the input string
    input_stripped = re.sub(r'\s', '', ciphertext)

    # Print out the sample size
    sample_size = len(input_stripped)
    print("Sample Size: {0}".format(sample_size))

    # Print out the count and relative frequency (count / sample size) for
    # each letter
    print("Letter Frequency Count:")
    for letter in string.ascii_uppercase:
        count = input_stripped.count(letter)
        print("{0} : {1:<2} : {2:.2}".format(
            letter, count, float(count) / float(sample_size)))

    # Print out the sorted frequency for letters
    print("Letter Frequency Count (Sorted):")
    print_frequency(input_stripped, 1)

    # Print out the sorted frequency for digrams
    print("Digram Frequency Count:")
    print_frequency(input_stripped, 2)

    # Print out the sorted frequency for trigrams
    print("Trigram Frequency Count:")
    print_frequency(input_stripped, 3)


if __name__ == '__main__':
    main()