Size: 2410
Comment:
|
Size: 2410
Comment:
|
Deletions are marked like this. | Additions are marked like this. |
Line 1: | Line 1: |
Python实现标签去 | Python实现标签云 |
Python实现标签云 来源:http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html
1 # !/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import string
5
def main():
    """Generate the tag cloud page from the tag frequency file.

    Loads the tags and their counts from ``/tmp/tag.txt``, derives the
    frequency ranges from them and writes the cloud to ``tags.html``.
    """
    # Tags paired with their frequency, as read from the input file.
    tags = getTagListSortedByFrequency('/tmp/tag.txt')
    # The ranges are derived from the loaded tags; the cloud is written
    # out alphabetically, with size indicating the relative frequency of
    # occurrence of each tag.
    writeCloud(tags, getRanges(tags), 'tags.html')
14
def getTagListSortedByFrequency(inputfile):
    """Load ``tag|count`` records from a file and sort them by count.

    Args:
        inputfile: Path of a text file holding one ``tag|count`` record
            per line. Blank lines are ignored.

    Returns:
        A list of ``(tag, count)`` tuples, with ``count`` converted to an
        int, sorted by ascending count. Records with equal counts keep
        the order in which they appear in the file.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            ``|``-separated fields, or the count is not an integer.
    """
    taglist = []
    # The context manager closes the file even when a line fails to parse.
    with open(inputfile, 'r') as inputf:
        for line in inputf:
            # Strip only the line terminator: slicing off the last
            # character would eat a digit of the count whenever the final
            # line has no trailing newline.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            (tag, count) = line.split('|')
            taglist.append((tag, int(count)))
    # list.sort is stable, so ties stay in file order.
    taglist.sort(key=lambda entry: entry[1])
    return taglist
27
def getRanges(taglist):
    """Split the span of tag counts into five equal-width ranges.

    The smallest and largest counts are located with ``min``/``max``, so
    the input does not have to be sorted.

    Args:
        taglist: Sequence of ``(tag, count)`` entries; only the element
            at index 1 of each entry is read.

    Returns:
        A list of ``(low, high)`` tuples in ascending order:
          * ``[]`` when ``taglist`` is empty;
          * a single ``(count, count)`` range when every count is equal;
          * otherwise five ranges, each ``(max - min) / 4`` wide, the
            first starting at the minimum count and the last starting at
            the maximum count.
    """
    if not taglist:
        return []
    counts = [entry[1] for entry in taglist]
    mincount = min(counts)
    maxcount = max(counts)
    if mincount == maxcount:
        # A zero-width step would never advance; one range covers all tags.
        return [(mincount, maxcount)]
    steps = 4
    # Explicit float division keeps the step non-zero for narrow spans and
    # gives the same result whether "/" truncates integers or not.
    distrib = (maxcount - mincount) / float(steps)
    # Computing each bound by multiplication (instead of accumulating the
    # step) fixes the number of ranges at steps + 1 regardless of rounding.
    return [(mincount + i * distrib, mincount + (i + 1) * distrib)
            for i in range(steps + 1)]
39
def writeCloud(taglist, ranges, outputfile):
    """Write the tag cloud as an HTML fragment.

    The output starts with a ``<style>`` block defining five font-size
    classes, followed by one ``<span>`` per tag. Each span wraps a link to
    a Google search for the tag restricted to sujitpal.blogspot.com, and
    its class is chosen by the first range that contains the tag's count.

    Args:
        taglist: List of ``(tag, count)`` entries. It is re-sorted in
            place by tag so the cloud comes out alphabetically.
        ranges: Sequence of ``(low, high)`` bounds, both inclusive, in
            ascending order. The position of the matching range selects
            the style; positions beyond the fifth reuse the largest style.
        outputfile: Path of the file to create or overwrite.

    Tags whose count falls in none of the ranges are not written.
    """
    rangeStyle = ['smallestTag', 'smallTag', 'mediumTag', 'largeTag', 'largestTag']
    # Re-sort the tags alphabetically (in place, as callers may expect).
    taglist.sort(key=lambda entry: entry[0])
    # The context manager closes the file even if a write fails.
    with open(outputfile, 'w') as outputf:
        outputf.write("<style type=\"text/css\">\n")
        outputf.write(".smallestTag {font-size: xx-small;}\n")
        outputf.write(".smallTag {font-size: small;}\n")
        outputf.write(".mediumTag {font-size: medium;}\n")
        outputf.write(".largeTag {font-size: large;}\n")
        outputf.write(".largestTag {font-size: xx-large;}\n")
        outputf.write("</style>\n")
        for tag in taglist:
            # NOTE(review): the tag text goes into the URL and the markup
            # unescaped; this assumes the tag file is trusted input.
            url = 'http://www.google.com/search?q=' + tag[0].replace(' ', '+') + '+site%3Asujitpal.blogspot.com'
            for rangeIndex, bounds in enumerate(ranges):
                if bounds[0] <= tag[1] <= bounds[1]:
                    # Clamp so extra ranges cannot index past the styles.
                    style = rangeStyle[min(rangeIndex, len(rangeStyle) - 1)]
                    outputf.write('<span class=\"' + style + '\"><a href=\"' + url + '\">' + tag[0] + '</a></span>')
                    break
61
# Generate the tag cloud only when this file is executed as a script.
if __name__ == "__main__":
    main()