Contents
Python实现标签云
来源:http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html
1 # !/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # !/usr/bin/env python
4 # -*- coding: utf-8 -*-
5
6 import string
7
def main():
    """Build the tag cloud page from the tag frequency file.

    Reads the tag/count pairs from '/tmp/tag.txt', derives the frequency
    ranges from them and writes the resulting cloud to 'tags.html'.
    """
    # Tags with their counts, as returned by getTagListSortedByFrequency.
    tags_by_count = getTagListSortedByFrequency('/tmp/tag.txt')
    # The ranges are computed from the tag counts; writeCloud uses them to
    # pick a font-size class per tag, so size reflects relative frequency
    # of occurrence.
    writeCloud(tags_by_count, getRanges(tags_by_count), 'tags.html')
16
def getTagListSortedByFrequency(inputfile):
    """Read ``tag|count`` records from a file and return them sorted by count.

    Args:
        inputfile: Path of a text file holding one ``tag|count`` record per
            line. Blank lines are ignored.

    Returns:
        A list of ``(tag, count)`` tuples with ``count`` converted to int,
        in ascending order of count. Records with equal counts keep the
        order they had in the file.

    Raises:
        ValueError: If a non-blank line has no ``|`` separator or its count
            is not an integer.
    """
    taglist = []
    # The with-block closes the file even when a malformed line raises.
    with open(inputfile, 'r') as inputf:
        # Iterate over the file object directly instead of using readlines().
        for line in inputf:
            # Strip only the line terminator: slicing off the last character
            # would eat a digit of the count when the final line has no
            # trailing newline.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            # Split on the last '|' so the tag itself may contain the separator.
            (tag, count) = line.rsplit('|', 1)
            taglist.append((tag, int(count)))
    # Sort by count. key= replaces the cmp-function form of sort(), which
    # exists only in Python 2; the resulting order is the same.
    taglist.sort(key=lambda entry: entry[1])
    return taglist
30
def getRanges(taglist):
    """Derive five count ranges spanning the lowest to the highest count.

    Args:
        taglist: Sequence of ``(tag, count)`` entries. The order does not
            matter; the lowest and highest counts are looked up explicitly.

    Returns:
        A list of five ``(low, high)`` tuples. Each range is one quarter of
        ``maxcount - mincount`` wide, the first one starts at the lowest
        count, and consecutive ranges share a boundary, so the fourth range
        ends exactly at the highest count. Bounds are floats. When all
        counts are equal every range collapses to ``(count, count)``. An
        empty ``taglist`` yields an empty list.
    """
    if not taglist:
        return []
    counts = [entry[1] for entry in taglist]
    mincount = min(counts)
    maxcount = max(counts)
    # Float division on purpose: with integer division a span below 4 gave a
    # step of 0 and the former ``while index <= maxcount`` loop never ended,
    # and a span not divisible by 4 produced a sixth range.
    distrib = (maxcount - mincount) / 4.0
    # Build a fixed number of ranges instead of stepping until maxcount, so
    # the result length never depends on rounding of the step.
    return [
        (mincount + step * distrib, mincount + (step + 1) * distrib)
        for step in range(5)
    ]
42
def writeCloud(taglist, ranges, outputfile):
    """Write the tag cloud as an HTML fragment.

    The output starts with a small stylesheet defining five font-size
    classes, followed by one ``<span><a>`` element per tag in alphabetical
    order. Each tag links to a Google search for the tag and gets the class
    of the first range that contains its count.

    Args:
        taglist: List of ``(tag, count)`` entries. It is sorted in place by
            tag, so the caller's list is reordered.
        ranges: Sequence of ``(low, high)`` bounds, both inclusive, ordered
            from the smallest to the largest font size. Ranges beyond the
            fifth reuse the largest class.
        outputfile: Path of the file to write; existing content is replaced.

    NOTE: tag text is written verbatim, without HTML or URL escaping.
    """
    # 'smallestTag' must match the '.smallestTag' rule written below.
    rangeStyle = ['smallestTag', 'smallTag', 'mediumTag', 'largeTag', 'largestTag']
    lastStyle = len(rangeStyle) - 1
    # Re-sort the tags alphabetically (key= works on Python 2 and 3).
    taglist.sort(key=lambda entry: entry[0])
    # The with-block closes the file even if writing fails part-way.
    with open(outputfile, 'w') as outputf:
        outputf.write("<style type=\"text/css\">\n")
        outputf.write(".smallestTag {font-size: xx-small;}\n")
        outputf.write(".smallTag {font-size: small;}\n")
        outputf.write(".mediumTag {font-size: medium;}\n")
        outputf.write(".largeTag {font-size: large;}\n")
        outputf.write(".largestTag {font-size: xx-large;}\n")
        outputf.write("</style>\n")
        for tag in taglist:
            # The URL depends only on the tag, so build it once per tag.
            # '%3A' is the percent-encoded ':' of the 'site:' search operator.
            url = 'http://www.google.com/search?q=' + tag[0].replace(' ', '+') + '+site%3Ayoursiteurl'
            for rangeIndex, bounds in enumerate(ranges):
                if bounds[0] <= tag[1] <= bounds[1]:
                    # Clamp so that a sixth (or later) range cannot index
                    # past the end of the style list.
                    style = rangeStyle[min(rangeIndex, lastStyle)]
                    outputf.write('<span class=\"' + style + '\"><a href=\"' + url + '\">' + tag[0] + '</a></span>')
                    break
64
# Script entry point: build the tag cloud only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()