Size: 2460
Comment:
|
Size: 2637
Comment:
|
Deletions are marked like this. | Additions are marked like this. |
Line 1: | Line 1: |
Python实现标签云 | ##language:zh #pragma section-numbers off ##含有章节索引导航的 ZPyUG 文章通用模板 [[TableOfContents]] ## 默许导航,请保留 [[Include(ZPyUGnav)]] = Python实现标签云 = |
Python实现标签云
来源:http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html
1 # !/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # !/usr/bin/env python
4 # -*- coding: utf-8 -*-
5
6 import string
7
def main():
    """Build the tag cloud page from the tag frequency file.

    Reads the tag counts from '/tmp/tag.txt', derives the frequency ranges
    from them, and writes the resulting HTML to 'tags.html' (a relative
    path).
    """
    # Load the (tag, count) pairs from the input file.
    tags_with_counts = getTagListSortedByFrequency('/tmp/tag.txt')
    # Derive the frequency ranges spanning the lowest to the highest count.
    frequency_ranges = getRanges(tags_with_counts)
    # Emit the cloud: tags appear alphabetically, and the CSS class of each
    # tag reflects the relative frequency of its occurrence.
    writeCloud(tags_with_counts, frequency_ranges, 'tags.html')
16
def getTagListSortedByFrequency(inputfile):
    """Read ``tag|count`` records from a file, sorted by ascending count.

    Args:
        inputfile: Path of a text file holding one ``tag|count`` record per
            line. Blank lines are ignored.

    Returns:
        A list of ``(tag, count)`` tuples, with ``count`` converted to int,
        sorted by ascending count.

    Raises:
        ValueError: If a non-blank line has no ``|`` separator or its count
            is not an integer.
    """
    taglist = []
    # The with-block closes the file even when a malformed line raises.
    with open(inputfile, 'r') as inputf:
        for line in inputf:
            # Strip only the line terminator. Unconditionally slicing off the
            # last character would truncate the count on a final line that
            # has no trailing newline.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            # Split on the last '|' so the tag itself may contain that
            # character.
            tag, count = line.rsplit('|', 1)
            taglist.append((tag, int(count)))
    # key= sorting works on both Python 2 and Python 3; the cmp-function
    # form of list.sort() exists only on Python 2.
    taglist.sort(key=lambda item: item[1])
    return taglist
29
def getRanges(taglist):
    """Split the span of tag counts into at most five contiguous ranges.

    Args:
        taglist: A list of ``(tag, count)`` tuples. It does not need to be
            sorted; the smallest and largest counts are looked up directly.

    Returns:
        A list of ``(low, high)`` tuples:

        * ``[]`` when ``taglist`` is empty;
        * a single ``(count, count)`` range when every tag has the same
          count;
        * otherwise exactly five equal-width ranges, where the first starts
          at the smallest count, the last ends at the largest count, and
          each range's ``high`` equals the next range's ``low``.
    """
    if not taglist:
        return []

    counts = [item[1] for item in taglist]
    mincount = min(counts)
    maxcount = max(counts)

    # A zero-width span would give a zero step; one range covers everything.
    if mincount == maxcount:
        return [(mincount, maxcount)]

    # Float division keeps the step non-zero on Python 2 as well, where
    # int / int truncates and a spread smaller than the divisor yields 0.
    bucket_count = 5
    step = (maxcount - mincount) / float(bucket_count)

    # Build the six boundaries explicitly. The last one is pinned to
    # maxcount so float rounding can never leave the top count uncovered.
    bounds = [mincount + i * step for i in range(bucket_count)]
    bounds.append(maxcount)
    return [(bounds[i], bounds[i + 1]) for i in range(bucket_count)]
41
def writeCloud(taglist, ranges, outputfile):
    """Write the tag cloud as an HTML fragment.

    The output starts with a ``<style>`` block defining five font-size
    classes, followed by one ``<span><a>`` element per tag, in alphabetical
    tag order. Each tag links to a Google search restricted to
    sujitpal.blogspot.com and is styled by the first range that contains
    its count.

    Args:
        taglist: A list of ``(tag, count)`` tuples. It is sorted in place
            by tag.
        ranges: A list of ``(low, high)`` tuples; both bounds are inclusive.
            A tag whose count falls in none of them is not written.
        outputfile: Path of the file to create or overwrite.
    """
    # Class names must match the CSS rules written below, one per range
    # position.
    rangeStyle = ['smallestTag', 'smallTag', 'mediumTag', 'largeTag', 'largestTag']
    lastStyle = len(rangeStyle) - 1

    # Re-sort the tags alphabetically. key= works on Python 2 and 3.
    taglist.sort(key=lambda item: item[0])

    # The with-block closes the file even if a write fails part-way.
    with open(outputfile, 'w') as outputf:
        outputf.write("<style type=\"text/css\">\n")
        outputf.write(".smallestTag {font-size: xx-small;}\n")
        outputf.write(".smallTag {font-size: small;}\n")
        outputf.write(".mediumTag {font-size: medium;}\n")
        outputf.write(".largeTag {font-size: large;}\n")
        outputf.write(".largestTag {font-size: xx-large;}\n")
        outputf.write("</style>\n")

        for tag in taglist:
            # The URL depends only on the tag, so build it once per tag.
            # NOTE: the tag text is written into the URL and the HTML
            # without any escaping.
            url = 'http://www.google.com/search?q=' + tag[0].replace(' ', '+') + '+site%3Asujitpal.blogspot.com'
            for rangeIndex, bounds in enumerate(ranges):
                if bounds[0] <= tag[1] <= bounds[1]:
                    # Clamp so that more ranges than styles cannot raise
                    # IndexError; extra ranges share the largest style.
                    style = rangeStyle[min(rangeIndex, lastStyle)]
                    outputf.write('<span class=\"' + style + '\"><a href=\"' + url + '\">' + tag[0] + '</a></span>')
                    break
63
# Build the tag cloud only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()