Python实现标签云
来源:http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html
   1 # !/usr/bin/env python
   2 # -*- coding: utf-8 -*- 
   3 # !/usr/bin/env python
   4 # -*- coding: utf-8 -*- 
   5 
   6 import string
   7 
   8 def main():
   9     # get the list of tags and their frequency from input file
  10     taglist = getTagListSortedByFrequency('/tmp/tag.txt')
  11     # find max and min frequency
  12     ranges = getRanges(taglist)
  13     # write out results to output, tags are written out alphabetically
  14     # with size indicating the relative frequency of their occurence
  15     writeCloud(taglist, ranges, 'tags.html')
  16 
  17 def getTagListSortedByFrequency(inputfile):
  18     inputf = open(inputfile, 'r')
  19     taglist = []
  20 
  21     for line in inputf: #原文中使用readlines,改为直接迭代文件对象,更pythonic
  22         line = line[:-1]
  23         (tag, count) = line.split('|')
  24         taglist.append((tag, int(count)))
  25 
  26     inputf.close()
  27     # sort tagdict by count
  28     taglist.sort(lambda x, y: cmp(x[1], y[1]))
  29     return taglist
  30 
  31 def getRanges(taglist):
  32     mincount = taglist[0][1]
  33     maxcount = taglist[len(taglist) - 1][1]
  34     distrib = (maxcount - mincount) / 4
  35     index = mincount
  36     ranges = []
  37     while index <= maxcount:
  38         range = (index, index + distrib)
  39         index = index + distrib
  40         ranges.append(range)
  41     return ranges
  42 
  43 def writeCloud(taglist, ranges, outputfile):
  44     outputf = open(outputfile, 'w')
  45     outputf.write("<style type=\"text/css\">\n")
  46     outputf.write(".smallestTag {font-size: xx-small;}\n")
  47     outputf.write(".smallTag {font-size: small;}\n")
  48     outputf.write(".mediumTag {font-size: medium;}\n")
  49     outputf.write(".largeTag {font-size: large;}\n")
  50     outputf.write(".largestTag {font-size: xx-large;}\n")
  51     outputf.write("</style>\n")
  52     rangeStyle = ['smallesTag', 'smallTag', 'mediumTag', 'largeTag', 'largestTag']
  53     # resort the tags alphabetically
  54     taglist.sort(lambda x, y: cmp(x[0], y[0]))
  55     for tag in taglist:
  56         rangeIndex = 0
  57         for range in ranges:
  58             url = 'http://www.google.com/search?q=' + tag[0].replace(' ', '+') + '+site%3yoursiteurl'
  59             if tag[1] >= range[0] and tag[1] <= range[1]:
  60                 outputf.write('<span class=\"' + rangeStyle[rangeIndex] + '\"><a href=\"' + url + '\">' + tag[0] + '</a></span>')
  61                 break
  62             rangeIndex = rangeIndex + 1
  63     outputf.close()
  64 
  65 if __name__ == '__main__':
  66     main()
