Python实现标签云 来源:http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*- 
   3 
   4 import string
   5 
   6 def main():
   7     # get the list of tags and their frequency from input file
   8     taglist = getTagListSortedByFrequency('/tmp/tag.txt')
   9     # find max and min frequency
  10     ranges = getRanges(taglist)
  11     # write out results to output, tags are written out alphabetically
  12     # with size indicating the relative frequency of their occurence
  13     writeCloud(taglist, ranges, 'tags.html')
  14 
  15 def getTagListSortedByFrequency(inputfile):
  16     inputf = open(inputfile, 'r')
  17     taglist = []
  18     for line in inputf:
  19         line = inputf[:-1]
  20         (tag, count) = line.split('|')
  21         taglist.append((tag, int(count)))
  22 
  23     inputf.close()
  24     # sort tagdict by count
  25     taglist.sort(lambda x, y: cmp(x[1], y[1]))
  26     return taglist
  27 
  28 def getRanges(taglist):
  29     mincount = taglist[0][1]
  30     maxcount = taglist[len(taglist) - 1][1]
  31     distrib = (maxcount - mincount) / 4
  32     index = mincount
  33     ranges = []
  34     while index <= maxcount:
  35         range = (index, index + distrib)
  36         index = index + distrib
  37         ranges.append(range)
  38     return ranges
  39 
  40 def writeCloud(taglist, ranges, outputfile):
  41     outputf = open(outputfile, 'w')
  42     outputf.write("<style type=\"text/css\">\n")
  43     outputf.write(".smallestTag {font-size: xx-small;}\n")
  44     outputf.write(".smallTag {font-size: small;}\n")
  45     outputf.write(".mediumTag {font-size: medium;}\n")
  46     outputf.write(".largeTag {font-size: large;}\n")
  47     outputf.write(".largestTag {font-size: xx-large;}\n")
  48     outputf.write("</style>\n")
  49     rangeStyle = ['smallesTag', 'smallTag', 'mediumTag', 'largeTag', 'largestTag']
  50     # resort the tags alphabetically
  51     taglist.sort(lambda x, y: cmp(x[0], y[0]))
  52     for tag in taglist:
  53         rangeIndex = 0
  54         for range in ranges:
  55             url = 'http://www.google.com/search?q=' + tag[0].replace(' ', '+') + '+site%3Asujitpal.blogspot.com'
  56             if tag[1] >= range[0] and tag[1] <= range[1]:
  57                 outputf.write('<span class=\"' + rangeStyle[rangeIndex] + '\"><a href=\"' + url + '\">' + tag[0] + '</a></span>')
  58                 break
  59             rangeIndex = rangeIndex + 1
  60     outputf.close()
  61 
  62 if __name__ == '__main__':
  63     main()