TableOfContents

Include(ZPyUGnav)

Python实现标签云

来源:http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html

#!/usr/bin/env python
# -*- coding: utf-8 -*-
   5 
   6 import string
   7 
   8 def main():
   9     # get the list of tags and their frequency from input file
  10     taglist = getTagListSortedByFrequency('/tmp/tag.txt')
  11     # find max and min frequency
  12     ranges = getRanges(taglist)
  13     # write out results to output, tags are written out alphabetically
  14     # with size indicating the relative frequency of their occurence
  15     writeCloud(taglist, ranges, 'tags.html')
  16 
  17 def getTagListSortedByFrequency(inputfile):
  18     inputf = open(inputfile, 'r')
  19     taglist = []
  20     for line in inputf:
  21         line = line[:-1]
  22         (tag, count) = line.split('|')
  23         taglist.append((tag, int(count)))
  24 
  25     inputf.close()
  26     # sort tagdict by count
  27     taglist.sort(lambda x, y: cmp(x[1], y[1]))
  28     return taglist
  29 
  30 def getRanges(taglist):
  31     mincount = taglist[0][1]
  32     maxcount = taglist[len(taglist) - 1][1]
  33     distrib = (maxcount - mincount) / 4
  34     index = mincount
  35     ranges = []
  36     while index <= maxcount:
  37         range = (index, index + distrib)
  38         index = index + distrib
  39         ranges.append(range)
  40     return ranges
  41 
  42 def writeCloud(taglist, ranges, outputfile):
  43     outputf = open(outputfile, 'w')
  44     outputf.write("<style type=\"text/css\">\n")
  45     outputf.write(".smallestTag {font-size: xx-small;}\n")
  46     outputf.write(".smallTag {font-size: small;}\n")
  47     outputf.write(".mediumTag {font-size: medium;}\n")
  48     outputf.write(".largeTag {font-size: large;}\n")
  49     outputf.write(".largestTag {font-size: xx-large;}\n")
  50     outputf.write("</style>\n")
  51     rangeStyle = ['smallesTag', 'smallTag', 'mediumTag', 'largeTag', 'largestTag']
  52     # resort the tags alphabetically
  53     taglist.sort(lambda x, y: cmp(x[0], y[0]))
  54     for tag in taglist:
  55         rangeIndex = 0
  56         for range in ranges:
  57             url = 'http://www.google.com/search?q=' + tag[0].replace(' ', '+') + '+site%3Asujitpal.blogspot.com'
  58             if tag[1] >= range[0] and tag[1] <= range[1]:
  59                 outputf.write('<span class=\"' + rangeStyle[rangeIndex] + '\"><a href=\"' + url + '\">' + tag[0] + '</a></span>')
  60                 break
  61             rangeIndex = rangeIndex + 1
  62     outputf.close()
  63 
  64 if __name__ == '__main__':
  65     main()