# Python实现标签云 来源: http://sujitpal.blogspot.com/2007/04/building-tag-cloud-with-python.html
# !/usr/bin/env python # -*- coding: utf-8 -*-
import string
def main():
    """Build the tag cloud page from the tag/frequency input file.

    Reads ``/tmp/tag.txt``, derives the frequency ranges from it, and writes
    the resulting HTML to ``tags.html`` (a relative path, so it lands in the
    current working directory).
    """
    # get the list of tags and their frequency from input file
    taglist = getTagListSortedByFrequency('/tmp/tag.txt')
    # find max and min frequency
    ranges = getRanges(taglist)
    # write out results to output, tags are written out alphabetically
    # with size indicating the relative frequency of their occurrence
    writeCloud(taglist, ranges, 'tags.html')
def getTagListSortedByFrequency(inputfile):
    """Read ``tag|count`` lines from *inputfile*, sorted by ascending count.

    Each non-blank line must hold a tag and an integer frequency separated
    by ``|``. Blank lines are skipped.

    Args:
        inputfile: Path of the text file to read.

    Returns:
        A list of ``(tag, count)`` tuples, lowest count first. Tags with
        equal counts keep their order of appearance in the file.

    Raises:
        ValueError: If a non-blank line has no ``|`` separator or its count
            is not an integer.
    """
    taglist = []
    # ``with`` closes the file even when a line fails to parse.
    with open(inputfile, 'r') as inputf:
        for line in inputf:
            # Strip only the line terminator, so the last line survives
            # intact when the file does not end with a newline.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            # Split on the last '|' so the count is always the final field.
            tag, count = line.rsplit('|', 1)
            taglist.append((tag, int(count)))
    # ``key=`` works on Python 2.4+ and Python 3 (cmp-style sorting does not).
    taglist.sort(key=lambda item: item[1])
    return taglist
def getRanges(taglist):
    """Split the span of tag frequencies into five equal-width ranges.

    Args:
        taglist: List of ``(tag, count)`` tuples, in any order.

    Returns:
        A list of five ``(low, high)`` tuples. The first range starts at the
        smallest count and each range is a quarter of ``max - min`` wide, so
        the fifth range starts at the largest count. When every count is the
        same, all five ranges are ``(count, count)``. An empty *taglist*
        yields an empty list.
    """
    if not taglist:
        return []
    counts = [entry[1] for entry in taglist]
    # Take the extremes directly instead of trusting the list to be sorted.
    mincount = min(counts)
    maxcount = max(counts)
    # Float division: integer division would truncate the step to 0 for
    # spans smaller than 4 and produce the wrong number of ranges otherwise.
    distrib = (maxcount - mincount) / 4.0
    # Fixed-length construction always terminates, including when the step
    # is 0, and always produces exactly five ranges.
    return [(mincount + step * distrib, mincount + (step + 1) * distrib)
            for step in range(5)]
def writeCloud(taglist, ranges, outputfile):
    """Write the tag cloud as an HTML fragment to *outputfile*.

    The output starts with a ``<style>`` block defining five font-size
    classes, followed by one ``<span>`` per tag in alphabetical order. Each
    tag links to a Google search restricted to sujitpal.blogspot.com and is
    given the class of the first range that contains its count.

    Args:
        taglist: List of ``(tag, count)`` tuples. The list is not modified.
        ranges: List of ``(low, high)`` tuples, both bounds inclusive. The
            first matching range wins, so a count sitting on a boundary
            shared by two ranges takes the smaller style. A tag whose count
            matches no range is left out of the output.
        outputfile: Path of the file to create or overwrite.
    """
    rangeStyle = ['smallestTag', 'smallTag', 'mediumTag', 'largeTag',
                  'largestTag']
    # ``with`` guarantees the output is flushed and closed.
    with open(outputfile, 'w') as outputf:
        outputf.write("<style type=\"text/css\">\n")
        outputf.write(".smallestTag {font-size: xx-small;}\n")
        outputf.write(".smallTag {font-size: small;}\n")
        outputf.write(".mediumTag {font-size: medium;}\n")
        outputf.write(".largeTag {font-size: large;}\n")
        outputf.write(".largestTag {font-size: xx-large;}\n")
        outputf.write("</style>\n")
        # Order the tags alphabetically on a copy so the caller's list keeps
        # its order.
        for entry in sorted(taglist, key=lambda item: item[0]):
            name = entry[0]
            count = entry[1]
            # The URL depends only on the tag, so build it once per tag.
            # NOTE: the tag text is written into the URL and the HTML as-is,
            # without escaping.
            url = ('http://www.google.com/search?q=' + name.replace(' ', '+')
                   + '+site%3Asujitpal.blogspot.com')
            for rangeIndex, bounds in enumerate(ranges):
                if bounds[0] <= count <= bounds[1]:
                    # Clamp so more than five ranges cannot index past the
                    # last style.
                    style = rangeStyle[min(rangeIndex, len(rangeStyle) - 1)]
                    outputf.write('<span class=\"' + style + '\"><a href=\"'
                                  + url + '\">' + name + '</a></span>')
                    break
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()