##language:en
#pragma section-numbers on
::-- [[hoxide]]

= Proxy Server Testing =

''Overview''

Getting from the campus network to the outside Internet costs money, so a proxy is indispensable. But of the many proxies listed online, which one is fastest? Plenty of tools already do this, yet none of them felt comfortable to use. Since we can write programs, why not build a small tool ourselves?

== Code ==

{{{#!python
import socket
import time
import urllib
from HTMLParser import HTMLParser

def parserhtmllist(htmldata):
    """Extract (server, port) pairs from the proxy-list page.

    The page lists proxies in <tr class="cells"> rows: the first
    <td> holds a row number, the next two hold host and port.
    """
    class MyHTMLParser(HTMLParser):
        def set(self):
            self.S = 'none'  # state: 'none' / 'cell' (in a row) / 'celltd' (in a cell)
            self.I = []      # one list of cell texts per table row
            self.l = 0       # <td> nesting depth
        def handle_starttag(self, tag, attrs):
            if ('class', 'cells') in attrs and tag == 'tr':
                self.l = 0
                self.S = 'cell'
                self.I.append([])
            if self.S == 'cell' and tag == 'td':
                self.l += 1
                self.S = 'celltd'
        def handle_endtag(self, tag):
            if self.S == 'celltd' and tag == 'td':
                self.l -= 1
                if self.l == 0:
                    self.S = 'cell'
            if self.S == 'cell' and tag == 'tr':
                self.S = 'none'
        def handle_data(self, data):
            if self.S == 'celltd' and self.l >= 1:
                self.I[-1].append(data)
        def getlist(self):
            I = []
            for x in self.I:
                try:
                    int(x[0])  # keep only rows whose first cell is a row number
                    I.append((x[1], x[2]))
                except (ValueError, IndexError):
                    pass
            return I

    p = MyHTMLParser()
    p.set()
    p.feed(htmldata)
    p.close()
    return p.getlist()

def getproxylist(proxylisturl, testurls, proxies={}, maxtime=20, debug=True):
    # Abort any single request after maxtime seconds instead of hanging.
    socket.setdefaulttimeout(maxtime)
    opener = urllib.FancyURLopener(proxies)
    data = opener.open(proxylisturl).read()
    I = parserhtmllist(data)
    TI = []
    for server, port in I:
        proxy = {'http': 'http://%s:%s' % (server, port)}
        opener = urllib.FancyURLopener(proxy)  # route the test through this proxy
        TI.append([])
        if debug:
            print 'testing %s:%s' % (server, port)
        for url in testurls:
            try:
                st = time.time()
                opener.open(url).read()  # time the full fetch, not just the connect
                et = time.time()
                TI[-1].append(et - st)
            except (IOError, socket.error):
                TI[-1].append(maxtime)  # penalize unreachable or too-slow proxies
    return zip(I, TI)

if __name__ == '__main__':
    proxylisturl = 'http://www.haozs.net/proxyip/index.php?' \
                   'act=list&port=&type=&country=China&page=1'
    #proxylisturl = 'a.html'  # a saved local copy works for offline testing
    testurls = ['http://www.google.com']
    M = getproxylist(proxylisturl, testurls)
    M.sort(key=lambda x: sum(x[1]))
    print '\nResult (sorted):'
    for x in M:
        print '%s:%s\t%g' % (x[0][0], x[0][1], sum(x[1]))
}}}

== Analysis ==

The HTML handling is the fiddly part. `parserhtmllist` drives HTMLParser as a small state machine: a `<tr class="cells">` start tag enters the `cell` state and opens a new row, each `<td>` switches to `celltd` (tracking nesting depth in `l`) so that `handle_data` collects the cell text, and the matching end tags walk the states back out. `getlist()` then keeps only the rows whose first cell is a row number and returns the host and port columns. Anyone familiar with parsers should find it straightforward; standalone sketches of the parser and of the timing step follow in the feedback below.

= Feedback =
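To see the parser in action without fetching anything, `parserhtmllist` can be fed a hand-written fragment shaped the way the proxy-list table is assumed to look: rows marked `<tr class="cells">` whose first cell is a row number. A minimal sketch, assuming the article's code has been saved as a module named `proxytest.py` (a made-up name):

{{{#!python
# Minimal sketch; `proxytest` is a hypothetical module name for the
# article's code, and the table fragment below is hand-written test data.
from proxytest import parserhtmllist

sample = '''
<table>
  <tr class="cells"><td>1</td><td>10.0.0.1</td><td>8080</td></tr>
  <tr class="cells"><td>2</td><td>192.0.2.7</td><td>3128</td></tr>
  <tr class="cells"><td>IP</td><td>not</td><td>a proxy row</td></tr>
</table>
'''

# The header-like third row is dropped because int('IP') fails in getlist().
print parserhtmllist(sample)
# -> [('10.0.0.1', '8080'), ('192.0.2.7', '3128')]
}}}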
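The measurement step can likewise be tried in isolation. The sketch below (the proxy address is a placeholder, not a working server) builds a `FancyURLopener` around a single proxy, sets a socket-level timeout so a dead proxy cannot hang the script, and times the complete download:

{{{#!python
# Minimal sketch of timing one fetch through one proxy; the proxy
# address is a placeholder, not a real server.
import socket
import time
import urllib

socket.setdefaulttimeout(20)  # give up on any request after 20 seconds

proxy = {'http': 'http://10.0.0.1:8080'}  # placeholder proxy
opener = urllib.FancyURLopener(proxy)

st = time.time()
try:
    opener.open('http://www.google.com').read()  # read() so the body counts too
    print 'fetched in %g seconds' % (time.time() - st)
except (IOError, socket.error):
    print 'proxy unreachable or timed out'
}}}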