Attachment 'lightTracker.py'

Download

   1 #!/usr/bin/env python
   2 
   3 # --------- Section: lightTracker Project Introduction ------------
   4 # lightTracker is a BitTorrent tracker implementation using
   5 # the web.py framework for request/response handling and MySQL
   6 # as data store. 
   7 #
   8 # The official tracker:
   9 # * is Twisted-based, so it has a relatively steep learning curve.
  10 # * uses lots of async requests/responses, which make the code hard to understand.
  11 # * implements a local data store without using standard databases.
  12 #
  13 # lightTracker is an attempt to write a succinct, clear and
  14 # easy-to-understand tracker.
  15 # * based on the popular, neat web.py framework.
  16 # * all requests done in a sync manner. 
  17 # * uses MySQL as the local datastore, which makes scaling out to
  18 #   multiple boxes possible.
  19 # * Instead of managing the cache by ourselves, we depend on
  20 #   the database software to handle that for us.
  21 #   
  22 #
  23 # In order to run the tracker, simply do:
  24 #    >>python lightTracker.py
  25 #
  26 #
  27 # ----------- Section: Code Structure ----------------------
  28 # The web.py's 'urls' is a nice TOC, or sitemap, to understand
  29 # the structure. This tracker implements only 'announce' and
  30 # 'scrape' functions.
  31 #
  32 # I'd recommend you to follow these steps to understand the code:
  33 # 1. understand the database schema. Only two tables, one foreign key.
  34 # 2. read the unit test code to see how parameters are put together.
  35 # 3. start with Section "Book keeping" and "Prepare Response Messages"
  36 #
  37 #
  38 # ----------- Section: Road Map ----------------------------
  39 # 1. Performance boost: the official tracker can serve 82 requests/sec
  40 #    while lightTracker can only take 24 reqs/sec. One idea is to
  41 #    use cache but the logic will get quite complicated if we want to
  42 #    get the statistics right. 
  43 #
  44 # Alex Dong ( alex.dong at gmail.com)
  45 
  46 import web
  47 import const
  48 from models import torrent_db, peer_db
  49 from bencode import bencode, bdecode
  50 from urllib import quote, unquote
  51 from logger import logger
  52 
# Module-wide logger tagged 'TRACKER'.  Note that this rebinds the name
# `logger`: after this line it refers to the object returned by the call,
# and the callable imported from the `logger` module is no longer reachable
# under that name.
logger = logger('TRACKER')
  54 
  55 def size_format(s):
  56     if (s < 1024):
  57         r = str(s) + 'B'
  58     elif (s < 1048576):
  59         r = str(int(s/1024)) + 'KiB'
  60     elif (s < 1073741824):
  61         r = str(int(s/1048576)) + 'MiB'
  62     elif (s < 1099511627776):
  63         r = str(int((s/1073741824.0)*100.0)/100.0) + 'GiB'
  64     else:
  65         r = str(int((s/1099511627776.0)*100.0)/100.0) + 'TiB'
  66     return r
  67 
  68 
  69 class announce:
  70     def GET(self):
  71         params = web.input()
  72         web.header("Content-Type", "text/plain")
  73         web.header("Pragma", "no-cache")
  74 
  75 
  76         # ------------------- Section: Input Validation --------------------
  77 
  78         # Rules: http://wiki.theory.org/BitTorrentSpecification
  79         #
  80         # info_hash:    mandatory.  20-byte. 
  81         # peer_id:      mandatory.  20-byte. 
  82         # port:         mandatory.  integer
  83         #
  84         # compact:      optional.   '1' means compact peer list.
  85         # uploaded:     optional.   base ten of ascii. bytes uploaded
  86         # downloaded:   optional.   bytes downloaded.
  87         # left:         optional.   bytes left.
  88         # event:        optional.   once specified, must be one of 
  89         #                           'started', 'completed', 'stopped'
  90         # numwant:      optional:   integer >= 0
  91         #
  92         # ip:           optional    [ignore] dotted quad format or rfc3513. 
  93         # key:          optional:   [ignore] string
  94         # trackerid:    optional:   [ignore] string
  95         if  not hasattr(params, 'info_hash') or \
  96             not hasattr(params, 'peer_id')   or \
  97             not hasattr(params, 'port'):
  98             web.output( bencode({'failure reason:': "Mandatory fields are missing. "}))
  99             logger.info("At least one of ('info_hash', 'peer_id', 'port') is missing. " \
 100                 + " request: (%s, %s, %s). returning." %  (hasattr(params, 'info_hash'), \
 101                 hasattr(params, 'peer_id'), hasattr(params, 'port')))
 102             return
 103             
 104         if len(params.peer_id) != 20:
 105             logger.info("peer_id '%s' is less than 20." %(params.peer_id))
 106             web.output( bencode({'failure reason:': "Request.peer_id expects to be length 20."}))
 107             return
 108 
 109         # Need to encode the peer_id since some of it, like uTorrent, use hex digits instead
 110         # of pure string for the peer_id
 111         peer_id = unquote(params.peer_id).encode('hex')
 112         logger.debug("Processing request from peer: %s" % peer_id)
 113         
 114         if not params.port.isdigit():
 115             web.output( bencode({'failure reason:': "Request.port is invalid"}))
 116             logger.info("invalid port received: %s" % params.port)
 117             return
 118 
 119         port = int(params.port)
 120         if port < 0 or port > 65535:
 121             web.output( bencode({'failure reason:': "Request.port is invalid"}))
 122             logger.info("invalid port number: %d." % port)
 123             return
 124 
 125         if hasattr(params, 'event') and params.event not in ['started', 'completed', 'stopped']:
 126             web.output( bencode({'failure reason:': "Request.event should be started, completed or stopped."}))
 127             logger.info("invalid event: '%s'." % params.event)
 128             return
 129 
 130         # ----------- Section: Input Normalization -------------------
 131         #
 132         # This section we're normalizing the parameters by
 133         # 1. setting default values.
 134         # 2. checking input parameter types.
 135         # 3. replace incorrect parameters with default values.
 136         # 4. detect parameter conflictions and try to resolve them.
 137         if not hasattr(params, 'numwant'):
 138             params.numwant = 25
 139         elif not params.numwant.isdigit():
 140             web.output( bencode({'failure reason:': "Request.numwant should be integer. "}))
 141             logger.info("invalid numwant: '%s'." % params.numwant)
 142             return            
 143         else:
 144             params.numwant = int(params.numwant)
 145         # Before we start looking through the peers table
 146         # and return the peer_list, we have to decide how
 147         # many peers we want to return.
 148         # Do check out the "Implementor's Note" in
 149         # http://wiki.theory.org/BitTorrentSpecification
 150         numwant = min(params.numwant, 55)        
 151 
 152         if not hasattr(params, 'left'): 
 153             logger.info("'left' not present, setting it to 0")
 154             params.left = 0
 155         elif not params.left.isdigit():
 156             web.output( bencode({'failure reason:': "Request.left should be ten-based ascii"}))
 157             logger.info("invalid left bytes: %s." % params.left)
 158             return
 159         else:
 160             params.left = int(params.left)
 161         logger.debug("params.left = %s" % params.left)
 162     
 163         # 'complete' event can't still have stuff to download ('left'>0)
 164         if hasattr(params, 'event') and params.event == 'completed' and params.left > 0:
 165             web.output( bencode({'failure reason:': "Inconsistent completed and left parameters."}))
 166             logger.info("'complete' event conflicts with params.left (%s)" % params.left)
 167             return
 168         logger.debug("params.event = %s" % (hasattr(params, 'event') and params.event or None))
 169 
 170         # For personal and corporate trackers, you have the choice to only track torrents you've created. 
 171         info_hash = unquote(params.info_hash).encode('hex')
 172         t = torrent_db.getone(info_hash=info_hash)
 173         if not t:
 174             web.output( bencode({'failure reason:': "Requested download is not authorized for use with this tracker."}))
 175             logger.debug("can not find torrent with info_hash: %s." % info_hash)
 176             return
 177         torrent_id = t.id
 178 
 179             
 180         # Update 'peers' table with this peer's information. 
 181         # Note that the 'peers' table's primary key is composited
 182         # by the 'peer_id' and 'torrent_id' columns.
 183 
 184         # TODO: need to make sure our support of IP_forward, NAT, etc.
 185         ip = web.ctx.ip
 186         if not peer_db.has(peer_id=peer_id, torrent_id=torrent_id):
 187             peer_db.insert(peer_id=peer_id, torrent_id=torrent_id, ip=ip, port=port, status=const.PEER_UNINITIALIZED)
 188             logger.debug("peer inserted into 'peers' table.")
 189         else:
 190             peer_db.update(
 191                     where='peer_id = "$peer_id" and torrent_id = torrent_id',
 192                     vars={'peer_id':peer_id, 'torrent_id':torrent_id},
 193                     ip=ip, port=port, torrent_id=torrent_id)
 194 
 195 
 196         # --------------------- Section: Book Keeping ----------------------
 197         # Update the peer's status according to the event we received.
 198         logger.debug("Updating book keeping. event=%s, left=%s" % (hasattr(params, 'event') and params.event or None, params.left))
 199         if not hasattr(params, 'event') or params.event == 'started':
 200             if params.left > 0:
 201                 logger.debug("params.left>0. set peer status.to 'incomplete'")
 202                 self._update_peer_status(peer_id, torrent_id, const.PEER_INCOMPLETE)
 203             else:
 204                 logger.debug("params.left==0. set peer status to 'complete'")
 205                 self._update_peer_status(peer_id, torrent_id, const.PEER_COMPLETE)
 206         elif params.event == 'completed':
 207             self._update_peer_status(peer_id, torrent_id, const.PEER_COMPLETE)
 208         else:
 209             # params.event == 'stopped':
 210             self._update_peer_status(peer_id, torrent_id, const.PEER_STOPPED)
 211             
 212                
 213         # ---------------Section: Prepare Response Message ------------------
 214         # Here the logic is quite simple. Just go through the 'peers' table
 215         # to pick out the peers for the specified  torrent_id and peer_id pair.
 216         # According to the spec, put everything into hash_table, bencode it
 217         # and return.
 218 
 219         # Retrieve the torrent detail info again since the _update_peer_status
 220         # might have modified the statistics info. 
 221         t = torrent_db.getone(info_hash=info_hash)
 222         assert t
 223         data = {}
 224         data['complete'] = t.seeders
 225         data['incomplete'] = t.leechers
 226         data['interval'] = 30 * 60
 227         data['min_internal'] = 30 * 60
 228         logger.debug("torrent has %s seeders and %s leechers." % (t.seeders, t.leechers))
 229 
 230         
 231         # Return the list of peers who're associated with the
 232         # selected torrent. 
 233         #
 234         # TODO: we might want to implement something to optimize the peers
 235         #       Based on speed? geography? uptime? existing peers?
 236         #       One premature optimization idea is to return complete peers 
 237         #       first. The problem with this approach is that complete peers
 238         #       might be less stable than incomplete peers, who are actively
 239         #       downloading or participating.
 240         # TODO: shall we maintain the peer list by 'ping' them
 241         #       to ensure 'signal of life' and NAT traversal still working?
 242         # TODO: Clean up zombie peers who didn't signal event "stopped"
 243         #       before they die.
 244         # TODO: we should not delete any peer_id from 'peers' table. 
 245         peers = web.query('select * from peers where torrent_id = $torrent_id and peer_id != "$peer_id" limit $numwant', 
 246                         vars={"torrent_id":torrent_id, "peer_id":params.peer_id, "numwant":numwant})
 247         logger.debug("returning peer list: %s" % ', '.join(["%s:%s"%(p.ip, p.port) for p in peers]))    
 248         if not hasattr(params, 'compact') or params.compact == '1':
 249             peerlist = ''
 250             for peer in peers:
 251                 peerdata = self._compact_peer_info(peer.ip, peer.port)
 252                 peerlist += peerdata
 253         else:
 254             peerlist = []
 255             for peer in peers:
 256                 peerdata = {'peer_id':peer.peer_id,
 257                             'ip':peer.ip,
 258                             'port':peer.port}
 259                 peerlist.append(peerdata)
 260         
 261         data['peers'] = peerlist
 262 
 263         # Here we should use 'web.output' instead of 'print' because
 264         # 'print' will append a '\n' at the end of the response message,
 265         # which corrupts the bencode.bdecode() process.
 266         web.output( bencode(data))
 267 
 268     def _update_peer_status(self, peer_id, torrent_id, status):
 269         """
 270         Update Peer Status and Torrent's seeder/leecher statistics info.
 271 
 272         The logic of upadting seeder/leecher counts is a little bit complicated. 
 273         We need to know the peer's last known status in order to tell how should
 274         we update the status. Here is a matrix where 
 275         * '0' stands for 'incomplete',
 276         * '1' for 'complete',
 277         * '2' for 'stopped'.
 278         * '-1' for 'new inserted' found 
 279         * 'LE' for leechers
 280         * 'SE' for seeders and 
 281         * 'N' for no action.
 282 
 283         ||  Last State      ||  Current State   ||     Result       ||    idx   ||
 284         ||     -1           ||      0           ||      LE++        ||    (1)   || 
 285         ||     -1           ||      1           ||      SE++        ||    (2)   ||
 286         ||     -1           ||      2           ||       N          ||    (3)   ||
 287         ||     0            ||      0           ||       N          ||    (4)   ||
 288         ||     0            ||      1           ||  LE--, SE++      ||    (5)   ||
 289         ||     0            ||      2           ||      LE--        ||    (6)   ||
 290         ||     1            ||      0           ||  LE++, SE--      ||    (7)   ||
 291         ||     1            ||      1           ||       N          ||    (8)   ||
 292         ||     1            ||      2           ||      SE--        ||    (9)   ||
 293         ||     2            ||      0           ||      LE++        ||    (10)  ||
 294         ||     2            ||      1           ||      SE++        ||    (11)  ||
 295         ||     2            ||      2           ||       N          ||    (12)  ||
 296         """
 297         def update(field, dir):
 298             if field=='LE':
 299                 logger.debug("Update torrent (%s) leecher count %d with %d" % (t.id, t.leechers, dir))
 300                 torrent_db.update( 'id=$id', {'id':t.id}, leechers=t.leechers+dir)
 301             else:
 302                 logger.debug("Update torrent (%s) seeder count %d with %d" % (t.id, t.seeders, dir))
 303                 torrent_db.update( 'id=$id', {'id':t.id}, seeders=t.seeders+dir)
 304 
 305         logger.debug("set peer_id: %s, torrent_id: %s status to %s" % (peer_id, torrent_id, status))
 306         t = torrent_db.getone(id=torrent_id)
 307         p = peer_db.getone(peer_id=peer_id, torrent_id=torrent_id)
 308         assert p
 309         logger.debug("state transition: %s to %s" % (p.status, status))
 310 
 311         if p.status==-1 and status==0:           # [1]
 312             update('LE', 1)
 313         elif p.status==-1 and status==1:         # [2]
 314             update('SE', 1)
 315         elif p.status==-1 and status==2:         # [3]
 316             pass
 317         elif p.status==0 and status==1:     # [5]
 318             update('LE', -1)
 319             update('SE', 1)
 320         elif p.status==0 and status==2:     # [6]
 321             update('LE', -1)
 322         elif p.status==1 and status==0:     # [7]
 323             update('LE', 1)
 324             update('SE', -1)
 325         elif p.status==1 and status==2:     # [9]
 326             update('SE', -1)
 327         elif p.status==2 and status==0:     # [10]
 328             update('LE', 1)
 329         elif p.status==2 and status==1:     # [11]
 330             update('SE', 1)
 331         elif p.status==status:               # [4, 8, 12]
 332             pass
 333             
 334         peer_db.update(
 335             where='peer_id = $peer_id and torrent_id = $torrent_id', 
 336             vars={'peer_id':peer_id, 'torrent_id':torrent_id},
 337             status=status)
 338 
 339     def _compact_peer_info(self, ip, port):
 340         import socket, struct
 341         return socket.inet_aton(ip) + struct.pack('>H', int(port))
 342    
 343     def is_valid_ipv4(ip):
 344         a = ip.split('.')
 345         if len(a) != 4:
 346             return False
 347         try:
 348             for x in a:
 349                 chr(int(x))
 350             return True
 351         except:
 352             return False
 353 
 354     def is_local_ip(ip):
 355         try:
 356             v = [int(x) for x in ip.split('.')]
 357             if v[0] == 10 or v[0] == 127 or v[:2] in ([192, 168], [169, 254]):
 358                 return 1
 359             if v[0] == 172 and v[1] >= 16 and v[1] <= 31:
 360                 return 1
 361         except ValueError:
 362             return 0
 363          
 364 
 365     # TODO: Need to figure out what this is for. 
 366     def _get_forwarded_ip(headers):
 367         if headers.has_key('http_x_forwarded_for'):
 368             header = headers['http_x_forwarded_for']
 369             try:
 370                 x,y = header.split(',')
 371             except:
 372                 return header
 373             if not is_local_ip(x):
 374                 return x
 375             return y
 376         if headers.has_key('http_client_ip'):
 377             return headers['http_client_ip']
 378         if headers.has_key('http_via'):
 379             x = http_via_filter.search(headers['http_via'])
 380             try:
 381                 return x.group(1)
 382             except:
 383                 pass
 384         if headers.has_key('http_from'):
 385             return headers['http_from']
 386         return None
 387 
 388     def get_forwarded_ip(headers):
 389         x = _get_forwarded_ip(headers)
 390         if x is None or not is_valid_ipv4(x) or is_local_ip(x):
 391             return None
 392         return x
 393 
 394 class scrape:
 395     def GET(self):
 396         params = web.input()
 397         web.header("Content-Type", "text/plain")
 398         web.header("Pragma", "no-cache")
 399 
 400         # Althougth the specification says we should return a list
 401         # of all torrents we're hosting, in order to save the bandwidth,
 402         # we only allow 'scrape' on one torrent every time. 
 403         # As the result, we expect the info_hash to be presented.
 404         if not hasattr(params, 'info_hash'):
 405             web.output( bencode({'failure reason:':
 406             "Full scrape function is not available with this tracker."}))
 407             logger.debug("scrape without info_hash is returning. ")
 408             return
 409                 
 410         # The scrape response is a two level dictionary with first level
 411         # only one key named files. The second level is a dictionary by itself,
 412         # with the hex encoded 20-bytes info_hash as the key
 413         # and torrent statistics info dict as value. 
 414         #
 415         # Note that
 416         # 1. we always have only one torrent to track.
 417         # 2. the info_hash here doesn't need to be quoted.
 418         # 3. the database key in 'torrents' table is 40-char encoded string
 419         #    of the original infohash 20-byte stream.
 420         # 4. we need to encode the info_hash before using it to look up
 421         #    the corresponding torrent in database.
 422         info_hash = unquote(params.info_hash)
 423         t = torrent_db.getone(info_hash=info_hash.encode('hex'))
 424         logger.debug("info_hash: %s" % info_hash.encode('hex'))
 425 
 426         if not t:
 427             web.output( bencode({'failure reason:':
 428             "Requested scrape is not authorized for use with this tracker."}))
 429             logger.debug("Can't find the info_hash in torrents table. Returning.")
 430             return
 431         else:
 432             torrent_id = t.id
 433 
 434         logger.debug("torrent id: %s" % torrent_id)
 435         torrent = { 'complete':   t.seeders,
 436                     'incomplete': t.leechers,
 437                     'downloaded': t.downloaded }
 438 
 439         logger.debug("returning info: %s" % torrent)
 440         web.output(bencode({'files':{info_hash:torrent}}))

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2021-05-11 08:51:49, 18.9 KB) [[attachment:lightTracker.py]]
  • [get | view] (2021-05-11 08:51:49, 12.2 KB) [[attachment:test_lightTracker.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.