#!/usr/bin/env python

# --------- Section: lightTracker Project Introduction ------------
# lightTracker is a BitTorrent tracker implementation using
# the web.py framework for request/response handling and MySQL
# as the data store.
#
# The official tracker:
# * is Twisted based, which gives it a relatively steep learning curve.
# * handles lots of async requests/responses, which makes the code hard
#   to follow.
# * implements a local data store without using a standard database.
#
# lightTracker is an attempt to write a succinct, clear and
# easy-to-understand tracker. It:
# * is based on the popular, neat web.py framework.
# * handles all requests in a sync manner.
# * uses MySQL as the local datastore, which makes scaling out to
#   multiple boxes possible.
# * relies on the database software for caching instead of managing
#   the cache ourselves.
#   
#
# To run the tracker, simply do:
#    $ python lightTracker.py
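#
# By default web.py's built-in server listens on port 8080, so a client would
# typically be pointed at http://<host>:8080/announce (assuming the default
# port is not overridden on the command line).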
#
#
# ----------- Section: Code Structure ----------------------
# web.py's 'urls' mapping is a nice TOC, or sitemap, for understanding
# the structure (a sketch of the expected mapping follows). This tracker
# implements only the 'announce' and 'scrape' functions.
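#
# For reference, the URL mapping this file relies on is expected to look
# roughly like the sketch below (a sketch only; the actual 'urls' tuple and
# the startup call live elsewhere in this file):
#
#     urls = (
#         '/announce', 'announce',
#         '/scrape',   'scrape',
#     )
#     if __name__ == '__main__':
#         web.run(urls, globals())   # web.py 0.2-style startup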
#
# I'd recommend following these steps to understand the code:
# 1. understand the database schema. Only two tables, one foreign key
#    (a sketch follows this list).
# 2. read the unit test code to see how parameters are put together.
# 3. start with the "Book Keeping" and "Prepare Response Message" sections.
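#
# For orientation, the schema implied by the code below is roughly as follows
# (column names inferred from usage in this file; the real DDL may differ):
#
#     CREATE TABLE torrents (
#         id         INT PRIMARY KEY AUTO_INCREMENT,
#         info_hash  CHAR(40),      -- hex encoding of the 20-byte info_hash
#         seeders    INT,
#         leechers   INT,
#         downloaded INT
#     );
#     CREATE TABLE peers (
#         peer_id    CHAR(40),      -- hex encoding of the 20-byte peer_id
#         torrent_id INT,           -- foreign key to torrents.id
#         ip         VARCHAR(15),
#         port       INT,
#         status     INT,           -- const.PEER_* values
#         PRIMARY KEY (peer_id, torrent_id)
#     );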
#
#
# ----------- Section: Road Map ----------------------------
# 1. Performance boost: the official tracker can serve 82 requests/sec
#    while lightTracker can only handle 24 reqs/sec. One idea is to
#    use a cache, but the logic will get quite complicated if we want to
#    keep the statistics accurate (see the sketch below).
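#
#    A minimal sketch of that caching idea (hypothetical helper, not wired in):
#    keep the bencoded announce response per info_hash with a short TTL and
#    accept slightly stale seeder/leecher counts in exchange:
#
#        _announce_cache = {}   # info_hash -> (expires_at, bencoded_body)
#
#        def cached_announce(info_hash, build_response, ttl=30):
#            import time
#            hit = _announce_cache.get(info_hash)
#            if hit and hit[0] > time.time():
#                return hit[1]
#            body = build_response()
#            _announce_cache[info_hash] = (time.time() + ttl, body)
#            return body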
#
# Alex Dong ( alex.dong at gmail.com)

import web
import const
from models import torrent_db, peer_db
from bencode import bencode, bdecode
from urllib import quote, unquote
from logger import logger

logger = logger('TRACKER')

def size_format(s):
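    """Human-readable byte count, e.g. size_format(2048) -> '2KiB',
    size_format(3221225472) -> '3.0GiB'."""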
    if (s < 1024):
        r = str(s) + 'B'
    elif (s < 1048576):
        r = str(int(s/1024)) + 'KiB'
    elif (s < 1073741824):
        r = str(int(s/1048576)) + 'MiB'
    elif (s < 1099511627776):
        r = str(int((s/1073741824.0)*100.0)/100.0) + 'GiB'
    else:
        r = str(int((s/1099511627776.0)*100.0)/100.0) + 'TiB'
    return r


class announce:
    def GET(self):
        params = web.input()
        web.header("Content-Type", "text/plain")
        web.header("Pragma", "no-cache")


        # ------------------- Section: Input Validation --------------------

        # Rules: http://wiki.theory.org/BitTorrentSpecification
        #
        # info_hash:    mandatory.  20-byte. 
        # peer_id:      mandatory.  20-byte. 
        # port:         mandatory.  integer
        #
        # compact:      optional.   '1' means compact peer list.
        # uploaded:     optional.   base-ten ASCII integer; bytes uploaded.
        # downloaded:   optional.   bytes downloaded.
        # left:         optional.   bytes left.
        # event:        optional.   once specified, must be one of 
        #                           'started', 'completed', 'stopped'
        # numwant:      optional.   integer >= 0
        #
        # ip:           optional.   [ignore] dotted quad format or rfc3513. 
        # key:          optional.   [ignore] string
        # trackerid:    optional.   [ignore] string
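        #
        # An illustrative announce request therefore looks like (values made
        # up; the info_hash is abbreviated):
        #   GET /announce?info_hash=%124Vx%9A...&peer_id=-UT2210-abcdefghijkl
        #       &port=6881&uploaded=0&downloaded=0&left=1048576
        #       &event=started&compact=1&numwant=50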
        if  not hasattr(params, 'info_hash') or \
            not hasattr(params, 'peer_id')   or \
            not hasattr(params, 'port'):
            web.output( bencode({'failure reason': "Mandatory fields are missing."}))
            logger.info("At least one of ('info_hash', 'peer_id', 'port') is missing. " \
                + " request: (%s, %s, %s). returning." %  (hasattr(params, 'info_hash'), \
                hasattr(params, 'peer_id'), hasattr(params, 'port')))
            return
            
        if len(unquote(params.peer_id)) != 20:
            logger.info("peer_id '%s' is not 20 bytes long." % (params.peer_id))
            web.output( bencode({'failure reason': "Request.peer_id is expected to be 20 bytes long."}))
            return

        # Hex-encode the peer_id since some clients, like uTorrent, use raw
        # binary bytes rather than a printable string for the peer_id.
        peer_id = unquote(params.peer_id).encode('hex')
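        # e.g. the 8 ASCII bytes '-UT2210-' hex-encode to '2d5554323231302d';
        # a full 20-byte peer_id always becomes a 40-character hex string.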
        logger.debug("Processing request from peer: %s" % peer_id)
        
        if not params.port.isdigit():
            web.output( bencode({'failure reason': "Request.port is invalid"}))
            logger.info("invalid port received: %s" % params.port)
            return

        port = int(params.port)
        if port < 0 or port > 65535:
            web.output( bencode({'failure reason': "Request.port is invalid"}))
            logger.info("invalid port number: %d." % port)
            return

        if hasattr(params, 'event') and params.event not in ['started', 'completed', 'stopped']:
            web.output( bencode({'failure reason': "Request.event should be started, completed or stopped."}))
            logger.info("invalid event: '%s'." % params.event)
            return

        # ----------- Section: Input Normalization -------------------
        #
        # In this section we normalize the parameters by
        # 1. setting default values.
        # 2. checking input parameter types.
        # 3. replacing incorrect parameters with default values.
        # 4. detecting parameter conflicts and trying to resolve them.
        if not hasattr(params, 'numwant'):
            params.numwant = 25
        elif not params.numwant.isdigit():
            web.output( bencode({'failure reason': "Request.numwant should be an integer."}))
            logger.info("invalid numwant: '%s'." % params.numwant)
            return            
        else:
            params.numwant = int(params.numwant)
        # Before we start looking through the peers table
        # and return the peer_list, we have to decide how
        # many peers we want to return.
        # Do check out the "Implementor's Note" in
        # http://wiki.theory.org/BitTorrentSpecification
        numwant = min(params.numwant, 55)        

        if not hasattr(params, 'left'): 
            logger.info("'left' not present, setting it to 0")
            params.left = 0
        elif not params.left.isdigit():
            web.output( bencode({'failure reason': "Request.left should be a base-ten ASCII integer"}))
            logger.info("invalid left bytes: %s." % params.left)
            return
        else:
            params.left = int(params.left)
        logger.debug("params.left = %s" % params.left)
    
        # A 'completed' event can't still have bytes left to download ('left' > 0).
        if hasattr(params, 'event') and params.event == 'completed' and params.left > 0:
            web.output( bencode({'failure reason': "Inconsistent completed and left parameters."}))
            logger.info("'completed' event conflicts with params.left (%s)" % params.left)
            return
        logger.debug("params.event = %s" % (hasattr(params, 'event') and params.event or None))

        # For personal and corporate trackers, you have the choice to only track torrents you've created. 
        info_hash = unquote(params.info_hash).encode('hex')
        t = torrent_db.getone(info_hash=info_hash)
        if not t:
            web.output( bencode({'failure reason': "Requested download is not authorized for use with this tracker."}))
            logger.debug("can not find torrent with info_hash: %s." % info_hash)
            return
        torrent_id = t.id

            
        # Update 'peers' table with this peer's information. 
        # Note that the 'peers' table's primary key is composited
        # by the 'peer_id' and 'torrent_id' columns.

        # TODO: need to make sure we handle forwarded IPs (proxies), NAT, etc.
        ip = web.ctx.ip
        if not peer_db.has(peer_id=peer_id, torrent_id=torrent_id):
            peer_db.insert(peer_id=peer_id, torrent_id=torrent_id, ip=ip, port=port, status=const.PEER_UNINITIALIZED)
            logger.debug("peer inserted into 'peers' table.")
        else:
            peer_db.update(
                    where='peer_id = $peer_id and torrent_id = $torrent_id',
                    vars={'peer_id':peer_id, 'torrent_id':torrent_id},
                    ip=ip, port=port)


        # --------------------- Section: Book Keeping ----------------------
        # Update the peer's status according to the event we received.
        logger.debug("Updating book keeping. event=%s, left=%s" % (hasattr(params, 'event') and params.event or None, params.left))
        if not hasattr(params, 'event') or params.event == 'started':
            if params.left > 0:
                logger.debug("params.left>0. set peer status.to 'incomplete'")
                self._update_peer_status(peer_id, torrent_id, const.PEER_INCOMPLETE)
            else:
                logger.debug("params.left==0. set peer status to 'complete'")
                self._update_peer_status(peer_id, torrent_id, const.PEER_COMPLETE)
        elif params.event == 'completed':
            self._update_peer_status(peer_id, torrent_id, const.PEER_COMPLETE)
        else:
            # params.event == 'stopped':
            self._update_peer_status(peer_id, torrent_id, const.PEER_STOPPED)
            
               
        # ---------------Section: Prepare Response Message ------------------
        # Here the logic is quite simple. Just go through the 'peers' table
        # to pick out the peers for the specified  torrent_id and peer_id pair.
        # According to the spec, put everything into hash_table, bencode it
        # and return.
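        #
        # For a compact response with a single peer, the bencoded body looks
        # roughly like (numbers made up; '<6 bytes>' is one compact peer entry):
        #   d8:completei3e10:incompletei7e8:intervali1800e12:min intervali1800e5:peers6:<6 bytes>e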

        # Retrieve the torrent detail info again since the _update_peer_status
        # might have modified the statistics info. 
        t = torrent_db.getone(info_hash=info_hash)
        assert t
        data = {}
        data['complete'] = t.seeders
        data['incomplete'] = t.leechers
        data['interval'] = 30 * 60
        data['min interval'] = 30 * 60
        logger.debug("torrent has %s seeders and %s leechers." % (t.seeders, t.leechers))

        
        # Return the list of peers who're associated with the
        # selected torrent. 
        #
        # TODO: we might want to implement something to optimize the peers
        #       Based on speed? geography? uptime? existing peers?
        #       One premature optimization idea is to return complete peers 
        #       first. The problem with this approach is that complete peers
        #       might be less stable than incomplete peers, who are actively
        #       downloading or participating.
        # TODO: shall we maintain the peer list by 'ping' them
        #       to ensure 'signal of life' and NAT traversal still working?
        # TODO: Clean up zombie peers who didn't signal event "stopped"
        #       before they die.
        # TODO: we should not delete any peer_id from 'peers' table. 
        # Note: compare against the hex-encoded peer_id (that is what we store),
        # and materialize the result so logging it doesn't exhaust the iterator.
        peers = list(web.query('select * from peers where torrent_id = $torrent_id and peer_id != $peer_id limit $numwant',
                        vars={"torrent_id":torrent_id, "peer_id":peer_id, "numwant":numwant}))
        logger.debug("returning peer list: %s" % ', '.join(["%s:%s" % (p.ip, p.port) for p in peers]))
        if not hasattr(params, 'compact') or params.compact == '1':
            peerlist = ''
            for peer in peers:
                peerdata = self._compact_peer_info(peer.ip, peer.port)
                peerlist += peerdata
        else:
            peerlist = []
            for peer in peers:
                peerdata = {'peer_id':peer.peer_id,
                            'ip':peer.ip,
                            'port':peer.port}
                peerlist.append(peerdata)
        
        data['peers'] = peerlist

        # Here we should use 'web.output' instead of 'print' because
        # 'print' will append a '\n' at the end of the response message,
        # which corrupts the bencode.bdecode() process.
        web.output( bencode(data))

    def _update_peer_status(self, peer_id, torrent_id, status):
        """
        Update Peer Status and Torrent's seeder/leecher statistics info.

        The logic of updating seeder/leecher counts is a little bit complicated.
        We need to know the peer's last known status in order to tell how we
        should update the counts. Here is a matrix where
        * '0' stands for 'incomplete',
        * '1' for 'complete',
        * '2' for 'stopped',
        * '-1' for a newly inserted peer,
        * 'LE' for leechers
        * 'SE' for seeders and 
        * 'N' for no action.

        ||  Last State      ||  Current State   ||     Result       ||    idx   ||
        ||     -1           ||      0           ||      LE++        ||    (1)   || 
        ||     -1           ||      1           ||      SE++        ||    (2)   ||
        ||     -1           ||      2           ||       N          ||    (3)   ||
        ||     0            ||      0           ||       N          ||    (4)   ||
        ||     0            ||      1           ||  LE--, SE++      ||    (5)   ||
        ||     0            ||      2           ||      LE--        ||    (6)   ||
        ||     1            ||      0           ||  LE++, SE--      ||    (7)   ||
        ||     1            ||      1           ||       N          ||    (8)   ||
        ||     1            ||      2           ||      SE--        ||    (9)   ||
        ||     2            ||      0           ||      LE++        ||    (10)  ||
        ||     2            ||      1           ||      SE++        ||    (11)  ||
        ||     2            ||      2           ||       N          ||    (12)  ||
        """
        def update(field, delta):
            if field == 'LE':
                logger.debug("Update torrent (%s) leecher count %d by %d" % (t.id, t.leechers, delta))
                torrent_db.update('id=$id', {'id':t.id}, leechers=t.leechers+delta)
            else:
                logger.debug("Update torrent (%s) seeder count %d by %d" % (t.id, t.seeders, delta))
                torrent_db.update('id=$id', {'id':t.id}, seeders=t.seeders+delta)

        logger.debug("set peer_id: %s, torrent_id: %s status to %s" % (peer_id, torrent_id, status))
        t = torrent_db.getone(id=torrent_id)
        p = peer_db.getone(peer_id=peer_id, torrent_id=torrent_id)
        assert p
        logger.debug("state transition: %s to %s" % (p.status, status))

        if p.status==-1 and status==0:           # [1]
            update('LE', 1)
        elif p.status==-1 and status==1:         # [2]
            update('SE', 1)
        elif p.status==-1 and status==2:         # [3]
            pass
        elif p.status==0 and status==1:     # [5]
            update('LE', -1)
            update('SE', 1)
        elif p.status==0 and status==2:     # [6]
            update('LE', -1)
        elif p.status==1 and status==0:     # [7]
            update('LE', 1)
            update('SE', -1)
        elif p.status==1 and status==2:     # [9]
            update('SE', -1)
        elif p.status==2 and status==0:     # [10]
            update('LE', 1)
        elif p.status==2 and status==1:     # [11]
            update('SE', 1)
        elif p.status==status:               # [4, 8, 12]
            pass
            
        peer_db.update(
            where='peer_id = $peer_id and torrent_id = $torrent_id', 
            vars={'peer_id':peer_id, 'torrent_id':torrent_id},
            status=status)

    def _compact_peer_info(self, ip, port):
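        # Compact peer format (BEP 23): 4-byte network-order IPv4 address
        # followed by a 2-byte big-endian port,
        # e.g. 10.0.0.1:6881 -> '\x0a\x00\x00\x01\x1a\xe1'.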
        import socket, struct
        return socket.inet_aton(ip) + struct.pack('>H', int(port))
   
# ----------- Section: IP Helpers ----------------------------
# These helpers deal with proxies/NAT (forwarded IPs). They are not yet
# referenced by the request handlers above, so they are kept as plain
# module-level functions; they need no per-request state.

import re

# Pattern for pulling a client address out of a 'Via' header; the
# " for <ip>" suffix format is an assumption carried over from the
# original tracker code.
http_via_filter = re.compile(r' for ([0-9.]+)\Z')

def is_valid_ipv4(ip):
    parts = ip.split('.')
    if len(parts) != 4:
        return False
    try:
        # every octet must be an integer in [0, 255]
        for x in parts:
            n = int(x)
            if n < 0 or n > 255:
                return False
        return True
    except ValueError:
        return False

def is_local_ip(ip):
    # Loopback, link-local and RFC 1918 private ranges.
    try:
        v = [int(x) for x in ip.split('.')]
        if v[0] == 10 or v[0] == 127 or v[:2] in ([192, 168], [169, 254]):
            return 1
        if v[0] == 172 and 16 <= v[1] <= 31:
            return 1
        return 0
    except ValueError:
        return 0

# TODO: Need to figure out what this is for.
def _get_forwarded_ip(headers):
    if headers.has_key('http_x_forwarded_for'):
        header = headers['http_x_forwarded_for']
        try:
            x, y = header.split(',')
        except ValueError:
            return header
        if not is_local_ip(x):
            return x
        return y
    if headers.has_key('http_client_ip'):
        return headers['http_client_ip']
    if headers.has_key('http_via'):
        x = http_via_filter.search(headers['http_via'])
        if x:
            return x.group(1)
    if headers.has_key('http_from'):
        return headers['http_from']
    return None

def get_forwarded_ip(headers):
    x = _get_forwarded_ip(headers)
    if x is None or not is_valid_ipv4(x) or is_local_ip(x):
        return None
    return x

class scrape:
    def GET(self):
        params = web.input()
        web.header("Content-Type", "text/plain")
        web.header("Pragma", "no-cache")

        # Although the specification says we should return a list
        # of all torrents we're hosting, in order to save bandwidth
        # we only allow a 'scrape' of one torrent at a time.
        # As a result, we expect the info_hash to be present.
        if not hasattr(params, 'info_hash'):
            web.output( bencode({'failure reason':
            "Full scrape function is not available with this tracker."}))
            logger.debug("scrape without info_hash is returning. ")
            return
                
        # The scrape response is a two-level dictionary. The first level has
        # only one key, named 'files'. The second level is a dictionary by
        # itself, keyed by the 20-byte info_hash, with the torrent's
        # statistics dict as the value (an illustrative response follows
        # the notes below).
        #
        # Note that
        # 1. we always have only one torrent to track.
        # 2. the info_hash used as the response key is the raw (unquoted)
        #    20-byte value; it does not need to be URL-quoted again.
        # 3. the database key in the 'torrents' table is the 40-char hex
        #    encoding of the original 20-byte info_hash.
        # 4. we therefore need to hex-encode the info_hash before using it to
        #    look up the corresponding torrent in the database.
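        #
        # An illustrative response (before bencoding; numbers made up):
        #   {'files': {<20 raw info_hash bytes>: {'complete': 5,
        #                                         'incomplete': 12,
        #                                         'downloaded': 30}}}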
        info_hash = unquote(params.info_hash)
        t = torrent_db.getone(info_hash=info_hash.encode('hex'))
        logger.debug("info_hash: %s" % info_hash.encode('hex'))

        if not t:
            web.output( bencode({'failure reason':
            "Requested scrape is not authorized for use with this tracker."}))
            logger.debug("Can't find the info_hash in torrents table. Returning.")
            return
        else:
            torrent_id = t.id

        logger.debug("torrent id: %s" % torrent_id)
        torrent = { 'complete':   t.seeders,
                    'incomplete': t.leechers,
                    'downloaded': t.downloaded }

        logger.debug("returning info: %s" % torrent)
        web.output(bencode({'files':{info_hash:torrent}}))
