Attachment 'lightTracker.py'
Download 1 #!/usr/bin/env python
2
3 # --------- Section: lightTracker Project Introduction ------------
4 # lightTracker is a BitTorrent tracker implementation using
5 # the web.py framework for request/response handling and MySQL
6 # as data store.
7 #
8 # The official tracker
9 # * is Twisted based, so it has a relatively steep learning curve.
10 # * uses lots of async requests/responses, which make the code hard to understand.
11 # * implements its own local data store instead of using a standard database.
12 #
13 # lightTracker is an attempt to write a succinct, clear and
14 # easy-to-understand tracker.
15 # * based on the popular, neat web.py framework.
16 # * all requests done in a sync manner.
17 # * uses MySQL as the data store, which makes scaling out to
18 # multiple boxes possible.
19 # * Instead of managing the cache by ourselves, we depend on
20 # the database software to handle that for us.
21 #
22 #
23 # In order to run the tracker, simply do:
24 # >>python lightTracker.py
25 #
26 #
27 # ----------- Section: Code Structure ----------------------
28 # The web.py's 'urls' is a nice TOC, or sitemap, to understand
29 # the structure. This tracker implements only 'announce' and
30 # 'scrape' functions.
31 #
32 # I'd recommend you to follow these steps to understand the code:
33 # 1. understand the database schema. Only two tables, one foreign key.
34 # 2. read the unit test code to see how parameters are put together.
35 # 3. start with Section "Book keeping" and "Prepare Response Messages"
36 #
37 #
38 # ----------- Section: Road Map ----------------------------
39 # 1. Performance boost: the official tracker can serve 82 requests/sec
40 # while lightTracker can only take 24 reqs/sec. One idea is to
41 # use cache but the logic will get quite complicated if we want to
42 # get the statistics right.
43 #
44 # Alex Dong ( alex.dong at gmail.com)
45
46 import web
47 import const
48 from models import torrent_db, peer_db
49 from bencode import bencode, bdecode
50 from urllib import quote, unquote
51 from logger import logger
52
53 logger = logger('TRACKER')
54
def size_format(s):
    """Render a byte count as a short human-readable string.

    Values below 1 GiB are truncated to a whole number of B / KiB / MiB.
    Larger values are shown in GiB or TiB, truncated (not rounded) to two
    decimal places.
    """
    kib, mib, gib, tib = 1024, 1048576, 1073741824, 1099511627776

    if s < kib:
        return str(s) + 'B'
    if s < mib:
        return str(int(s / kib)) + 'KiB'
    if s < gib:
        return str(int(s / mib)) + 'MiB'

    # From here on the value keeps two decimals, so divide as floats.
    if s < tib:
        divisor, unit = 1073741824.0, 'GiB'
    else:
        divisor, unit = 1099511627776.0, 'TiB'
    hundredths = int((s / divisor) * 100.0)
    return str(hundredths / 100.0) + unit
67
68
class announce:
    """web.py handler for the tracker's ``announce`` request.

    ``GET`` validates the request, records the announcing peer in the
    'peers' table, updates the torrent's seeder/leecher counters and writes
    a bencoded response containing the statistics and a list of other peers.
    """

    # Seconds clients are asked to wait between two announces.
    ANNOUNCE_INTERVAL = 30 * 60
    # Number of peers returned when the client does not send 'numwant'.
    DEFAULT_NUMWANT = 25
    # Upper bound on the number of peers returned in one response.
    MAX_NUMWANT = 55
    VALID_EVENTS = ('started', 'completed', 'stopped')

    # (last status, new status) -> (leecher delta, seeder delta).
    # Status codes: -1 newly inserted, 0 incomplete, 1 complete, 2 stopped.
    # Pairs that are not listed (including "status unchanged") leave the
    # counters untouched.
    _STATUS_DELTAS = {
        (-1, 0): (1, 0),    # (1)
        (-1, 1): (0, 1),    # (2)
        (0, 1): (-1, 1),    # (5)
        (0, 2): (-1, 0),    # (6)
        (1, 0): (1, -1),    # (7)
        (1, 2): (0, -1),    # (9)
        (2, 0): (1, 0),     # (10)
        (2, 1): (0, 1),     # (11)
    }

    def _fail(self, reason):
        """Write a bencoded failure response carrying *reason*.

        The dictionary key is 'failure reason' (no trailing colon), which is
        the key the tracker protocol defines for error responses.
        """
        web.output(bencode({'failure reason': reason}))

    def GET(self):
        """Process one announce request and write the bencoded response."""
        params = web.input()
        web.header("Content-Type", "text/plain")
        web.header("Pragma", "no-cache")

        # ------------------- Section: Input Validation --------------------
        # Rules: http://wiki.theory.org/BitTorrentSpecification
        #
        # info_hash:  mandatory. 20-byte.
        # peer_id:    mandatory. 20-byte.
        # port:       mandatory. integer
        #
        # compact:    optional. '1' means compact peer list.
        # uploaded:   optional. base ten of ascii. bytes uploaded
        # downloaded: optional. bytes downloaded.
        # left:       optional. bytes left.
        # event:      optional. once specified, must be one of
        #             'started', 'completed', 'stopped'
        # numwant:    optional. integer >= 0
        #
        # ip, key, trackerid: optional, ignored.
        present = tuple([hasattr(params, name)
                         for name in ('info_hash', 'peer_id', 'port')])
        if False in present:
            self._fail("Mandatory fields are missing. ")
            logger.info("At least one of ('info_hash', 'peer_id', 'port') is missing. "
                        " request: (%s, %s, %s). returning." % present)
            return

        if len(params.peer_id) != 20:
            logger.info("peer_id '%s' is not exactly 20 bytes long." % (params.peer_id))
            self._fail("Request.peer_id expects to be length 20.")
            return

        # The peer_id is stored hex-encoded because some clients, like
        # uTorrent, put arbitrary bytes rather than plain text in it.
        peer_id = unquote(params.peer_id).encode('hex')
        logger.debug("Processing request from peer: %s" % peer_id)

        if not params.port.isdigit():
            self._fail("Request.port is invalid")
            logger.info("invalid port received: %s" % params.port)
            return

        port = int(params.port)
        if not 0 <= port <= 65535:
            self._fail("Request.port is invalid")
            logger.info("invalid port number: %d." % port)
            return

        event = getattr(params, 'event', None)
        if event is not None and event not in self.VALID_EVENTS:
            self._fail("Request.event should be started, completed or stopped.")
            logger.info("invalid event: '%s'." % event)
            return

        # ----------- Section: Input Normalization -------------------
        # Apply defaults, convert types and resolve conflicting parameters.
        if not hasattr(params, 'numwant'):
            numwant = self.DEFAULT_NUMWANT
        elif not params.numwant.isdigit():
            self._fail("Request.numwant should be integer. ")
            logger.info("invalid numwant: '%s'." % params.numwant)
            return
        else:
            numwant = int(params.numwant)
        # Cap the number of peers returned; see the "Implementor's Note" in
        # http://wiki.theory.org/BitTorrentSpecification
        numwant = min(numwant, self.MAX_NUMWANT)

        if not hasattr(params, 'left'):
            logger.info("'left' not present, setting it to 0")
            left = 0
        elif not params.left.isdigit():
            self._fail("Request.left should be ten-based ascii")
            logger.info("invalid left bytes: %s." % params.left)
            return
        else:
            left = int(params.left)
        logger.debug("params.left = %s" % left)

        # A 'completed' event can't still have stuff to download (left > 0).
        if event == 'completed' and left > 0:
            self._fail("Inconsistent completed and left parameters.")
            logger.info("'complete' event conflicts with params.left (%s)" % left)
            return
        logger.debug("params.event = %s" % event)

        # Only torrents already present in the 'torrents' table are tracked.
        info_hash = unquote(params.info_hash).encode('hex')
        t = torrent_db.getone(info_hash=info_hash)
        if not t:
            self._fail("Requested download is not authorized for use with this tracker.")
            logger.debug("can not find torrent with info_hash: %s." % info_hash)
            return
        torrent_id = t.id

        # Update the 'peers' table with this peer's information. Its primary
        # key is the ('peer_id', 'torrent_id') pair.
        # TODO: need to make sure our support of IP_forward, NAT, etc.
        ip = web.ctx.ip
        if not peer_db.has(peer_id=peer_id, torrent_id=torrent_id):
            peer_db.insert(peer_id=peer_id, torrent_id=torrent_id, ip=ip,
                           port=port, status=const.PEER_UNINITIALIZED)
            logger.debug("peer inserted into 'peers' table.")
        else:
            # Placeholders must not be wrapped in quotes and both need the
            # '$' prefix; this is the same clause _update_peer_status uses.
            peer_db.update(
                where='peer_id = $peer_id and torrent_id = $torrent_id',
                vars={'peer_id': peer_id, 'torrent_id': torrent_id},
                ip=ip, port=port, torrent_id=torrent_id)

        # --------------------- Section: Book Keeping ----------------------
        # Update the peer's status according to the event we received.
        logger.debug("Updating book keeping. event=%s, left=%s" % (event, left))
        if event is None or event == 'started':
            if left > 0:
                logger.debug("params.left>0. set peer status.to 'incomplete'")
                new_status = const.PEER_INCOMPLETE
            else:
                logger.debug("params.left==0. set peer status to 'complete'")
                new_status = const.PEER_COMPLETE
        elif event == 'completed':
            new_status = const.PEER_COMPLETE
        else:
            # event == 'stopped'
            new_status = const.PEER_STOPPED
        self._update_peer_status(peer_id, torrent_id, new_status)

        # --------------- Section: Prepare Response Message ------------------
        # Re-read the torrent because _update_peer_status may have changed
        # its seeder/leecher counters.
        t = torrent_db.getone(info_hash=info_hash)
        assert t
        data = {
            'complete': t.seeders,
            'incomplete': t.leechers,
            'interval': self.ANNOUNCE_INTERVAL,
            # The protocol key is 'min interval' (with a space).
            'min interval': self.ANNOUNCE_INTERVAL,
        }
        logger.debug("torrent has %s seeders and %s leechers." % (t.seeders, t.leechers))

        # Return other peers associated with the selected torrent.
        #
        # TODO: we might want to implement something to optimize the peers
        #       Based on speed? geography? uptime? existing peers?
        # TODO: shall we maintain the peer list by 'ping' them
        #       to ensure 'signal of life' and NAT traversal still working?
        # TODO: Clean up zombie peers who didn't signal event "stopped"
        #       before they die.
        # TODO: we should not delete any peer_id from 'peers' table.
        #
        # The requester is excluded using the hex-encoded peer_id, which is
        # the form stored in the table. The result is materialised into a
        # list because it is walked twice (debug log, then response).
        peers = list(web.query(
            'select * from peers where torrent_id = $torrent_id '
            'and peer_id != $peer_id limit $numwant',
            vars={"torrent_id": torrent_id, "peer_id": peer_id, "numwant": numwant}))
        logger.debug("returning peer list: %s"
                     % ', '.join(["%s:%s" % (p.ip, p.port) for p in peers]))

        if not hasattr(params, 'compact') or params.compact == '1':
            # Compact form: 6 bytes per peer, concatenated into one string.
            peerlist = ''.join([self._compact_peer_info(peer.ip, peer.port)
                                for peer in peers])
        else:
            # Dictionary form. The protocol key is 'peer id'; the stored
            # value is hex-encoded, so decode it back to the raw ID.
            peerlist = [{'peer id': peer.peer_id.decode('hex'),
                         'ip': peer.ip,
                         'port': peer.port}
                        for peer in peers]
        data['peers'] = peerlist

        # Use 'web.output' instead of 'print': 'print' would append a '\n'
        # to the response, which corrupts bencode.bdecode() on the client.
        web.output(bencode(data))

    def _update_peer_status(self, peer_id, torrent_id, status):
        """
        Update Peer Status and Torrent's seeder/leecher statistics info.

        The logic of updating seeder/leecher counts depends on the peer's
        last known status. Here is the matrix, where
          * '0'  stands for 'incomplete',
          * '1'  for 'complete',
          * '2'  for 'stopped',
          * '-1' for 'newly inserted',
          * 'LE' for leechers,
          * 'SE' for seeders and
          * 'N'  for no action.

        || Last State || Current State || Result     || idx  ||
        ||     -1     ||       0       || LE++       || (1)  ||
        ||     -1     ||       1       || SE++       || (2)  ||
        ||     -1     ||       2       || N          || (3)  ||
        ||      0     ||       0       || N          || (4)  ||
        ||      0     ||       1       || LE--, SE++ || (5)  ||
        ||      0     ||       2       || LE--       || (6)  ||
        ||      1     ||       0       || LE++, SE-- || (7)  ||
        ||      1     ||       1       || N          || (8)  ||
        ||      1     ||       2       || SE--       || (9)  ||
        ||      2     ||       0       || LE++       || (10) ||
        ||      2     ||       1       || SE++       || (11) ||
        ||      2     ||       2       || N          || (12) ||

        The matrix is encoded in ``_STATUS_DELTAS``. The peer row for
        (peer_id, torrent_id) must already exist.

        NOTE(review): the counters are read and then written back, so two
        concurrent announces for the same torrent could lose an update.
        """
        logger.debug("set peer_id: %s, torrent_id: %s status to %s"
                     % (peer_id, torrent_id, status))
        t = torrent_db.getone(id=torrent_id)
        p = peer_db.getone(peer_id=peer_id, torrent_id=torrent_id)
        assert p
        logger.debug("state transition: %s to %s" % (p.status, status))

        leecher_delta, seeder_delta = self._STATUS_DELTAS.get(
            (p.status, status), (0, 0))
        if leecher_delta:
            logger.debug("Update torrent (%s) leecher count %d with %d"
                         % (t.id, t.leechers, leecher_delta))
            torrent_db.update('id=$id', {'id': t.id},
                              leechers=t.leechers + leecher_delta)
        if seeder_delta:
            logger.debug("Update torrent (%s) seeder count %d with %d"
                         % (t.id, t.seeders, seeder_delta))
            torrent_db.update('id=$id', {'id': t.id},
                              seeders=t.seeders + seeder_delta)

        peer_db.update(
            where='peer_id = $peer_id and torrent_id = $torrent_id',
            vars={'peer_id': peer_id, 'torrent_id': torrent_id},
            status=status)

    def _compact_peer_info(self, ip, port):
        """Pack a dotted-quad *ip* and a *port* into the 6-byte compact form.

        The result is the 4-byte packed address followed by the port as a
        big-endian unsigned 16-bit integer.
        """
        import socket, struct
        return socket.inet_aton(ip) + struct.pack('>H', int(port))
342
def is_valid_ipv4(ip):
    """Return True if *ip* is a dotted quad whose four parts are 0..255.

    The range is checked explicitly rather than by relying on ``chr()``
    raising, because the range ``chr()`` accepts differs between Python
    versions.
    """
    octets = ip.split('.')
    if len(octets) != 4:
        return False
    try:
        for octet in octets:
            if not 0 <= int(octet) <= 255:
                return False
    except ValueError:
        # A part was not an integer at all.
        return False
    return True
353
def is_local_ip(ip):
    """Return 1 if *ip* is in a private, loopback or link-local range, else 0.

    Ranges recognised: 10.x.x.x, 127.x.x.x, 192.168.x.x, 169.254.x.x and
    172.16.x.x through 172.31.x.x. Input that cannot be parsed as
    dot-separated integers yields 0.
    """
    try:
        octets = [int(part) for part in ip.split('.')]
    except ValueError:
        return 0

    if octets[0] in (10, 127) or octets[:2] in ([192, 168], [169, 254]):
        return 1
    # Guard the second octet so a lone '172' does not raise IndexError.
    if octets[0] == 172 and len(octets) > 1 and 16 <= octets[1] <= 31:
        return 1
    return 0
363
364
365 # TODO: Need to figure out what this is for.
366 def _get_forwarded_ip(headers):
367 if headers.has_key('http_x_forwarded_for'):
368 header = headers['http_x_forwarded_for']
369 try:
370 x,y = header.split(',')
371 except:
372 return header
373 if not is_local_ip(x):
374 return x
375 return y
376 if headers.has_key('http_client_ip'):
377 return headers['http_client_ip']
378 if headers.has_key('http_via'):
379 x = http_via_filter.search(headers['http_via'])
380 try:
381 return x.group(1)
382 except:
383 pass
384 if headers.has_key('http_from'):
385 return headers['http_from']
386 return None
387
def get_forwarded_ip(headers):
    """Return the forwarded client IP from *headers*, or None.

    The candidate found by ``_get_forwarded_ip`` is only returned when it
    is a valid IPv4 address and is not a local address.
    """
    candidate = _get_forwarded_ip(headers)
    if candidate is not None and is_valid_ipv4(candidate) and not is_local_ip(candidate):
        return candidate
    return None
393
class scrape:
    """web.py handler for the tracker's ``scrape`` request.

    Only single-torrent scrapes are supported: the request must carry an
    'info_hash', and the response reports that torrent's statistics.
    """

    def GET(self):
        """Write the bencoded scrape response for the requested torrent."""
        params = web.input()
        web.header("Content-Type", "text/plain")
        web.header("Pragma", "no-cache")

        # Although the specification says we should return a list of all
        # torrents we're hosting, in order to save bandwidth we only allow
        # 'scrape' on one torrent at a time, so info_hash must be present.
        if not hasattr(params, 'info_hash'):
            # The protocol key is 'failure reason' (no trailing colon).
            web.output(bencode({'failure reason':
                "Full scrape function is not available with this tracker."}))
            logger.debug("scrape without info_hash is returning. ")
            return

        # The scrape response is a two level dictionary. The first level
        # has a single key named 'files'. The second level is keyed by the
        # info_hash as received (after unquoting), with the torrent's
        # statistics dictionary as value.
        #
        # Note that
        #  1. we always have only one torrent in the response.
        #  2. the 'torrents' table stores the info_hash hex-encoded, so the
        #     hash is hex-encoded before it is used for the lookup.
        info_hash = unquote(params.info_hash)
        hex_hash = info_hash.encode('hex')
        t = torrent_db.getone(info_hash=hex_hash)
        logger.debug("info_hash: %s" % hex_hash)

        if not t:
            web.output(bencode({'failure reason':
                "Requested scrape is not authorized for use with this tracker."}))
            logger.debug("Can't find the info_hash in torrents table. Returning.")
            return

        logger.debug("torrent id: %s" % t.id)
        torrent = {'complete': t.seeders,
                   'incomplete': t.leechers,
                   'downloaded': t.downloaded}

        logger.debug("returning info: %s" % torrent)
        web.output(bencode({'files': {info_hash: torrent}}))
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.