| Trees | Indices | Help |
|
|---|
|
|
1 """ Metafile Support.
2
3 Copyright (c) 2009, 2010, 2011 The PyroScope Project <pyroscope.project@gmail.com>
4 """
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 from __future__ import with_statement
19
20 import re
21 import sys
22 import time
23 import stat
24 import math
25 import errno
26 import pprint
27 import fnmatch
28 import hashlib
29 import urlparse
30 from contextlib import closing
31
32 from pyrobase import bencode
33 from pyrobase.parts import Bunch
34 from pyrocore import config, error
35 from pyrocore.util import os, fmt, pymagic
36
37
# Allowed characters in a metafile filename or path: the first character may
# not be '/', '\', '.' or '~', and no '/' or '\' may appear anywhere
ALLOWED_NAME = re.compile(r"^[^/\\.~][^/\\]*$")

# Character sequences considered secret (roughly, any path part or query parameter
# that looks like an alphanumeric sequence or url-safe base64 string)
PASSKEY_RE = re.compile(r"(?<=[/=])[-_0-9a-zA-Z]{5,64}={0,3}(?=[/&]|$)")

# Non-secret exemptions
PASSKEY_OK = ("announce", "TrackerServlet",)

# List of all standard keys in a metafile, each one split into its dotted
# components (e.g. ["info", "files", "path"]). A generator expression is used
# so that no loop variable is left behind in the module namespace, which makes
# an explicit "del" of that variable unnecessary.
METAFILE_STD_KEYS = list(key.split('.') for key in (
    "announce",
    "comment",
    "created by",
    "creation date",
    "encoding",
    "info",
    "info.length",
    "info.name",
    "info.piece length",
    "info.pieces",
    "info.private",
    "info.files",
    "info.files.length",
    "info.files.path",
))
67
68
70 """ Return a progress indicator for consoles if
71 stdout is a tty.
72 """
73 def progress(totalhashed, totalsize):
74 msg = " " * 30
75 if totalhashed < totalsize:
76 msg = "%5.1f%% complete" % (totalhashed * 100.0 / totalsize)
77 sys.stdout.write(msg + " \r")
78 sys.stdout.flush()
79
80 try:
81 return progress if sys.stdout.isatty() else None
82 except AttributeError:
83 return None
84
85
def mask_keys(announce_url):
    """ Hide anything in an announce URL that looks like a passkey.

        Every substring matched by C{PASSKEY_RE} is replaced by a run of
        asterisks of the same length, unless it is listed in C{PASSKEY_OK}.

        @param announce_url: The announce URL to mask.
        @return: The URL with all suspected passkeys starred out.
    """
    def starred(match):
        "Replacement helper."
        found = match.group()
        if found in PASSKEY_OK:
            return found
        return "*" * len(found)

    return PASSKEY_RE.sub(starred, announce_url)
92
93
104
105
def check_info(info):
    """ Validate info dict.

        @param info: The C{info} dictionary of a metafile.
        @return: The unchanged C{info} dict, if it passed all checks.
        @raise ValueError: if validation fails.
    """
    if not isinstance(info, dict):
        raise ValueError("bad metainfo - not a dictionary")

    # The piece hashes are concatenated 20-byte digests
    pieces = info.get("pieces")
    if not isinstance(pieces, basestring) or len(pieces) % 20 != 0:
        raise ValueError("bad metainfo - bad pieces key")

    piece_size = info.get("piece length")
    if not isinstance(piece_size, (int, long)) or piece_size <= 0:
        raise ValueError("bad metainfo - illegal piece length")

    name = info.get("name")
    if not isinstance(name, basestring):
        raise ValueError("bad metainfo - bad name (type is %r)" % type(name).__name__)
    if not ALLOWED_NAME.match(name):
        raise ValueError("name %s disallowed for security reasons" % name)

    # Exactly one of "files" (multi-file) and "length" (single file) must be there
    if ("files" in info) == ("length" in info):
        raise ValueError("single/multiple file mix")

    if "length" in info:
        length = info.get("length")
        if not isinstance(length, (int, long)) or length < 0:
            raise ValueError("bad metainfo - bad length")
    else:
        files = info.get("files")
        if not isinstance(files, (list, tuple)):
            raise ValueError("bad metainfo - bad file list")

        for item in files:
            if not isinstance(item, dict):
                raise ValueError("bad metainfo - bad file value")

            length = item.get("length")
            if not isinstance(length, (int, long)) or length < 0:
                raise ValueError("bad metainfo - bad length")

            path = item.get("path")
            if not isinstance(path, (list, tuple)) or not path:
                raise ValueError("bad metainfo - bad path")

            for part in path:
                if not isinstance(part, basestring):
                    raise ValueError("bad metainfo - bad path dir")
                if not ALLOWED_NAME.match(part):
                    raise ValueError("path %s disallowed for security reasons" % part)

        # No two entries may describe the same file
        file_paths = [os.sep.join(item["path"]) for item in files]
        if len(set(file_paths)) != len(file_paths):
            raise ValueError("bad metainfo - duplicate path")

    return info
163
164
def check_meta(meta):
    """ Check a complete metafile dict for validity.

        The dict must carry a string under "announce", and its "info" value
        has to pass C{check_info}.

        @param meta: The metafile dictionary.
        @return: The unchanged C{meta} dict, if it passed all checks.
        @raise ValueError: if validation fails.
    """
    if not isinstance(meta, dict):
        raise ValueError("bad metadata - not a dictionary")

    announce = meta.get("announce")
    if not isinstance(announce, basestring):
        raise ValueError("bad announce URL - not a string")

    # Raises on its own if the info dict is broken
    check_info(meta.get("info"))

    return meta
177
178
180 """ Clean meta dict. Optionally log changes using the given logger.
181
182 @param logger: If given, a callable accepting a string message.
183 @return: Set of keys removed from C{meta}.
184 """
185 modified = set()
186
187 for key in meta.keys():
188 if [key] not in METAFILE_STD_KEYS:
189 if logger:
190 logger("Removing key %r..." % (key,))
191 del meta[key]
192 modified.add(key)
193
194 if including_info:
195 for key in meta["info"].keys():
196 if ["info", key] not in METAFILE_STD_KEYS:
197 if logger:
198 logger("Removing key %r..." % ("info." + key,))
199 del meta["info"][key]
200 modified.add("info." + key)
201
202 for idx, entry in enumerate(meta["info"].get("files", [])):
203 for key in entry.keys():
204 if ["info", "files", key] not in METAFILE_STD_KEYS:
205 if logger:
206 logger("Removing key %r from file #%d..." % (key, idx + 1))
207 del entry[key]
208 modified.add("info.files." + key)
209
210 return modified
211
212
def sanitize(meta):
    """ Try to fix common problems, especially transcode non-standard string encodings.

        The "comment" and "created by" fields (if present), the info name, and
        all file path components are re-encoded to UTF-8, in place.

        @param meta: Metafile dict; must contain an "info" dict that has a "name".
        @return: The same C{meta} object.
    """
    def sane_encoding(text):
        "Transcoding helper, returns C{text} as an UTF-8 encoded byte string."
        for encoding in ('utf-8', meta.get('encoding', None), 'cp1252'):
            if not encoding:
                continue
            try:
                return text.decode(encoding).encode("utf-8")
            except (UnicodeError, LookupError):
                # Either the bytes are not valid in that encoding, or the
                # metafile names an encoding that is unknown to Python;
                # in both cases, try the next candidate
                continue

        # Broken beyond anything reasonable, replace what cannot be decoded
        return text.decode('utf-8', 'replace').replace(u'\ufffd', u'_').encode("utf-8")

    # Go through all string fields and check them
    for field in ("comment", "created by"):
        if field in meta:
            meta[field] = sane_encoding(meta[field])

    meta["info"]["name"] = sane_encoding(meta["info"]["name"])

    for entry in meta["info"].get("files", []):
        entry["path"] = [sane_encoding(part) for part in entry["path"]]

    return meta
239
240
242 """ Takes a list of C{key=value} strings and
243 assigns them to the given metafile.
244
245 If just a key name is given (no '='), the field is removed.
246 """
247 for assignment in assignments:
248 try:
249 if '=' in assignment:
250 field, val = assignment.split('=', 1)
251 else:
252 field, val = assignment, None
253
254 if val and val[0] in "+-" and val[1:].isdigit():
255 val = int(val, 10)
256
257 # TODO: Allow numerical indices, and "+" for append
258 namespace = meta
259 for key in field.split('.')[:-1]:
260 # Create missing dicts as we go...
261 namespace = namespace.setdefault(key, {})
262 except (KeyError, IndexError, TypeError, ValueError), exc:
263 raise error.UserError("Bad assignment %r (%s)!" % (assignment, exc))
264 else:
265 if val is None:
266 del namespace[field.split('.')[-1]]
267 else:
268 namespace[field.split('.')[-1]] = val
269
270 return meta
271
272
274 """ Add fast resume data to a metafile dict.
275 """
276 # Get list of files
277 files = meta["info"].get("files", None)
278 single = files is None
279 if single:
280 if os.path.isdir(datapath):
281 datapath = os.path.join(datapath, meta["info"]["name"])
282 files = [Bunch(
283 path=[os.path.abspath(datapath)],
284 length=meta["info"]["length"],
285 )]
286
287 # Prepare resume data
288 resume = meta.setdefault("libtorrent_resume", {})
289 resume["bitfield"] = len(meta["info"]["pieces"]) // 20
290 resume["files"] = []
291 piece_length = meta["info"]["piece length"]
292 offset = 0
293
294 for fileinfo in files:
295 # Get the path into the filesystem
296 filepath = os.sep.join(fileinfo["path"])
297 if not single:
298 filepath = os.path.join(datapath, filepath)
299
300 # Check file size
301 if os.path.getsize(filepath) != fileinfo["length"]:
302 raise OSError(errno.EINVAL, "File size mismatch for %r [is %d, expected %d]" % (
303 filepath, os.path.getsize(filepath), fileinfo["length"],
304 ))
305
306 # Add resume data for this file
307 resume["files"].append(dict(
308 priority=1,
309 mtime=int(os.path.getmtime(filepath)),
310 completed=(offset+fileinfo["length"]+piece_length-1) // piece_length
311 - offset // piece_length,
312 ))
313 offset += fileinfo["length"]
314
315 return meta
316
317
319 """ Return info hash as a string.
320 """
321 return hashlib.sha1(bencode.bencode(metadata['info'])).hexdigest().upper()
322
323
def data_size(metadata):
    """ Calculate the size of a torrent based on parsed metadata.

        @param metadata: Metafile dict with an "info" dict that either has a
            "length" (single file), or a "files" list of dicts with a "length" each.
        @return: Total size of the torrent's data.
    """
    info = metadata['info']

    if 'length' in info:
        # Single file
        return info['length']

    # Directory structure
    return sum(f['length'] for f in info['files'])
337
338
340 """ A torrent metafile.
341 """
342
343 # Patterns of names to ignore
344 IGNORE_GLOB = [
345 "core", "CVS", ".*", "*~", "*.swp", "*.tmp", "*.bak",
346 "[Tt]humbs.db", "[Dd]esktop.ini", "ehthumbs_vista.db",
347 ]
348
349
351 """ Initialize metafile.
352 """
353 self.filename = filename
354 self.progress = None
355 self.datapath = datapath
356 self.ignore = self.IGNORE_GLOB[:]
357 self.LOG = pymagic.get_class_logger(self)
358
359
def _get_datapath(self):
    """ Return the stored datapath, or raise an exception if there is none.

        @raise OSError: with C{ENOENT}, if no datapath is set.
    """
    path = self._datapath
    if path is not None:
        return path

    raise OSError(errno.ENOENT, "You didn't provide any datapath for %r" % self.filename)
367
def _set_datapath(self, datapath):
    """ Store a new datapath, and record whether it points to a FIFO.

        @param datapath: The new path; any false value clears the datapath.
    """
    if not datapath:
        self._datapath = None
        self._fifo = False
        return

    # Trailing path separators are dropped before the path is stored
    self._datapath = datapath.rstrip(os.sep)
    mode = os.stat(self.datapath).st_mode
    self._fifo = int(stat.S_ISFIFO(mode))
377
378 datapath = property(_get_datapath, _set_datapath)
379
380
def walk(self):
    """ Generate paths in "self.datapath".

        For a FIFO, relative paths are read from it line by line and yielded
        joined to the directory containing the FIFO. For a directory, the
        tree is walked and all names matching one of the patterns in
        "self.ignore" are left out. Anything else is yielded as is.
    """
    def ignored(name):
        "Check a name against the ignore patterns."
        return any(fnmatch.fnmatch(name, pattern) for pattern in self.ignore)

    if self._fifo:
        # FIFO, which must not be consumed a second time
        if self._fifo > 1:
            raise RuntimeError("INTERNAL ERROR: FIFO read twice!")
        self._fifo += 1

        # Read paths relative to directory containing the FIFO
        with closing(open(self.datapath, "r")) as fifo:
            for line in iter(fifo.readline, ''):
                relpath = line.rstrip('\n')
                if not relpath:
                    # An empty line ends the list, just like EOF does
                    break
                self.LOG.debug("Read relative path %r from FIFO..." % (relpath,))
                yield os.path.join(os.path.dirname(self.datapath), relpath)

        self.LOG.debug("FIFO %r closed!" % (self.datapath,))

    elif os.path.isdir(self.datapath):
        # Directory, walk the tree
        for dirpath, dirnames, filenames in os.walk(self.datapath):
            # Prune blacklisted directories in place, so they are not scanned
            dirnames[:] = [name for name in dirnames if not ignored(name)]

            # Yield all filenames that aren't blacklisted
            for filename in filenames:
                if ignored(filename):
                    continue
                yield os.path.join(dirpath, filename)

    else:
        # Single file, yield its name
        yield self.datapath
420
421
def _calc_size(self):
    """ Add up the sizes of all files that C{walk()} yields for "self.datapath".

        @return: The total size.
    """
    total = 0
    for path in self.walk():
        total += os.path.getsize(path)
    return total
428
429
def _make_info(self, piece_size, progress, walker, piece_callback=None):
    """ Create info dict.

        @param piece_size: Size of a piece.
        @param progress: If given, a callable taking C{(totalhashed, totalsize)},
            called after each chunk that was hashed.
        @param walker: Iterable of the paths of all files to hash, in order.
        @param piece_callback: If given, a callable taking C{(filename, piece_hash)},
            called for every piece that is completed.
        @return: The validated info dict.
        @raise OSError: if a file has less data than its size indicated.
    """
    # These collect the file descriptions and piece hashes
    file_list = []
    pieces = []

    # Initialize progress state (no total is calculated for a FIFO)
    totalsize = -1 if self._fifo else self._calc_size()
    totalhashed = 0

    # Start a new piece
    sha1 = hashlib.sha1()
    done = 0

    # Hash all files
    for filename in walker:
        # Assemble file info, the path is stored relative to the data root
        filesize = os.path.getsize(filename)
        root = os.path.dirname(self.datapath) if self._fifo else self.datapath
        filepath = filename[len(root):].lstrip(os.sep)
        file_list.append({
            "length": filesize,
            "path": filepath.replace(os.sep, '/').split('/'),
        })
        self.LOG.debug("Hashing %r, size %d..." % (filename, filesize))

        # Open file and hash it
        fileoffset = 0
        with open(filename, "rb") as handle:
            while fileoffset < filesize:
                # Read rest of piece or file, whatever is smaller
                chunk = handle.read(min(filesize - fileoffset, piece_size - done))
                if not chunk:
                    # The file ended early (e.g. it was truncated after its
                    # size was taken); without this check, the loop would
                    # never terminate
                    raise OSError(errno.EINVAL, "File size mismatch for %r [is %d, expected %d]" % (
                        filename, fileoffset, filesize,
                    ))

                sha1.update(chunk)
                done += len(chunk)
                fileoffset += len(chunk)
                totalhashed += len(chunk)

                # Piece is done
                if done == piece_size:
                    pieces.append(sha1.digest())
                    if piece_callback:
                        piece_callback(filename, pieces[-1])

                    # Start a new piece
                    sha1 = hashlib.sha1()
                    done = 0

                # Report progress
                if progress:
                    progress(totalhashed, totalsize)

    # Add hash of partial last piece
    if done > 0:
        pieces.append(sha1.digest())
        if piece_callback:
            piece_callback(filename, pieces[-1])

    # Build the meta dict
    metainfo = {
        "pieces": "".join(pieces),
        "piece length": piece_size,
        "name": os.path.basename(self.datapath),
    }

    # Handle directory/FIFO vs. single file
    if self._fifo or os.path.isdir(self.datapath):
        metainfo["files"] = file_list
    else:
        metainfo["length"] = totalhashed

    # Return validated info dict
    return check_info(metainfo)
505
506
def _make_meta(self, tracker_url, root_name, private, progress):
    """ Build the complete torrent dict for one tracker.

        @param tracker_url: Announce URL; also hashed into the info dict.
        @param root_name: If set, replaces the default info name.
        @param private: If true, the private flag is set in the info dict.
        @param progress: Handed on to the hashing step.
        @return: The validated meta dict.
    """
    # Piece size is 2 ** exponent; for a FIFO it is fixed
    # TODO we need to add a (command line) param, probably for total data size
    #      for now, always 1MB
    exponent = 20
    if not self._fifo:
        total_size = self._calc_size()
        exponent = int(math.log(total_size) / math.log(2)) - 9 if total_size else 0

    # Keep the exponent within 15..24
    exponent = min(max(15, exponent), 24)
    piece_size = 2 ** exponent

    # Hash the data; FIFO input keeps its order, other paths are sorted
    paths = self.walk() if self._fifo else sorted(self.walk())
    info = self._make_info(piece_size, progress, paths)

    # Enforce unique hash per tracker
    info["x_cross_seed"] = hashlib.md5(tracker_url).hexdigest()

    # Set private flag
    if private:
        info["private"] = 1

    # Freely chosen root name (default is basename of the data path)
    if root_name:
        info["name"] = root_name

    #XXX meta["encoding"] = "UTF-8"

    # Assemble the torrent metadata, and return it validated
    return check_meta({
        "info": info,
        "announce": tracker_url.strip(),
    })
549
550
def create(self, datapath, tracker_urls, comment=None, root_name=None,
           created_by=None, private=False, no_date=False, progress=None,
           callback=None):
    """ Create a metafile with the path given on object creation.
        Returns the last metafile dict that was written (as an object, not bencoded),
        or C{None} if there was no tracker URL to create a metafile for.

        @param datapath: If set, replaces the current datapath.
        @param tracker_urls: A single announce URL or alias, or an iterable of them.
        @param comment: If set, stored in the "comment" field.
        @param root_name: Handed on to the meta dict creation.
        @param created_by: If set, stored in the "created by" field.
        @param private: Handed on to the meta dict creation.
        @param no_date: If true, no "creation date" is added.
        @param progress: Handed on to the meta dict creation.
        @param callback: If given, called with the meta dict before it is written.
        @raise error.UserError: for a bad tracker URL or an unknown alias.
    """
    if datapath:
        self.datapath = datapath

    # A string can be concatenated to '', anything else is taken as an iterable
    try:
        tracker_urls = ['' + tracker_urls]
    except TypeError:
        tracker_urls = list(tracker_urls)
    multi_mode = len(tracker_urls) > 1

    # Stays None if there is nothing to loop over
    meta = None

    # TODO add optimization so the hashing happens only once for multiple URLs!
    for tracker_url in tracker_urls:
        # Lookup announce URLs from config file
        try:
            parsed_url = urlparse.urlparse(tracker_url)
            if parsed_url.scheme:
                # Take the 2nd level of the host name as the alias
                domain = parsed_url.netloc.split(':')[0].split('.')
                tracker_alias = domain[-2 if len(domain) > 1 else 0]
            else:
                tracker_alias, tracker_url = config.lookup_announce_alias(tracker_url)
                tracker_url = tracker_url[0]
        except (KeyError, IndexError):
            raise error.UserError("Bad tracker URL %r, or unknown alias!" % (tracker_url,))

        # Determine metafile name
        output_name = self.filename
        if multi_mode:
            # Add 2nd level of announce URL domain to metafile name
            basename, extension = os.path.splitext(output_name)
            output_name = basename + '-' + tracker_alias + extension

        # Hash the data
        self.LOG.info("Creating %r for %s %r..." % (
            output_name, "filenames read from" if self._fifo else "data in", self.datapath,
        ))
        meta = self._make_meta(tracker_url, root_name, private, progress)

        # Add optional fields
        if comment:
            meta["comment"] = comment
        if created_by:
            meta["created by"] = created_by
        if not no_date:
            meta["creation date"] = long(time.time())
        if callback:
            callback(meta)

        # Write metafile to disk
        self.LOG.debug("Writing %r..." % (output_name,))
        bencode.bwrite(output_name, meta)

    return meta
612
613
615 """ Check piece hashes of a metafile against the given datapath.
616 """
617 if datapath:
618 self.datapath = datapath
619
620 def check_piece(filename, piece):
621 "Callback for new piece"
622 if piece != metainfo["info"]["pieces"][check_piece.piece_index:check_piece.piece_index+20]:
623 self.LOG.warn("Piece #%d: Hashes differ in file %r" % (check_piece.piece_index//20, filename))
624 check_piece.piece_index += 20
625 check_piece.piece_index = 0
626
627 datameta = self._make_info(int(metainfo["info"]["piece length"]), progress,
628 [datapath] if "length" in metainfo["info"] else
629 (os.path.join(*([datapath] + i["path"])) for i in metainfo["info"]["files"]),
630 piece_callback=check_piece
631 )
632 return datameta["pieces"] == metainfo["info"]["pieces"]
633
634
636 """ List torrent info & contents. Returns a list of formatted lines.
637 """
638 # Assemble data
639 metainfo = sanitize(bencode.bread(self.filename))
640 announce = metainfo['announce']
641 info = metainfo['info']
642 info_hash = hashlib.sha1(bencode.bencode(info))
643
644 total_size = data_size(metainfo)
645 piece_length = info['piece length']
646 piece_number, last_piece_length = divmod(total_size, piece_length)
647
648 # Build result
649 result = [
650 "NAME %s" % (os.path.basename(self.filename)),
651 "SIZE %s (%i * %s + %s)" % (
652 fmt.human_size(total_size).strip(),
653 piece_number, fmt.human_size(piece_length).strip(),
654 fmt.human_size(last_piece_length).strip(),
655 ),
656 "HASH %s" % (info_hash.hexdigest().upper()),
657 "URL %s" % (mask_keys if masked else str)(announce),
658 "PRV %s" % ("YES (DHT/PEX disabled)" if info.get("private") else "NO (DHT/PEX enabled)"),
659 "TIME %s" % ("N/A" if "creation date" not in metainfo else
660 time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(metainfo["creation date"]))
661 ),
662 ]
663
664 for label, key in (("BY ", "created by"), ("REM ", "comment")):
665 if key in metainfo:
666 result.append("%s %s" % (label, metainfo.get(key, "N/A")))
667
668 result.extend([
669 "",
670 "FILE LISTING",
671 ])
672 if info.has_key('length'):
673 # Single file
674 result.append("%-69s%9s" % (
675 info['name'],
676 fmt.human_size(total_size),
677 ))
678 else:
679 # Directory structure
680 result.append("%s/" % info['name'])
681 oldpaths = [None] * 99
682 for entry in info['files']:
683 for idx, item in enumerate(entry['path'][:-1]):
684 if item != oldpaths[idx]:
685 result.append("%s%s/" % (' ' * (4*(idx+1)), item))
686 oldpaths[idx] = item
687 result.append("%-69s%9s" % (
688 ' ' * (4*len(entry['path'])) + entry['path'][-1],
689 fmt.human_size(entry['length']),
690 ))
691
692 return result
693
| Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sun Jun 5 17:25:43 2011 | http://epydoc.sourceforge.net |