Python边学边用--BT客户端实现之

发布时间:2019-08-26 08:42:18编辑:auto阅读(1962)

    BitTorrent文件使用bencode编码,其中包括了4种数据类型:

    'd' 开头表示是dict类型,'e'表示结束

    'l' (小写字母L)开头表示是list类型,'e'表示结束

    'i'开头表示是integer类型,'e'表示结束,可以表示负数

    以数字开头表示string类型,数字为string的长度,长度与string内容之间以':'分隔

    默认所有text类型的属性都使用utf-8编码,但是大多数BitTorrent文件还包含codepage和encoding属性,用来指定text的编码格式

    BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html

    以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。

     

     torrent_file.py
    复制代码
    import os
    from datetime import tzinfo
    from datetime import datetime
    
    import bcodec
    
    # Size argument handed to file.read() in TorrentFile.read_file(); a
    # negative size makes read() return everything up to end of file.
    _READ_MAX_LEN = -1
    
    class BTFormatError(Exception):
        """Raised when a file does not decode to a bencoded dictionary.

        Derives from Exception (not BaseException) so that ordinary
        ``except Exception`` handlers see it, like any other application
        error.
        """
        
    class TorrentFile(object):
        """In-memory view of a decoded .torrent (bencoded metainfo) file.

        Call read_file() first.  The get_* accessors then return values
        taken from the decoded dictionary, or None / an empty list when the
        corresponding key is absent.
        """

        # Size in bytes of one entry in the info dict's 'pieces' string
        # (BEP 3: concatenated 20-byte SHA1 hashes, one per piece).
        _PIECE_HASH_LEN = 20

        # Defaults for an instance that has not loaded a file yet.  They are
        # only ever rebound per instance (never mutated in place), so sharing
        # the empty dict at class level is safe.
        __metainfo = {}
        __file_name = ''

        def read_file(self, filename):
            """Load and decode the torrent file at *filename*.

            Raises IOError when the file cannot be read and BTFormatError
            when its content does not decode to a non-empty dictionary.  On
            failure the previously loaded state is left untouched.
            """
            # 'with' closes the handle even if read() raises.
            with open(filename, 'rb') as torrent_file:
                data = torrent_file.read(_READ_MAX_LEN)

            # On Python 3 a binary read yields bytes.  Map every byte to the
            # character with the same code point so the decoder still sees
            # exactly one character per byte (Python 2 str is left alone).
            if not isinstance(data, str):
                data = data.decode('latin-1')

            metainfo = bcodec.bdcode(data)
            if not metainfo or not isinstance(metainfo, dict):
                raise BTFormatError('not a valid torrent file: %s' % (filename,))

            self.__file_name = filename
            self.__metainfo = metainfo

        def __is_singlefile(self):
            """Return True when the info dict describes a single file.

            BEP 3 puts 'length' inside the info dict for single-file
            torrents, so that is where it must be looked up.
            """
            info = self.__get_meta_top('info')
            return isinstance(info, dict) and 'length' in info

        def __decode_text(self, text):
            """Decode a raw metainfo string into text.

            The codec is the torrent's 'encoding' value if present, else
            'cp' + its 'codepage' value, else utf-8.  Returns None for an
            empty/missing value, and the value unchanged when it cannot be
            decoded (bad bytes or unknown codec name).
            """
            if not text:
                return None

            encoding = self.get_encoding()
            if not encoding:
                codepage = self.get_codepage()
                encoding = 'cp' + str(codepage) if codepage else 'utf-8'

            raw = text
            try:
                if not hasattr(text, 'decode'):
                    # Python 3 str holding one character per original byte:
                    # turn it back into those bytes before decoding.
                    text = text.encode('latin-1')
                return text.decode(encoding)
            except (ValueError, LookupError):
                # ValueError covers Unicode{De,En}codeError; LookupError is
                # raised for a codec name Python does not know.
                return raw

        def __get_meta_top(self, key):
            """Return the top-level metainfo value for *key*, or None."""
            return self.__metainfo.get(key)

        def __get_meta_info(self, key):
            """Return the value for *key* inside the 'info' dict, or None."""
            info = self.__get_meta_top('info')
            if isinstance(info, dict):
                return info.get(key)
            return None

        def __slice_pieces(self, pieces, piece_length, start, length):
            """Return the hashes of every piece overlapping a byte range.

            The range is [start, start + length) measured from the beginning
            of the torrent's concatenated content.  Returns an empty list
            for a zero-length range and None when *pieces* is too short to
            hold the last piece needed.
            """
            if length <= 0:
                return []
            hash_len = self._PIECE_HASH_LEN
            first = start // piece_length
            last = (start + length - 1) // piece_length
            if (last + 1) * hash_len > len(pieces):
                return None
            return [pieces[i * hash_len:(i + 1) * hash_len]
                    for i in range(first, last + 1)]

        def get_codepage(self):
            """Return the top-level 'codepage' value, or None."""
            return self.__get_meta_top('codepage')

        def get_encoding(self):
            """Return the top-level 'encoding' value, or None."""
            return self.__get_meta_top('encoding')

        def get_announces(self):
            """Return the tracker tiers as a list of lists of URLs.

            The first tier holds the single 'announce' URL (when present);
            the tiers of 'announce-list' (when present) follow it.
            """
            announces = []
            primary = self.__get_meta_top('announce')
            if primary:
                announces.append([primary])
            tiers = self.__get_meta_top('announce-list')
            if isinstance(tiers, list):
                # Extend, not append: 'announce-list' is itself a list of
                # tiers, and a missing list must not add a None entry.
                announces.extend(tiers)
            return announces

        def get_publisher(self):
            """Return the decoded 'publisher' text, or None."""
            return self.__decode_text(self.__get_meta_top('publisher'))

        def get_publisher_url(self):
            """Return the decoded 'publisher-url' text, or None."""
            return self.__decode_text(self.__get_meta_top('publisher-url'))

        def get_creater(self):
            """Return the decoded 'created by' text, or None."""
            return self.__decode_text(self.__get_meta_top('created by'))

        def get_creation_date(self):
            """Return 'creation date' as a naive UTC datetime, or None."""
            utc_date = self.__get_meta_top('creation date')
            if utc_date is None:
                return None
            # Epoch arithmetic instead of utcfromtimestamp(): same result,
            # but also works for pre-1970 values on every platform.
            from datetime import timedelta
            return datetime(1970, 1, 1) + timedelta(seconds=utc_date)

        def get_comment(self):
            """Return the decoded 'comment' text, or None."""
            return self.__decode_text(self.__get_meta_top('comment'))

        def get_nodes(self):
            """Return the raw top-level 'nodes' value, or None."""
            return self.__get_meta_top('nodes')

        def get_piece_length(self):
            """Return the info dict's 'piece length' value, or None."""
            return self.__get_meta_info('piece length')

        def get_files(self):
            """Describe every file contained in the torrent.

            Returns a list of dicts, one per file, with the keys
              'name'   -- list of path components, torrent name first
              'length' -- the file's 'length' value
              'pieces' -- hashes of all pieces the file overlaps (a piece
                          shared by two adjacent files appears in both)
            Returns an empty list when 'pieces', 'name' or 'piece length'
            is missing.  For a multi-file torrent, processing stops at the
            first entry that lacks 'path' or 'length' or that needs more
            hashes than 'pieces' holds; entries before it are returned.
            """
            files = []
            pieces = self.__get_meta_info('pieces')
            name = self.__decode_text(self.__get_meta_info('name'))
            piece_length = self.get_piece_length()

            # A missing or zero piece length would break the divisions below.
            if not pieces or not name or not piece_length:
                return files

            if self.__is_singlefile():
                file_length = self.__get_meta_info('length')
                if not file_length:
                    return files
                file_pieces = self.__slice_pieces(
                    pieces, piece_length, 0, file_length)
                if file_pieces is None:
                    return files
                files.append({'name': [name],
                              'length': file_length,
                              'pieces': file_pieces})
                return files

            meta_files = self.__get_meta_info('files')
            if not meta_files:
                return files

            offset = 0  # byte offset of the current file within the torrent
            for one_file in meta_files:
                if 'path' not in one_file or 'length' not in one_file:
                    break
                length = one_file['length']
                file_pieces = self.__slice_pieces(
                    pieces, piece_length, offset, length)
                if file_pieces is None:
                    break

                path_list = [name]
                path_list.extend(
                    self.__decode_text(part) for part in one_file['path'])
                files.append({'name': path_list,
                              'length': length,
                              'pieces': file_pieces})
                offset += length

            return files
        
    if __name__ == '__main__':
        # Demo driver: load one torrent file and dump its metadata.
        # Uses syntax valid on both Python 2.6+ and Python 3: each line is
        # printed as one pre-formatted string, and 'except ... as'.
        import sys

        #filename = r".\huapi2.torrent"
        #filename = r".\mh5t3tJ0EC.torrent"
        # The torrent path may be given as the first command-line argument;
        # without one the original sample file is used.
        filename = sys.argv[1] if len(sys.argv) > 1 else r".\huapi2.1.torrent"
        torrent = TorrentFile()

        print("begin to read file")
        try:
            torrent.read_file(filename)
        except (IOError, BTFormatError) as reason:
            print("Read bittorrent file error! Error:%s" % (reason,))

        print("end to read file")

        # After a failed read the getters simply report None / nothing.
        print("announces:  %s" % (torrent.get_announces(),))
        print("peace length: %s" % (torrent.get_piece_length(),))
        print("code page: %s" % (torrent.get_codepage(),))
        print("encoding: %s" % (torrent.get_encoding(),))
        print("publisher: %s" % (torrent.get_publisher(),))
        print("publisher url: %s" % (torrent.get_publisher_url(),))
        print("creater: %s" % (torrent.get_creater(),))
        print("creation date: %s" % (torrent.get_creation_date(),))
        print("commnent: %s" % (torrent.get_comment(),))
        print("nodes: %s" % (torrent.get_nodes(),))
        for one_file in torrent.get_files():
            print('file name: %s' % ('\\'.join(one_file['name']),))
            print('file length: %s' % (one_file['length'],))
            print('pieces: %s' % (list(one_file['pieces']),))
    复制代码
    bcodec.py
    复制代码
      """Minimal bencode decoder (the encoding used by BitTorrent files).

      Created on 2012-9-30

      @author: ddt

      Only bdcode() is public.  The _read_* helpers each consume one value
      from the FRONT of a mutable list of single-character strings and
      return it, or return None when the input is malformed.  None is never
      a valid decoded value, so "value is None" is the only failure test;
      0, '', [] and {} are legitimate results.
      """

      # ASCII digits only: str.isdigit() also accepts characters such as
      # u'\xb2' that int() cannot parse.
      _DIGITS = frozenset('0123456789')


      def bdcode(data):
          """Decode the first bencoded value found in *data*.

          *data* may be a string, a byte string, or any iterable of
          single-character strings / integer byte values.  It is copied, so
          the caller's object is never modified.

          Returns a dict, list, int or string; returns None when *data* is
          empty or malformed.  Input after the first complete value is
          ignored.  Decoded strings contain one character per input byte.
          """
          # Iterating bytes on Python 3 (or a bytearray) yields ints; turn
          # them into characters so the readers can compare against 'd', 'e'...
          chars = [chr(c) if isinstance(c, int) else c for c in data]
          return _read_chunk(chars)


      def _read_chunk(data):
          """Consume and return the next value of any type, or None."""
          if not data:
              return None

          leading_chr = data[0]
          if leading_chr in _DIGITS:
              return _read_string(data)
          if leading_chr == 'd':
              return _read_dict(data)
          if leading_chr == 'i':
              return _read_integer(data)
          if leading_chr == 'l':
              return _read_list(data)
          return None


      def _read_dict(data):
          """Consume 'd' <key value>... 'e' and return it as a dict, or None."""
          if not data or data.pop(0) != 'd':
              return None

          chunk = {}
          while data and data[0] != 'e':
              key = _read_chunk(data)
              if not isinstance(key, str):
                  # Keys must be strings; this also covers a failed read.
                  return None
              value = _read_chunk(data)
              if value is None:
                  return None
              chunk[key] = value

          # Running out of input before the closing 'e' is an error.
          if not data or data.pop(0) != 'e':
              return None
          return chunk


      def _read_list(data):
          """Consume 'l' <value>... 'e' and return it as a list, or None."""
          if not data or data.pop(0) != 'l':
              return None

          chunk = []
          while data and data[0] != 'e':
              value = _read_chunk(data)
              if value is None:
                  return None
              chunk.append(value)

          if not data or data.pop(0) != 'e':
              return None
          return chunk


      def _read_string(data):
          """Consume <length> ':' <length characters> and return the string."""
          digits = []
          while data and data[0] in _DIGITS:
              digits.append(data.pop(0))

          if not digits or not data or data.pop(0) != ':':
              return None

          str_len = int(''.join(digits))
          if str_len > len(data):
              return None

          value = data[:str_len]
          del data[:str_len]
          return ''.join(value)


      def _read_integer(data):
          """Consume 'i' ['-'] <digits> 'e' and return the int, or None."""
          # 'i0e' is the shortest possible integer.
          if len(data) < len('i0e') or data.pop(0) != 'i':
              return None

          sign = ''
          if data[0] == '-':
              sign = data.pop(0)

          digits = []
          while data and data[0] in _DIGITS:
              digits.append(data.pop(0))

          if not data or data.pop(0) != 'e':
              return None
          if not digits:
              # 'ie' / 'i-e': nothing to convert.
              return None
          return int(sign + ''.join(digits))
    复制代码

关键字