torrent文件解析器

时间:2021-06-07 04:29:44

第二步工作是解析torrent文件,有了bencoding编码解析器 解析torrent文件当然是易如反掌的任务了.
实现的封装类CTorrentParser,完成的主要任务有:
1.判断torrent文件是否有效
2.得到如下的重要信息:
        tracker服务器列表
        文件列表
        分块尺寸
        分块个数
        分块sha1的数组
3.其他的一些次要信息如发布者,发布日期,注释等
4.计算infohash
        

Metainfo files are bencoded dictionaries with the following keys:

announce
The URL of the tracker.

info
This maps to a dictionary, with keys described below.

The name key maps to a string which is the suggested name to save the file (or directory) as. It is purely advisory.

piece length maps to the number of bytes in each piece the file is split into. For the purposes of transfer, files are split into fixed-size pieces which are all the same length except for possibly the last one which may be truncated. Piece length is almost always a power of two, most commonly $2^{18}$ = 256 K (BitTorrent prior to version 3.2 uses $2^{20}$ = 1 M as default).

pieces maps to a string whose length is a multiple of 20. It is to be subdivided into strings of length 20, each of which is the SHA1 hash of the piece at the corresponding index.

There is also a key length or a key files, but not both or neither. If length is present then the download represents a single file, otherwise it represents a set of files which go in a directory structure.

In the single file case, length maps to the length of the file in bytes.

For the purposes of the other keys, the multi-file case is treated as only having a single file by concatenating the files in the order they appear in the files list. The files list is the value files maps to, and is a list of dictionaries containing the following keys:

length
The length of the file, in bytes.
path
A list of strings corresponding to subdirectory names, the last of which is the actual file name (a zero length list is an error case).
In the single file case, the name key is the name of a file; in the multiple file case, it's the name of a directory.

下面是torrent文件解析器的VC++源代码:

// TorrentParser.h: interface for the CTorrentParser class.//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_TORRENTPARSER_H__7E67DA03_B65C_427F_A241_24230BCD6D54__INCLUDED_)
#define AFX_TORRENTPARSER_H__7E67DA03_B65C_427F_A241_24230BCD6D54__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
//torrent文件解析器
#include "BEncode.h"
#include "cyfile.h"
#include "cyhash.h"

// Parser for .torrent (metainfo) files.
// Typical use: call parse() to load and bencode-decode a file, then getinfo()
// to copy the fields of interest into the public members declared below.
class CTorrentParser
{
public:
// One entry of the torrent's file list.
class CFileInfo
{
public:
string strfilename;// relative path of the file
double dbfilelen;// file length (kept as a double)
};
// A 20-byte SHA-1 digest. The class holds nothing but the raw bytes, and
// getinfo() reinterprets the decoded "pieces" string as an array of these.
class CPieceSha1
{
public:
// Returns the digest as text: ASCII letters and digits are copied as-is,
// every other byte is written as %XX (two uppercase hex digits).
string getstring();
// Returns true when all 20 bytes are zero.
bool isempty();
BYTE btData[20];
};
CTorrentParser();
virtual ~CTorrentParser();
// Releases the decoded tree and the file data and forgets the root dictionary.
void clear();
// Loads and bencode-decodes the given file and locates the root dictionary.
// When szFilename is NULL the previously stored file name is reused.
// isvalid() can be called afterwards to check that the required keys exist.
bool parse(const char * szFilename);
// Returns true when the root dictionary contains both "announce" and "info".
bool isvalid();
// Looks up a key in the root dictionary; returns NULL when nothing has been
// parsed yet (what is returned for a missing key depends on CBEncodeDict).
CBEncodeObjectBase * getvalue(const char * szname);
public:
// Extracts the fields below from the decoded torrent. Returns true only when
// at least one tracker, at least one file, a non-zero info-hash, the piece
// length and the piece hashes were all obtained.
bool getinfo();
list<string> m_listAnnounce;// tracker URL list
list<CFileInfo> m_listFile;// file list
string m_strName;// suggested default file name or directory name
int m_iPiecelength;// length of each piece
CPieceSha1 m_Infohash;// SHA-1 of the "info" dictionary
CPieceSha1* m_pPieceSha1;// pointer to the array of piece hashes; points into the decoded "pieces" string, not a separate copy
int m_iPiececount;// number of pieces
int m_iCreationDate;// creation date
string m_strComment;// comment
string m_strPublisher; // publisher
string m_strPublisherurl;// publisher URL
string m_strCreatedBy;// tool that created the torrent
private:
string m_strfilename;// name of the file last passed to parse()
CCyFile m_cyfile;// file reader that supplies the raw torrent data
CBEncode m_bencode;// bencode decoder that holds the decoded objects
CBEncodeDict * m_pRootDict;// root node obtained from parsing
};

#endif // !defined(AFX_TORRENTPARSER_H__7E67DA03_B65C_427F_A241_24230BCD6D54__INCLUDED_)

// TorrentParser.cpp: implementation of the CTorrentParser class.////////////////////////////////////////////////////////////////////////#include "stdafx.h"#include "TorrentParser.h"//////////////////////////////////////////////////////////////////////// Construction/Destruction//////////////////////////////////////////////////////////////////////CTorrentParser::CTorrentParser(){	m_pRootDict = NULL;}CTorrentParser::~CTorrentParser(){	clear();}bool CTorrentParser::parse(const char *szFilename){	clear();	if(szFilename)		m_strfilename = szFilename;		if(!m_cyfile.IsExist(m_strfilename.c_str()))		return false;		if(m_cyfile.OpenFile(m_strfilename.c_str()))//打开文件	{		BYTE* pData= m_cyfile.GetData();	//读取文件数据		m_cyfile.CloseFile();		m_bencode.parse((const char*)pData);		//得到根节点		list<CBEncodeObjectBase*>::iterator it;		for(it = m_bencode.m_listObj.begin();it!=m_bencode.m_listObj.end();++it)		{			if((*it)->m_type == enum_BEncodeType_Dict)			{				m_pRootDict	= (CBEncodeDict*)(*it);				break;			}		}				CBEncodeString* pEOBAnnounce = (CBEncodeString*)getvalue("announce");		CBEncodeDict* pEOBInfo = (CBEncodeDict*)getvalue("info");		if(pEOBAnnounce && pEOBInfo)			true;					}	return false;}void CTorrentParser::clear(){	m_bencode.clear();	m_pRootDict = NULL;	m_cyfile.ReleaseData();}//检查是否有效的torrent文件bool CTorrentParser::isvalid(){	//检查是否存在announce和info字段			CBEncodeObjectBase* pEOBAnnounce = getvalue("announce");	CBEncodeObjectBase* pEOBInfo = getvalue("info");	if(pEOBAnnounce && pEOBInfo)		return true;		return false;}CBEncodeObjectBase * CTorrentParser::getvalue(const char *szname){		if(m_pRootDict)		return m_pRootDict->getvalue(szname);	return NULL;}//开始获取信息bool CTorrentParser::getinfo(){		string strValue;	CBEncodeString* pEOBAnnounce = (CBEncodeString*)getvalue("announce");	CBEncodeDict* pEOBInfo = (CBEncodeDict*)getvalue("info");	if(pEOBAnnounce && pEOBInfo)	{		//得到缺省tracker服务器								if(pEOBAnnounce->getstring(strValue))			m_listAnnounce.push_back(strValue);		//得到备用tracker列表		
CBEncodeList* pEOBAnnounceList = (CBEncodeList*)getvalue("announce-list");		if(pEOBAnnounceList && pEOBAnnounceList->m_type == enum_BEncodeType_List)		{			list<CBEncodeObjectBase *>::iterator it;			for(it = pEOBAnnounceList->m_listObj.begin();it!=pEOBAnnounceList->m_listObj.end();++it)			{				if((*it)->m_type == enum_BEncodeType_List)				{					CBEncodeList* pEOBAnnounceList2 = (CBEncodeList*)(*it);					if(pEOBAnnounceList2->m_listObj.begin()!=pEOBAnnounceList2->m_listObj.end())					{							if(((CBEncodeString*)(*pEOBAnnounceList2->m_listObj.begin()))->getstring(strValue))							m_listAnnounce.push_back(strValue);												}				}								}		}		CBEncodeString* pEOBTmp;		CBEncodeInt * pEOBInt;		//得到创建日期		m_iCreationDate = 0;		pEOBInt = (CBEncodeInt*)getvalue("creation date");		if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)					m_iCreationDate = (int)pEOBInt->m_fValue;		//得到注释		pEOBTmp= (CBEncodeString*)getvalue("comment");		if(pEOBTmp)			pEOBTmp->getstring(m_strComment);		//得到创建工具		pEOBTmp = (CBEncodeString*)getvalue("createdby");		if(pEOBTmp)			pEOBTmp->getstring(m_strCreatedBy);		//得到发布者		pEOBTmp = (CBEncodeString*)getvalue("publisher");		if(pEOBTmp)			pEOBTmp->getstring(m_strPublisher);		pEOBTmp = (CBEncodeString*)getvalue("publisher-url");		if(pEOBTmp)			pEOBTmp->getstring(m_strPublisherurl);				//计算infohash					if(pEOBInfo->m_error == enm_BEncodeErr_noerr)		{			CCyHash ch;			BYTE szSha1[21];			if(ch.GetHash(CALG_SHA1,(BYTE*)pEOBInfo->szPos,pEOBInfo->ilen,szSha1))				memcpy(m_Infohash.btData,szSha1,20);			else				memset(m_Infohash.btData,0,20);			//得到建议的默认文件名或者路径名			pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("name");			if(pEOBTmp)				pEOBTmp->getstring(m_strName);			//如果没有得到发布者,再次尝试获取			if(m_strPublisher.empty())			{				pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("publisher");				if(pEOBTmp)					pEOBTmp->getstring(m_strPublisher);				pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("publisher-url");				if(pEOBTmp)					
pEOBTmp->getstring(m_strPublisherurl);			}											//得到分块长度			m_iPiecelength = 0;			pEOBInt = (CBEncodeInt*)pEOBInfo->getvalue("piece length");			if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)				m_iPiecelength = (int)pEOBInt->m_fValue;						//得到piece的sha1数组及其piece个数			pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("pieces");			if(pEOBTmp->m_type == enum_BEncodeType_String && pEOBTmp->m_error == enm_BEncodeErr_noerr && pEOBTmp->m_szData)			{				m_pPieceSha1 = (CPieceSha1*)pEOBTmp->m_szData;				m_iPiececount = pEOBTmp->m_ilen/20;			}			//得到文件列表			CFileInfo fi;			pEOBInt = (CBEncodeInt*)pEOBInfo->getvalue("length");			if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)			{				fi.dbfilelen = (double)pEOBInt->m_fValue;							fi.strfilename = m_strName;				if(fi.strfilename.size()>0&&fi.dbfilelen>0)					m_listFile.push_back(fi);			}			else			{				CBEncodeList * pEOBList = (CBEncodeList*)pEOBInfo->getvalue("files");				if(pEOBList && pEOBList->m_type == enum_BEncodeType_List)				{					list<CBEncodeObjectBase *>::iterator it;					for(it = pEOBList ->m_listObj.begin();it!=pEOBList ->m_listObj.end();++it)					{						if((*it)->m_type == enum_BEncodeType_Dict)						{							fi.dbfilelen = 0;							fi.strfilename = "";							pEOBInt = (CBEncodeInt*)((CBEncodeDict*)(*it))->getvalue("length");							if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)															fi.dbfilelen = (double)pEOBInt->m_fValue;							CBEncodeList* pEOBPathList = (CBEncodeList*)((CBEncodeDict*)(*it))->getvalue("path");							if(pEOBPathList && pEOBPathList->m_type == enum_BEncodeType_List)							{								list<CBEncodeObjectBase *>::iterator it2;								for(it2 = pEOBPathList->m_listObj.begin();it2 != pEOBPathList->m_listObj.end();++it2)								{									if(((CBEncodeString*)(*it2))->getstring(strValue))																						fi.strfilename +="//"+ strValue;									else									{										fi.strfilename = "";										break;									}								}							}							
if(fi.strfilename.size()>0&&fi.dbfilelen>0)								m_listFile.push_back(fi);						}										}				}			}			//检查主要的信息是否正确获取,如果是就返回成功			if(m_listAnnounce.size()==0 ||				m_listFile.size()==0 ||				m_Infohash.isempty() ||				m_iPiececount == 0 ||				m_iPiecelength == 0 ||				m_pPieceSha1 == NULL)				return false;						return true;		}				}	return false;}//检查CPieceSha1是否为空bool CTorrentParser::CPieceSha1::isempty(){	CPieceSha1 pstmp;	memset(pstmp.btData,0,sizeof(pstmp));	if(memcmp(pstmp.btData,btData,sizeof(pstmp))==0)		return true;	return false;}string CTorrentParser::CPieceSha1::getstring(){	char sz[100];	char * szTmp = sz;	BYTE bt;	for(int i = 0;i<sizeof(CPieceSha1);i++)	{				bt = btData[i];		if((bt>='a' && bt<='z')||			(bt>='A' && bt<='Z')||			(bt>='0' && bt<='9'))		{			sprintf(szTmp,"%c",bt);			szTmp +=1;		}		else				{			sprintf(szTmp,"%%%02X",bt);			szTmp +=3;		}	}	*szTmp = 0;	return sz;}
在逸搜论坛有一个解析torrent文件的小工具可以看到测试效果.