蛋白质PDB文件读取C++代码

在生物信息学中，PDB数据库是非常重要的蛋白质结构数据库。PDB数据文件的读取是进行蛋白质结构相关预测的重要步骤！下面给出了简单的PDB文件读取的C++代码，以供参考：
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sstream>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;

class Residue {
public:
	vector<double*> atomxyzs;
	vector<string> atominfos;	// each element for example : "ATOM      1  N   GLU    98    "
	int caind;
	int cbind;
	int res_pos;
	int orig_res_pos;

	Residue(vector<double*> _atomxyzs, vector<string> _atominfos, int _caind, int _cbind, int _res_pos, int _orig_res_pos){
		atomxyzs = _atomxyzs;
		atominfos = _atominfos;
		caind = _caind;
		cbind = _cbind;
		res_pos = _res_pos;
		orig_res_pos = _orig_res_pos;
	}
};

class Protein{
private:
	string pdbpath;				// in
	vector<Residue> residues;	// out

public:
	Protein(string _pdbpath){
		pdbpath = _pdbpath;

		readPDB();
	}

	int size(){
		return residues.size();
	}

	Residue getIthResidue(int i){
		return residues[i];
	}

	int getIthResidueOriginalIndexInProt(int i){
		return residues[i].orig_res_pos;
	}

	double* getIthResidueCAXYZ(int i){
		return residues[i].atomxyzs[residues[i].caind];
	}

	double* getIthResidueCBXYZ(int i){
		return residues[i].atomxyzs[residues[i].cbind];
	}

	vector<Residue> getResidues(){
		return residues;
	}

	void save(vector<Residue> _residues, string savepath){
		ofstream fout(savepath);
		for (int i = 0; i < _residues.size(); i++){
			Residue res = _residues[i];
			for (int j = 0 ; j < res.atominfos.size(); j++){
				char temp[1000];

				string atominfo = res.atominfos[j];	// "ATOM      1  N   GLU    98    "
				double* xyz = res.atomxyzs[j];	//"  66.677"
				char xstr[9];
				char ystr[9];
				char zstr[9];
				sprintf(xstr, "%8.3f", xyz[0]);
				sprintf(ystr, "%8.3f", xyz[1]);
				sprintf(zstr, "%8.3f", xyz[2]);
				string useless = "  1.00  0.00";
				fout << atominfo << xstr << ystr << zstr << useless << endl;
			}
		}

		fout.close();
	}

	/*char* formatDouble(double v, int ch_num){
		char tmp[64];
		sprintf(tmp, "%8.3f %8.3f %8.3f\n", x[i][0], x[i][1], x[i][2]);
	}*/

private:
	void readPDB(){
		vector<int> atomindex;
		vector<string> atominfos;
		vector<double*> points;
		int ca_pos_in_res = 0;
		int cb_pos_in_res = 0;

		int pre_index = -9999999;
		ifstream fin(pdbpath);
		string line;
		getline(fin, line);
		string lastline(line);
		bool isFirstATOM = true;
		bool isContainCA = false;
		int tmp_pos = 0;
		int res_pos = 1;
		int atom_pos = 1;
		while (fin.good()){
			if (line.compare(0, 3, "TER")==0)
				break;
			if (line.compare(0, 4, "ATOM")==0){
				char cstr[50];
				double x = 0.0;
				strcpy(cstr, (line.substr(30, 8)).c_str());
				sscanf(cstr, "%lf", &x);

				double y = 0.0;
				strcpy(cstr, (line.substr(38, 8)).c_str());
				sscanf(cstr, "%lf", &y);

				double z = 0.0;
				strcpy(cstr, (line.substr(46, 8)).c_str());
				sscanf(cstr, "%lf", &z);

				double* xyz = new double[3];
				xyz[0] = x;
				xyz[1] = y;
				xyz[2] = z;

				string atominfo = line.substr(0, 30);	

				int now_index = -999999;
				strcpy(cstr, (line.substr(22, 4)).c_str());
				sscanf(cstr, "%d", &now_index);

				if (line.compare(13, 3, "CA ")==0){
					isContainCA = true;
					ca_pos_in_res = tmp_pos;

					if (isFirstATOM){
						pre_index = now_index;
					}
				}

				if (line.compare(13, 3, "CB ")==0){
					cb_pos_in_res = tmp_pos;
				}

				if (now_index != pre_index){
					if (isContainCA){
						if (pre_index != -9999999){
							Residue tmpResidue(points, atominfos, ca_pos_in_res, cb_pos_in_res, res_pos, pre_index);
							residues.push_back(tmpResidue);
							res_pos++;
						}
						points.clear();
						points.push_back(xyz);
						atominfos.clear();
						atominfos.push_back(atominfo);

						tmp_pos = 0;
						isContainCA = false;
						if (line.compare(13, 3, "CA ")==0){
							isContainCA = true;
						}
					}else{
						if (isFirstATOM){
							points.push_back(xyz);
							atominfos.push_back(atominfo);
						}
					}
					pre_index = now_index;
				}else{
					points.push_back(xyz);
					atominfos.push_back(atominfo);
				}

				isFirstATOM = false;

				tmp_pos++;
			}

			lastline = line;
			getline(fin, line);
		}

		if (isContainCA || lastline.length() >= 16 || lastline.compare(13, 3, "CA ")==0){
			Residue tmpResidue(points, atominfos, ca_pos_in_res, cb_pos_in_res, res_pos, pre_index);
			residues.push_back(tmpResidue);
		}

		fin.close();
	}
};
秒客网

蛋白质PDB文件读取C++代码

相关文章