00001
00002
00003 namespace std
00004 {
00005
00006 using bioinfo::fasta;
00007 using bioinfo::fasta_seq;
00008
00009 template <typename seqtype>
00010 std::ostream&
00011 operator<<(std::ostream& os, const bioinfo::fasta<seqtype>& mf) {
00012 typename fasta<seqtype>::const_iterator iter = mf.begin();
00013 typename fasta<seqtype>::const_iterator stop = mf.end();
00014 while(iter != stop) {
00015 const fasta_seq<seqtype>& fs = **iter;
00016 os<<fs;
00017 ++iter;
00018 }
00019 return os;
00020 }
00021
00022 template <typename seqtype>
00023 std::istream&
00024 operator>>(std::istream& is, bioinfo::fasta<seqtype>& mf) {
00025 for(;;) {
00026 fasta_seq<seqtype>* fseq = new fasta_seq<seqtype>;
00027 is>>(*fseq);
00028 if(is.fail()) {
00029 delete fseq;
00030 break;
00031 } else {
00032 mf.push_back(fseq);
00033 }
00034 }
00035 return is;
00036 }
00037 }
00038
00039 namespace bioinfo
00040 {
00041
00042 template <typename seqtype>
00043 fasta<seqtype>::fasta(const string& filename) {
00044 read(filename);
00045 }
00046
00047 template <typename seqtype>
00048 fasta<seqtype>::~fasta() {
00049
00050 typename fasta<seqtype>::iterator iter = begin();
00051 typename fasta<seqtype>::iterator stop = end();
00052 while(iter != stop) {
00053 fasta_seq<seqtype>* tmp = *iter;
00054 delete tmp;
00055 ++iter;
00056 }
00057 }
00058
00059
00060 template <typename seqtype>
00061 void
00062 fasta<seqtype>::write(const string& filename) const {
00063 std::ofstream ofs(filename.c_str());
00064 if( !ofs ) {
00065 throw runtime_error("Unable to write fasta file: " + filename);
00066 }
00067 ofs<<*this;
00068 }
00069
00070 template <typename seqtype>
00071 void
00072 fasta<seqtype>::read(const string& filename) {
00073 std::ifstream ifs(filename.c_str());
00074 if( !ifs ) {
00075 throw std::runtime_error("Unable to read fasta file: " + filename);
00076 }
00077 ifs>>*this;
00078 }
00079
00080
00081 }
00082
00083 #if 0
00084 void dsu::writeFastaFile(const string& header, const string& sequence, const string& filename) {
00085 ofstream ofs(filename.c_str());
00086 if( !ofs ) {
00087 throw runtime_error("unable to write fasta file: " + filename);
00088 }
00089 ofs<<">"<<header<<endl;
00090 const unsigned linelen = 60;
00091 for(unsigned i = 0; i < sequence.length(); i+=linelen) {
00092 string sub = sequence.substr(i,linelen);
00093 ofs<<sub<<endl;
00094 }
00095 }
00096
00097 void dsu::readFastaFile(string& sequenceResult, const string& fastaFile) {
00098 const int BUFF_SIZE = 512000;
00099 char stra[BUFF_SIZE];
00100 ifstream ifs(fastaFile.c_str());
00101 if(!ifs) {
00102 ostrstream ostrm;
00103 ostrm<<"Unable to open (for reading): "<<endl;
00104 throw runtime_error(ostrm.str());
00105 }
00106 sequenceResult = "";
00107
00108 bool status = ifs.getline(stra,BUFF_SIZE);
00109 if( !status )
00110 throw runtime_error("unable to read file");
00111 if( stra[0] != '>' )
00112 throw runtime_error("fasta does not start with header");
00113
00114 while( ifs.getline(stra,BUFF_SIZE) ) {
00115 sequenceResult.append(stra);
00116 }
00117
00118 }
00119
00120 void dsu::readFastaHeader(string& headerResult, const string& fastaFile) {
00121 ifstream ifs(fastaFile.c_str());
00122 if(!ifs) {
00123 ostrstream ostrm;
00124 ostrm<<"Unable to open (for reading): "<<endl;
00125 throw runtime_error(ostrm.str());
00126 }
00127 headerResult = "";
00128
00129 if( ! (ifs>>headerResult) ) {
00130 throw runtime_error("unable to read file");
00131 } else if( headerResult[0] != '>') {
00132 throw runtime_error("fasta does not start with header");
00133 }
00134 }
00135
00136 #if 0
00137 void dsu::readMultiFastaFile(list<string>& strlst, const string& filename) {
00138 ifstream ifs(filename.c_str());
00139 if(!ifs) {
00140 throw runtime_error("Unable to read: "+filename);
00141 }
00142 const int bsize = 200;
00143 char buff[bsize];
00144 ifs.getline(buff,bsize);
00145 if( *buff != '>') {
00146 throw runtime_error("not multifasta file");
00147 }
00148 string seq;
00149 while(ifs.getline(buff,bsize)) {
00150 if( *buff == '>') {
00151 strlst.push_back(seq);
00152 seq = "";
00153 } else {
00154 string strbuff = buff;
00155 seq += strbuff;
00156 }
00157 }
00158 strlst.push_back(seq);
00159 }
00160 #endif
00161
00162
00163 void readMultiFastaFile(fasta_t& strlst, const string& filename) {
00164 ifstream ifs(filename.c_str());
00165 if(!ifs) {
00166 throw runtime_error("Unable to read: "+filename);
00167 }
00168 const int bsize = 200;
00169 char buff[bsize];
00170 ifs.getline(buff,bsize);
00171 fasta_t seq;
00172 if( *buff != '>') {
00173 throw runtime_error("not multifasta file");
00174 } else
00175 seq.first = buff;
00176 while(ifs.getline(buff,bsize)) {
00177 if( *buff == '>') {
00178 strlst.push_back(seq);
00179 seq.first = buff;
00180 seq.second = "";
00181 } else {
00182 string strbuff = buff;
00183 for(unsigned k = 0; k < strbuff.size(); ++k) {
00184 strbuff[k] = tolower(strbuff[k]);
00185 }
00186 seq.second += strbuff;
00187 }
00188 }
00189 strlst.push_back(seq);
00190 }
00191
00192
00193
00194 namespace std
00195 {
00196
00197 ostream& operator<<(ostream& os, const bioinfo_utils::fasta& fasta) {
00198 fasta::const_iterator iter = fasta.begin();
00199 fasta::const_iterator stop = fasta.end();
00200 while(iter != stop) {
00201 os<<*iter<<endl;
00202 ++iter;
00203 }
00204 return os;
00205 }
00206 }
00207
00208 #endif