Main Page   Class Hierarchy   Compound List   File List   Compound Members  

mfasta.hpp

00001 // mfasta.tcc -*- C++ -*-
00002 
00003 namespace std
00004 {
00005 
00006   using bioinfo::mfasta;
00007   using bioinfo::fasta;
00008 
00009   template <typename seqtype>
00010   std::ostream& 
00011   operator<<(std::ostream& os, const bioinfo::mfasta<seqtype>& mf) {
00012     typename mfasta<seqtype>::const_iterator iter = mf.begin();
00013     typename mfasta<seqtype>::const_iterator stop = mf.end();
00014     while(iter != stop) {
00015       const fasta<seqtype>& fs = **iter;
00016       os<<fs;
00017       ++iter;
00018     }
00019     return os;
00020   }
00021 
00022   template <typename seqtype>
00023   std::istream& 
00024   operator>>(std::istream& is, bioinfo::mfasta<seqtype>& mf) {
00025     for(;;) {
00026       fasta<seqtype>* fseq = new fasta<seqtype>;
00027       is>>(*fseq);
00028       if(is.fail()) {
00029         delete fseq;
00030         break;
00031       } else {
00032         mf.push_back(fseq);
00033       }
00034     }
00035     return is;
00036   }
00037 }
00038 
00039 namespace bioinfo
00040 {
00041 
00042   template <typename seqtype>
00043   mfasta<seqtype>::mfasta(const string& filename) {
00044     read(filename);
00045   }
00046 
00047   template <typename seqtype>
00048   mfasta<seqtype>::~mfasta() {
00049     // what's the best way to destroy?
00050     typename mfasta<seqtype>::iterator iter = begin();
00051     typename mfasta<seqtype>::iterator stop = end();
00052     while(iter != stop) {
00053       fasta<seqtype>* tmp = *iter;
00054       delete tmp;
00055       ++iter;
00056     }
00057   }
00058   
00059 
00060   template <typename seqtype>
00061   void 
00062   mfasta<seqtype>::write(const string& filename) const {
00063     std::ofstream ofs(filename.c_str());
00064     if( !ofs ) {
00065       throw runtime_error("Unable to write fasta file: " + filename);
00066     }
00067     ofs<<*this;
00068   }
00069 
00070   template <typename seqtype>
00071   void 
00072   mfasta<seqtype>::read(const string& filename) {
00073     std::ifstream ifs(filename.c_str());
00074     if( !ifs ) {
00075       throw std::runtime_error("Unable to read fasta file: " + filename);
00076     }
00077     ifs>>*this;
00078   }
00079 
00080 
00081 }
00082 
00083 #if 0
00084 void dsu::writeFastaFile(const string& header, const string& sequence, const string& filename) {
00085   ofstream ofs(filename.c_str());
00086   if( !ofs ) {
00087     throw runtime_error("unable to write fasta file: " + filename);
00088   }
00089   ofs<<">"<<header<<endl;
00090   const unsigned linelen = 60;
00091   for(unsigned i = 0; i < sequence.length(); i+=linelen) {
00092     string sub = sequence.substr(i,linelen);
00093     ofs<<sub<<endl;
00094   }
00095 }
00096 
00097 void dsu::readFastaFile(string& sequenceResult, const string& fastaFile) {
00098         const int BUFF_SIZE = 512000;
00099    char stra[BUFF_SIZE];
00100    ifstream ifs(fastaFile.c_str());
00101    if(!ifs) {
00102                 ostrstream ostrm;
00103       ostrm<<"Unable to open (for reading): "<<endl;
00104                 throw runtime_error(ostrm.str());
00105    }
00106         sequenceResult = "";
00107    // load dna strings
00108         bool status = ifs.getline(stra,BUFF_SIZE);
00109         if( !status )
00110                 throw runtime_error("unable to read file");
00111         if( stra[0] != '>' ) 
00112                 throw runtime_error("fasta does not start with header");
00113 
00114    while( ifs.getline(stra,BUFF_SIZE) ) {
00115       sequenceResult.append(stra);
00116    }
00117         
00118 }
00119 
00120 void dsu::readFastaHeader(string& headerResult, const string& fastaFile) {
00121    ifstream ifs(fastaFile.c_str());
00122    if(!ifs) {
00123       ostrstream ostrm; 
00124       ostrm<<"Unable to open (for reading): "<<endl;
00125       throw runtime_error(ostrm.str());
00126    }
00127    headerResult = "";
00128    // load dna strings
00129         if( ! (ifs>>headerResult) ) {
00130       throw runtime_error("unable to read file");
00131         } else if( headerResult[0] != '>') {
00132       throw runtime_error("fasta does not start with header");
00133         }
00134 }
00135 
00136 #if 0
00137 void dsu::readMultiFastaFile(list<string>& strlst, const string& filename) {
00138    ifstream ifs(filename.c_str());
00139    if(!ifs) {
00140       throw runtime_error("Unable to read: "+filename);
00141    }
00142    const int bsize = 200;
00143    char buff[bsize];
00144    ifs.getline(buff,bsize);
00145    if( *buff != '>') {
00146      throw runtime_error("not multifasta file");
00147    }
00148    string seq;
00149    while(ifs.getline(buff,bsize)) {
00150      if( *buff == '>') {
00151        strlst.push_back(seq);
00152        seq = "";
00153      } else {
00154        string strbuff = buff;
00155        seq += strbuff;
00156      }
00157    }
00158    strlst.push_back(seq);
00159 }
00160 #endif
00161 
00162 
00163 void readMultiFastaFile(mfasta_t& strlst, const string& filename) {
00164    ifstream ifs(filename.c_str());
00165    if(!ifs) {
00166       throw runtime_error("Unable to read: "+filename);
00167    }
00168    const int bsize = 200;
00169    char buff[bsize];
00170    ifs.getline(buff,bsize);
00171    fasta_t seq;
00172    if( *buff != '>') {
00173      throw runtime_error("not multifasta file");
00174    } else 
00175      seq.first = buff;
00176    while(ifs.getline(buff,bsize)) {
00177      if( *buff == '>') {
00178        strlst.push_back(seq);
00179        seq.first = buff;
00180        seq.second = "";
00181      } else {
00182        string strbuff = buff;
00183        for(unsigned k = 0; k < strbuff.size(); ++k) {
00184          strbuff[k] = tolower(strbuff[k]);
00185        }
00186        seq.second += strbuff;
00187      }
00188    }
00189    strlst.push_back(seq);
00190 }
00191 
00192 
00193 
00194 namespace std
00195 {
00196 
00197   ostream& operator<<(ostream& os, const bioinfo_utils::mfasta& mfasta) {
00198     mfasta::const_iterator iter = mfasta.begin();
00199     mfasta::const_iterator stop = mfasta.end();
00200     while(iter != stop) {
00201       os<<*iter<<endl;
00202       ++iter;
00203     }
00204     return os;
00205   }
00206 } //namespace std;
00207 
00208 #endif