#include <fstream>
#include <stdexcept>
#include <string>
#include <strstream>
00004
00005 namespace std
00006 {
00007
00008 template <typename seqtype>
00009 ostream&
00010 operator<<(ostream& os, const bioinfo::fasta_seq<seqtype>& fasta_seq) {
00011 os<<">"<<fasta_seq.get_header()<<endl;
00012 const seqtype& seq = fasta_seq.get_seq();
00013 unsigned cnt = 0;
00014 const unsigned num_chars = 60;
00015 while(cnt < seq.size()) {
00016 for(unsigned i = cnt; i < num_chars && i < seq.size();
00017 i+=num_chars, cnt+=num_chars) {
00018 const seqtype& sub_seq = seq.substr(i,num_chars);
00019 os<<sub_seq;
00020 }
00021 os<<endl;
00022 }
00023 return os;
00024 }
00025
00026
00033 template <typename CharT, typename CharTraits, typename seqtype>
00034 basic_istream<CharT,CharTraits>&
00035 operator>>(basic_istream<CharT,CharTraits>& is, bioinfo::fasta_seq<seqtype>& fasta_seq) {
00036 string* strPtr = new string;
00037 if( !(is>>*strPtr) ) {
00038 delete strPtr;
00039 return is;
00040 }
00041 if( (*strPtr)[0] != '>' ) {
00042 string tmp(*strPtr);
00043 delete strPtr;
00044 throw runtime_error("invalid fasta_seq format: " + tmp);
00045 }
00046
00047
00048 strPtr->erase(strPtr->begin());
00049 fasta_seq.set_header(strPtr);
00050 string str,sum;
00051 seqtype& seq = fasta_seq.get_seq();
00052 std::istream::pos_type pos = is.tellg();
00053 const typename CharTraits::int_type eof = CharTraits::eof();
00054
00055
00056
00057
00058 for(;;) {
00059 if(is>>str) {
00060 if( str[0] == '>' ) {
00061 is.seekg(pos);
00062 break;
00063 }
00064 pos = is.tellg();
00065 sum.append(str);
00066
00067
00068
00069 if( '\n' == CharTraits::to_char_type(is.peek()) ) {
00070
00071 is.get();
00072 if( is.peek() == eof ) {
00073
00074 is.unget();
00075 break;
00076 }
00077 }
00078 }
00079 }
00080 istrstream istrm(sum.c_str());
00081 istrm>>seq;
00082 return is;
00083 }
00084 }
00085
00086 namespace bioinfo
00087 {
00088
00089 using std::string;
00090
00091 template <typename seqtype>
00092 fasta_seq<seqtype>::fasta_seq() {
00093 _fasta_seq.first.reset(new string);
00094 _fasta_seq.second.reset(new seqtype);
00095 }
00096
00097 template <typename seqtype>
00098 fasta_seq<seqtype>::~fasta_seq() {
00099
00100 }
00101
00102 template <typename seqtype>
00103 void
00104 fasta_seq<seqtype>::write(const std::string& filename) const {
00105 std::ofstream ofs(filename.c_str());
00106 if( !ofs ) {
00107 throw runtime_error("unable to write fasta_seq file: " + filename);
00108 }
00109 ofs<<*this;
00110 }
00111
00112 template <typename seqtype>
00113 void
00114 fasta_seq<seqtype>::read(const std::string& filename) {
00115 std::ifstream ifs(filename.c_str());
00116 if( !ofs ) {
00117 throw runtime_error("unable to write fasta_seq file: " + filename);
00118 }
00119 ifs>>*this;
00120 }
00121 }
00122
00123 #if 0
00124
00125 void dsu::readFastaFile(string& sequenceResult, const string& fasta_seqFile) {
00126 const int BUFF_SIZE = 512000;
00127 ifstream ifs(fasta_seqFile.c_str());
00128 if(!ifs) {
00129 ostrstream ostrm;
00130 ostrm<<"Unable to open (for reading): "<<endl;
00131 throw runtime_error(ostrm.str());
00132 }
00133 sequenceResult = "";
00134
00135 bool status = ifs.getline(stra,BUFF_SIZE);
00136 if( !status )
00137 throw runtime_error("unable to read file");
00138 if( stra[0] != '>' )
00139 throw runtime_error("fasta_seq does not start with header");
00140
00141 while( ifs.getline(stra,BUFF_SIZE) ) {
00142 sequenceResult.append(stra);
00143 }
00144
00145 }
00146
00147 void dsu::readFastaHeader(string& headerResult, const string& fasta_seqFile) {
00148 ifstream ifs(fasta_seqFile.c_str());
00149 if(!ifs) {
00150 ostrstream ostrm;
00151 ostrm<<"Unable to open (for reading): "<<endl;
00152 throw runtime_error(ostrm.str());
00153 }
00154 headerResult = "";
00155
00156 if( ! (ifs>>headerResult) ) {
00157 throw runtime_error("unable to read file");
00158 } else if( headerResult[0] != '>') {
00159 throw runtime_error("fasta_seq does not start with header");
00160 }
00161 }
00162
00163 #if 0
00164 void dsu::readMultiFastaFile(list<string>& strlst, const string& filename) {
00165 ifstream ifs(filename.c_str());
00166 if(!ifs) {
00167 throw runtime_error("Unable to read: "+filename);
00168 }
00169 const int bsize = 200;
00170 char buff[bsize];
00171 ifs.getline(buff,bsize);
00172 if( *buff != '>') {
00173 throw runtime_error("not multifasta_seq file");
00174 }
00175 string seq;
00176 while(ifs.getline(buff,bsize)) {
00177 if( *buff == '>') {
00178 strlst.push_back(seq);
00179 seq = "";
00180 } else {
00181 string strbuff = buff;
00182 seq += strbuff;
00183 }
00184 }
00185 strlst.push_back(seq);
00186 }
00187 #endif
00188
00189
00190 void readMultiFastaFile(mfasta_seq_t& strlst, const string& filename) {
00191 ifstream ifs(filename.c_str());
00192 if(!ifs) {
00193 throw runtime_error("Unable to read: "+filename);
00194 }
00195 const int bsize = 200;
00196 char buff[bsize];
00197 ifs.getline(buff,bsize);
00198 fasta_seq_t seq;
00199 if( *buff != '>') {
00200 throw runtime_error("not multifasta_seq file");
00201 } else
00202 seq.first = buff;
00203 while(ifs.getline(buff,bsize)) {
00204 if( *buff == '>') {
00205 strlst.push_back(seq);
00206 seq.first = buff;
00207 seq.second = "";
00208 } else {
00209 string strbuff = buff;
00210 for(unsigned k = 0; k < strbuff.size(); ++k) {
00211 strbuff[k] = tolower(strbuff[k]);
00212 }
00213 seq.second += strbuff;
00214 }
00215 }
00216 strlst.push_back(seq);
00217 }
00218
00219
00220
00221 Strand_t str2strnd(const string& parseStrand) {
00222 dsu::Strand_t theStrand = dsu::eEither;
00223 if(parseStrand == "-")
00224 theStrand = dsu::eNeg;
00225 else if(parseStrand == "+")
00226 theStrand = dsu::ePos;
00227 return theStrand;
00228 }
00229
00230 dsu::Strand_t dsu::coord2strnd(int end5, int end3) {
00231 return end5 > end3 ? dsu::eNeg : dsu::ePos;
00232 }
00233
00234 static const string posStr = "+";
00235 static const string negStr = "-";
00236 static const string eitherStr = "either";
00237
00238 const string& dsu::strnd2str(dsu::Strand_t strnd) {
00239 switch(strnd) {
00240 case ePos:
00241 return posStr;
00242 case eNeg:
00243 return negStr;
00244 default:
00245 return eitherStr;
00246 }
00247 }
00248
00249 }
00250
00251 namespace std
00252 {
00253
00254 ostream& operator<<(ostream& os, const dsu::fasta_seq_t& fasta_seq) {
00255 os<<">"<<fasta_seq.first<<endl;
00256 const unsigned linelen = 60;
00257 for(unsigned i = 0; i < fasta_seq.second.length(); i+=linelen) {
00258 string sub = fasta_seq.second.substr(i,linelen);
00259 os<<sub<<endl;
00260 }
00261 return os;
00262 }
00263 }
00264 #endif