首页 > 代码库 > 454ITS数据按barcode和primer分类程序v1.0

454ITS数据按barcode和primer分类程序v1.0

不知道有什么好办法可以让primer允许漏配,现在仅仅是允许错配,还是有一些没有配上,454数据有些primer漏配了一些,下一步解决这个问题

  1 #include <cstdio>  2 #include <cstdlib>  3 #include <string>  4 #include <iostream>  5 #include <fstream>  6 #include <iomanip>  7 #include <getopt.h>  8 using namespace std;  9  10 int max_mis = 1; 11 int N = 0; 12 string primer = ""; 13 string* barcode = NULL; 14 string* barcodeName = NULL; 15 void usage () 16 { 17         cout << "\nUsage: splitByBarcode [options] <read.fasta> <barcode.txt> <output prefix>\n" 18         << "  -m <int>  allow the max mis num, default " << max_mis << "\n" 19     << "  -p <string> primer, required    "<<endl 20     << "  -n <int>  barcode num, required"<<endl; 21         exit(1); 22 } 23 struct COMPARE{ 24     int ifCompared; 25     int posStart; 26     int posEnd; 27 }; 28 COMPARE compare(string reads, string barcode, int maxmis,int b,int e); 29 int main(int argc, char *argv[]) 30 { 31     if (argc < 4) 32     { 33         usage(); 34     } 35     int c; 36     while ((c=getopt(argc,argv,"m:n:p:")) != -1) 37     { 38         switch (c) 39         { 40             case m : max_mis = atoi(optarg);break; 41             case n : N = atoi(optarg); break; 42             case p : primer = optarg;break;  43             default  : cout<<"error:"<<(char)c<<endl;usage(); 44         } 45     } 46     if(N<=0){ 47         cerr<<"please input the required barcode num\n"; 48         exit(1);     49     } 50     if(primer == ""){ 51         cerr<<"please input the required primer\n"; 52         exit(1); 53     } 54     string seq_file = argv[optind++]; 55     string barcode_file = argv[optind++]; 56     string outPrefix = argv[optind++]; 57     string out_file = outPrefix + ".list"; 58     barcode = new string[N]; 59     barcodeName = new string[N]; 60  61     for(int i=0;i<N;i++){ 62         barcodeName[i] = ""; 63         barcode[i] = ""; 64     } 65      66     ifstream reads; 67     reads.open(seq_file.c_str()); 68     if(!reads){ 69         cerr<<"fail to open input file"<<seq_file<<endl;     70     } 71     ifstream barcode_in; 72     barcode_in.open(barcode_file.c_str()); 73     if(!barcode_in){ 74         cerr<<"fail to open barcode file" <<barcode_file<<endl; 75     } 76      77     string text; 78     int k=0; 79     while( getline( barcode_in, text, \n) ) 80     { 81         int startPos = 0, endPos; 82         endPos = text.find("\t"); 83         barcodeName[k] = text.substr(startPos, endPos-startPos+1); 84         startPos = endPos + 1; 85         endPos = text.find("\t",startPos); 86         barcode[k] = text.substr(startPos, endPos-startPos+1); 87         //cout << barcodeName[k] <<"\t"<< barcodeForward[k] <<"\t"<< barcodeReverse[k] << endl; 88         k++; 89     } 90  91     ofstream splitlist; 92     splitlist.open(out_file.c_str()); 93     if(!splitlist){ 94         cerr<<"fail to creat output file" << out_file <<endl; 95     } 96     int line_num = 1; 97     splitlist<<"barcodeName\tid\tid_line\n"; 98     while( getline( reads, text, \n) ) 99     {100         string seq = "", id = "";101         id = text;102         getline( reads, text, \n);103         seq = text;104 105         COMPARE p1 ,p2;106         p1.ifCompared = p2.ifCompared = 0;107         for(int k=0;k<N;k++){108             p1 = compare(seq,primer,max_mis,0,seq.length()-primer.length()+1);109             if( p1.ifCompared )110             {111                 p2 = compare(seq,barcode[k],0,0,p1.posStart);112                 if ( p2.ifCompared ){113                 //cout<<seq1<<"\t"<<barcodeForward[k]<<"\t"<<seq2<<"\t"<<barcodeReverse[k]<<endl;114                     splitlist<<barcodeName[k]<<"\t"<<id<<"\t"<<line_num<<endl;115                     break;116                 }117             }118         }119         line_num = line_num + 2;120     }121     delete[] barcode;122     delete[] barcodeName;123     splitlist.close();124     barcode_in.close();125     reads.close();126 }127 COMPARE compare(string reads, string barcode, int maxmis, int b, int e)128 {129     COMPARE p;130     //cout<<b<<"\t"<<e<<endl;131 //    cout<<barcode<<"\t"<<barcode.length()<<endl;132     for(int i=b;i<e;i++)133     {134         int mismatch = 0;135         int pos = 0;136         while(mismatch <= maxmis ){137             if(reads[i+pos] != barcode[pos]) mismatch++;138             pos++;139             if(pos == barcode.length()){140                 p.posStart = i;141                 p.posEnd = i+pos;142             //    cout<<p.posStart<<"\t"<<p.posEnd<<endl;143                 p.ifCompared = 1;144                 return p;145             }146         }147     }148     p.ifCompared = 0;149     return p;150 }

 

454ITS数据按barcode和primer分类程序v1.0