首页 > 代码库 > 454ITS数据按barcode和primer分类程序v1.0
454ITS数据按barcode和primer分类程序v1.0
不知道有什么好办法可以让primer允许漏配,现在仅仅是允许错配,还是有一些没有配上,454数据有些primer漏配了一些,下一步解决这个问题
1 #include <cstdio> 2 #include <cstdlib> 3 #include <string> 4 #include <iostream> 5 #include <fstream> 6 #include <iomanip> 7 #include <getopt.h> 8 using namespace std; 9 10 int max_mis = 1; 11 int N = 0; 12 string primer = ""; 13 string* barcode = NULL; 14 string* barcodeName = NULL; 15 void usage () 16 { 17 cout << "\nUsage: splitByBarcode [options] <read.fasta> <barcode.txt> <output prefix>\n" 18 << " -m <int> allow the max mis num, default " << max_mis << "\n" 19 << " -p <string> primer, required "<<endl 20 << " -n <int> barcode num, required"<<endl; 21 exit(1); 22 } 23 struct COMPARE{ 24 int ifCompared; 25 int posStart; 26 int posEnd; 27 }; 28 COMPARE compare(string reads, string barcode, int maxmis,int b,int e); 29 int main(int argc, char *argv[]) 30 { 31 if (argc < 4) 32 { 33 usage(); 34 } 35 int c; 36 while ((c=getopt(argc,argv,"m:n:p:")) != -1) 37 { 38 switch (c) 39 { 40 case ‘m‘ : max_mis = atoi(optarg);break; 41 case ‘n‘ : N = atoi(optarg); break; 42 case ‘p‘ : primer = optarg;break; 43 default : cout<<"error:"<<(char)c<<endl;usage(); 44 } 45 } 46 if(N<=0){ 47 cerr<<"please input the required barcode num\n"; 48 exit(1); 49 } 50 if(primer == ""){ 51 cerr<<"please input the required primer\n"; 52 exit(1); 53 } 54 string seq_file = argv[optind++]; 55 string barcode_file = argv[optind++]; 56 string outPrefix = argv[optind++]; 57 string out_file = outPrefix + ".list"; 58 barcode = new string[N]; 59 barcodeName = new string[N]; 60 61 for(int i=0;i<N;i++){ 62 barcodeName[i] = ""; 63 barcode[i] = ""; 64 } 65 66 ifstream reads; 67 reads.open(seq_file.c_str()); 68 if(!reads){ 69 cerr<<"fail to open input file"<<seq_file<<endl; 70 } 71 ifstream barcode_in; 72 barcode_in.open(barcode_file.c_str()); 73 if(!barcode_in){ 74 cerr<<"fail to open barcode file" <<barcode_file<<endl; 75 } 76 77 string text; 78 int k=0; 79 while( getline( barcode_in, text, ‘\n‘) ) 80 { 81 int startPos = 0, endPos; 82 endPos = text.find("\t"); 83 barcodeName[k] = text.substr(startPos, endPos-startPos+1); 84 startPos = endPos + 1; 85 endPos = text.find("\t",startPos); 86 barcode[k] = text.substr(startPos, endPos-startPos+1); 87 //cout << barcodeName[k] <<"\t"<< barcodeForward[k] <<"\t"<< barcodeReverse[k] << endl; 88 k++; 89 } 90 91 ofstream splitlist; 92 splitlist.open(out_file.c_str()); 93 if(!splitlist){ 94 cerr<<"fail to creat output file" << out_file <<endl; 95 } 96 int line_num = 1; 97 splitlist<<"barcodeName\tid\tid_line\n"; 98 while( getline( reads, text, ‘\n‘) ) 99 {100 string seq = "", id = "";101 id = text;102 getline( reads, text, ‘\n‘);103 seq = text;104 105 COMPARE p1 ,p2;106 p1.ifCompared = p2.ifCompared = 0;107 for(int k=0;k<N;k++){108 p1 = compare(seq,primer,max_mis,0,seq.length()-primer.length()+1);109 if( p1.ifCompared )110 {111 p2 = compare(seq,barcode[k],0,0,p1.posStart);112 if ( p2.ifCompared ){113 //cout<<seq1<<"\t"<<barcodeForward[k]<<"\t"<<seq2<<"\t"<<barcodeReverse[k]<<endl;114 splitlist<<barcodeName[k]<<"\t"<<id<<"\t"<<line_num<<endl;115 break;116 }117 }118 }119 line_num = line_num + 2;120 }121 delete[] barcode;122 delete[] barcodeName;123 splitlist.close();124 barcode_in.close();125 reads.close();126 }127 COMPARE compare(string reads, string barcode, int maxmis, int b, int e)128 {129 COMPARE p;130 //cout<<b<<"\t"<<e<<endl;131 // cout<<barcode<<"\t"<<barcode.length()<<endl;132 for(int i=b;i<e;i++)133 {134 int mismatch = 0;135 int pos = 0;136 while(mismatch <= maxmis ){137 if(reads[i+pos] != barcode[pos]) mismatch++;138 pos++;139 if(pos == barcode.length()){140 p.posStart = i;141 p.posEnd = i+pos;142 // cout<<p.posStart<<"\t"<<p.posEnd<<endl;143 p.ifCompared = 1;144 return p;145 }146 }147 }148 p.ifCompared = 0;149 return p;150 }
454ITS数据按barcode和primer分类程序v1.0
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。